From 0f0254fa8ddce39ce4e98113e7050e1cd88ff884 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Tue, 21 Oct 2008 06:33:42 +0800 Subject: [MTD] [NAND] OMAP2: remove duplicated #include Removed duplicated #include in drivers/mtd/onenand/omap2.c. Signed-off-by: Huang Weiyi Signed-off-by: David Woodhouse --- drivers/mtd/onenand/omap2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 8387e05daae..e39b21d3e16 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3-18-g5258 From f04de505e3fa322728d1a851e08bf7060b117743 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 21 Oct 2008 13:25:51 +0100 Subject: [JFFS2] Fix build failure with !CONFIG_JFFS2_FS_WRITEBUFFER Build failure introduced by 5bf1723723487ddb0b9c9641b6559da96b27cc93 [JFFS2] Write buffer offset adjustment for NOR-ECC (Sibley) flash Signed-off-by: Steve Glendinning Signed-off-by: David Woodhouse --- fs/jffs2/nodemgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 0875b60b4bf..21a052915aa 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -261,9 +261,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) jffs2_sum_reset_collected(c->summary); /* reset collected summary */ +#ifdef CONFIG_JFFS2_FS_WRITEBUFFER /* adjust write buffer offset, else we get a non contiguous write bug */ if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) c->wbuf_ofs = 0xffffffff; +#endif D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); -- cgit v1.2.3-18-g5258 From ba8b453de08c18cbc2453bcabfd0936c1d6695cb Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 24 Oct 2008 15:05:12 +0200 Subject: Change UTF8 chars in Kconfig help text about Oprofile AMD barcelona Fixes screwing up text output when doing a make oldconfig and 
viewing help text of "OProfile AMD IBS support". When the terminal is not using an UTF8 locale / LANG. "make config" breaks terminal output and its not possible to continue. (Change added by changeset 852402cc Tue Jul 22 21:09:06 2008) Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Robert Richter --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index e6ab550bceb..8977d99987c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -21,7 +21,7 @@ config OPROFILE_IBS Instruction-Based Sampling (IBS) is a new profiling technique that provides rich, precise program performance information. IBS is introduced by AMD Family10h processors - (AMD Opteron Quad-Core processor “Barcelona”) to overcome + (AMD Opteron Quad-Core processor "Barcelona") to overcome the limitations of conventional performance counter sampling. -- cgit v1.2.3-18-g5258 From fa448d6008cc81a3537e5db168fa0490e0caba68 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Fri, 24 Oct 2008 11:47:29 -0700 Subject: Cell OProfile: Incorrect local array size in activate spu profiling function The size of the pm_signal_local array should be equal to the number of SPUs being configured in the call. Currently, the array is of size 4 (NR_PHYS_CTRS) but being indexed by a for loop from 0 to 7 (NUM_SPUS_PER_NODE). 
Signed-off-by: Carl Love Signed-off-by: Robert Richter --- arch/powerpc/oprofile/op_model_cell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 35141a8bc3d..6b2d974880b 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -830,7 +830,7 @@ static int calculate_lfsr(int n) static int pm_rtas_activate_spu_profiling(u32 node) { int ret, i; - struct pm_signal pm_signal_local[NR_PHYS_CTRS]; + struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE]; /* * Set up the rtas call to configure the debug bus to -- cgit v1.2.3-18-g5258 From cae042a73bb22fc4132b04ff94bd684456203089 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 23 Oct 2008 16:25:54 +0200 Subject: oprofile: fix memory ordering Regular bitops don't work as locks on all architectures. Also: can use non-atomic unlock as no concurrent stores to the word. Signed-off-by: Nick Piggin Signed-off-by: Robert Richter --- drivers/oprofile/event_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index d962ba0dd87..191a3202cec 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -105,7 +105,7 @@ static int event_buffer_open(struct inode *inode, struct file *file) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (test_and_set_bit(0, &buffer_opened)) + if (test_and_set_bit_lock(0, &buffer_opened)) return -EBUSY; /* Register as a user of dcookies @@ -129,7 +129,7 @@ static int event_buffer_open(struct inode *inode, struct file *file) fail: dcookie_unregister(file->private_data); out: - clear_bit(0, &buffer_opened); + __clear_bit_unlock(0, &buffer_opened); return err; } @@ -141,7 +141,7 @@ static int event_buffer_release(struct inode *inode, struct file *file) dcookie_unregister(file->private_data); buffer_pos = 0; atomic_set(&buffer_ready, 0); - 
clear_bit(0, &buffer_opened); + __clear_bit_unlock(0, &buffer_opened); return 0; } -- cgit v1.2.3-18-g5258 From edf1ae403896cb7750800508b14996ba6be39a53 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 29 Oct 2008 00:47:57 +0000 Subject: [CIFS] Reduce number of socket retries in large write path CIFS in some heavy stress conditions cifs could get EAGAIN repeatedly in smb_send2 which led to repeated retries and eventually failure of large writes which could lead to data corruption. There are three changes that were suggested by various network developers: 1) convert cifs from non-blocking to blocking tcp sendmsg (we left in the retry on failure) 2) change cifs to not set sendbuf and rcvbuf size for the socket (let tcp autotune the buffer sizes since that works much better in the TCP stack now) 3) if we have a partial frame sent in smb_send2, mark the tcp session as invalid (close the socket and reconnect) so we do not corrupt the remaining part of the SMB with the beginning of the next SMB. This does not appear to hurt performance measurably and has been run in various scenarios, but it definately removes a corruption that we were seeing in some high stress test cases. Acked-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/CHANGES | 6 +++++- fs/cifs/cifsglob.h | 2 ++ fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 50 +++++++++++++++++++++++++++++++++++++------------- fs/cifs/transport.c | 41 +++++++++++++++++++++++++++++++---------- 5 files changed, 76 insertions(+), 25 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8f528ea24c4..8855331b2fb 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable (when delete of an open file fails we mark the file as "delete-on-close" in a way that more servers accept, but only if we can first rename the file to a temporary name). Add experimental support for more safely -handling fcntl(F_SETLEASE). 
+handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp +sends, and also let tcp autotune the socket send and receive buffers. +This reduces the number of EAGAIN errors returned by TCP/IP in +high stress workloads (and the number of retries on socket writes +when sending large SMBWriteX requests). Version 1.54 ------------ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c791e5b5a91..1cb1189f24e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -141,6 +141,8 @@ struct TCP_Server_Info { char versionMajor; char versionMinor; bool svlocal:1; /* local server or remote */ + bool noblocksnd; /* use blocking sendmsg */ + bool noautotune; /* do not autotune send buf sizes */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0cff7fe986e..6f21ecb85ce 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,7 +36,7 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, - unsigned int /* length */ , struct sockaddr *); + unsigned int /* length */ , struct sockaddr *, bool); extern unsigned int _GetXid(void); extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71b7661e226..e9f9248cb3f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -92,6 +92,8 @@ struct smb_vol { bool seal:1; /* request transport encryption on share */ bool nodfs:1; /* Do not request DFS, even if available */ bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -102,9 +104,11 @@ struct 
smb_vol { static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + bool noblocksnd, + bool nosndbuf); /* ipv6 never set sndbuf size */ static int ipv6_connect(struct sockaddr_in6 *psin_server, - struct socket **csocket); + struct socket **csocket, bool noblocksnd); /* @@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); if (server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6, - &server->ssocket); + &server->ssocket, server->noautotune); } else { rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->noblocksnd, server->noautotune); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname, /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = true; + } else if (strnicmp(data, "noblocksend", 11) == 0) { + vol->noblocksnd = 1; + } else if (strnicmp(data, "noautotune", 10) == 0) { + vol->noautotune = 1; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + bool noblocksnd, bool noautotune) { int rc = 0; int connected = 0; @@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + /* make the bufsizes depend on wsize/rsize and max requests */ - if ((*csocket)->sk->sk_sndbuf < (200 * 
1024)) - (*csocket)->sk->sk_sndbuf = 200 * 1024; - if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) - (*csocket)->sk->sk_rcvbuf = 140 * 1024; + if (noautotune) { + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) + (*csocket)->sk->sk_sndbuf = 200 * 1024; + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + (*csocket)->sk->sk_rcvbuf = 140 * 1024; + } /* send RFC1001 sessinit */ if (psin_server->sin_port == htons(RFC1001_PORT)) { @@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* sizeof RFC1002_SESSION_REQUEST with no scope */ smb_buf->smb_buf_length = 0x81000044; rc = smb_send(*csocket, smb_buf, 0x44, - (struct sockaddr *)psin_server); + (struct sockaddr *)psin_server, noblocksnd); kfree(ses_init_buf); msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot @@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } static int -ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) +ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, + bool noblocksnd) { int rc = 0; int connected = 0; @@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + return rc; } @@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? 
*/ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket); + rc = ipv6_connect(&sin_server6, &csocket, + volume_info.noblocksnd); } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.noblocksnd, + volume_info.noautotune); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. " "Aborting operation")); @@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); goto out; } else { + srvTcp->noblocksnd = volume_info.noblocksnd; + srvTcp->noautotune = volume_info.noautotune; memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bf0e6d8e382..ba4d66644eb 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) int smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length, struct sockaddr *sin) + unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. 
bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } static int -smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, - struct sockaddr *sin) +smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, + struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; + struct socket *ssocket = server->ssocket; if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -312,6 +319,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, i = 0; /* in case we get ENOSPC on the next send */ } + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + cFYI(1, ("partial send (%d remaining), terminating session", + total_len)); + /* If we have only sent part of an SMB then the next SMB + could be taken as the remainder of this one. 
We need + to kill the socket so the server throws away the partial + SMB */ + server->tcpStatus = CifsNeedReconnect; + } + if (rc < 0) { cERROR(1, ("Error %d sending data on socket to server", rc)); } else @@ -518,8 +535,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #ifdef CONFIG_CIFS_STATS2 atomic_inc(&ses->server->inSend); #endif - rc = smb_send2(ses->server->ssocket, iov, n_vec, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + rc = smb_send2(ses->server, iov, n_vec, + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -711,7 +729,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -851,7 +870,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, return rc; } rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); up(&ses->server->tcpSem); return rc; } @@ -941,7 +961,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; -- cgit v1.2.3-18-g5258 From ae9b9403644f3ecc76867af042e7e1cfd5c099d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 30 Oct 2008 17:43:57 
+0100 Subject: AMD IOMMU: fix detection of NP capable IOMMUs This patch changes the code to use IOMMU_CAP_NPCACHE as a shift and not as a mask. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3b346c6f551..38e88d40ab1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { - return iommu->cap & IOMMU_CAP_NPCACHE; + return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); } /**************************************************************************** -- cgit v1.2.3-18-g5258 From 61de800d33af585cb7e6f27b5cdd51029c6855cb Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 30 Oct 2008 20:15:22 +0000 Subject: [CIFS] fix error in smb_send2 smb_send2 exit logic was strange, and with the previous change could cause us to fail large smb writes when all of the smb was not sent as one chunk. 
Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- fs/cifs/file.c | 2 +- fs/cifs/transport.c | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 843a85fb8b9..d5eac48fc41 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1536,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; - /* cFYI(1,("write at %lld %d bytes",offset,count));*/ + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if (tcon->ses == NULL) return -ECONNABORTED; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 62d8bd8f14c..ead1a3bb025 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - cFYI(DBG2, ("rpages: num pages %d", num_pages)); + cFYI(DBG2, ("rpages: num pages %d", num_pages)); for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index ba4d66644eb..ff8243a8fe3 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -290,8 +290,11 @@ smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, if (rc < 0) break; - if (rc >= total_len) { - WARN_ON(rc > total_len); + if (rc == total_len) { + total_len = 0; + break; + } else if (rc > total_len) { + cERROR(1, ("sent %d requested %d", rc, total_len)); break; } if (rc == 0) { -- cgit v1.2.3-18-g5258 From a1f64819fe9f136c98d572794a35a7e377c951ef Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 01:41:56 +0100 Subject: firewire: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Stefan Richter --- drivers/firewire/fw-device.c | 14 ++++++-------- drivers/firewire/fw-ohci.c | 2 +- drivers/firewire/fw-sbp2.c | 2 +- 3 files changed, 8 insertions(+), 10 
deletions(-) diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c index 3fccdd48410..6b9be42c7b9 100644 --- a/drivers/firewire/fw-device.c +++ b/drivers/firewire/fw-device.c @@ -587,8 +587,7 @@ static void create_units(struct fw_device *device) unit->device.bus = &fw_bus_type; unit->device.type = &fw_unit_type; unit->device.parent = &device->device; - snprintf(unit->device.bus_id, sizeof(unit->device.bus_id), - "%s.%d", device->device.bus_id, i++); + dev_set_name(&unit->device, "%s.%d", dev_name(&device->device), i++); init_fw_attribute_group(&unit->device, fw_unit_attributes, @@ -711,8 +710,7 @@ static void fw_device_init(struct work_struct *work) device->device.type = &fw_device_type; device->device.parent = device->card->device; device->device.devt = MKDEV(fw_cdev_major, minor); - snprintf(device->device.bus_id, sizeof(device->device.bus_id), - "fw%d", minor); + dev_set_name(&device->device, "fw%d", minor); init_fw_attribute_group(&device->device, fw_device_attributes, @@ -741,13 +739,13 @@ static void fw_device_init(struct work_struct *work) if (device->config_rom_retries) fw_notify("created device %s: GUID %08x%08x, S%d00, " "%d config ROM retries\n", - device->device.bus_id, + dev_name(&device->device), device->config_rom[3], device->config_rom[4], 1 << device->max_speed, device->config_rom_retries); else fw_notify("created device %s: GUID %08x%08x, S%d00\n", - device->device.bus_id, + dev_name(&device->device), device->config_rom[3], device->config_rom[4], 1 << device->max_speed); device->config_rom_retries = 0; @@ -883,12 +881,12 @@ static void fw_device_refresh(struct work_struct *work) FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) goto gone; - fw_notify("refreshed device %s\n", device->device.bus_id); + fw_notify("refreshed device %s\n", dev_name(&device->device)); device->config_rom_retries = 0; goto out; give_up: - fw_notify("giving up on refresh of device %s\n", device->device.bus_id); + fw_notify("giving up on refresh of device %s\n", 
dev_name(&device->device)); gone: atomic_set(&device->state, FW_DEVICE_SHUTDOWN); fw_device_shutdown(work); diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 8e16bfbdcb3..46610b09041 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -2468,7 +2468,7 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) goto fail_self_id; fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", - dev->dev.bus_id, version >> 16, version & 0xff); + dev_name(&dev->dev), version >> 16, version & 0xff); return 0; fail_self_id: diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index d334cac5e1f..97df6dac3a8 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -1135,7 +1135,7 @@ static int sbp2_probe(struct device *dev) tgt->unit = unit; kref_init(&tgt->kref); INIT_LIST_HEAD(&tgt->lu_list); - tgt->bus_id = unit->device.bus_id; + tgt->bus_id = dev_name(&unit->device); tgt->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; if (fw_device_enable_phys_dma(device) < 0) -- cgit v1.2.3-18-g5258 From 233976e539a93de1320fc7625b24076b1f9e2c9c Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 01:49:20 +0100 Subject: ieee1394: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Stefan Richter --- drivers/ieee1394/hosts.c | 4 ++-- drivers/ieee1394/nodemgr.c | 14 +++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c index 8dd09d85041..237d0c9d69c 100644 --- a/drivers/ieee1394/hosts.c +++ b/drivers/ieee1394/hosts.c @@ -155,11 +155,11 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, memcpy(&h->device, &nodemgr_dev_template_host, sizeof(h->device)); h->device.parent = dev; set_dev_node(&h->device, dev_to_node(dev)); - snprintf(h->device.bus_id, BUS_ID_SIZE, "fw-host%d", h->id); + 
dev_set_name(&h->device, "fw-host%d", h->id); h->host_dev.parent = &h->device; h->host_dev.class = &hpsb_host_class; - snprintf(h->host_dev.bus_id, BUS_ID_SIZE, "fw-host%d", h->id); + dev_set_name(&h->host_dev, "fw-host%d", h->id); if (device_register(&h->device)) goto fail; diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 2376b729e87..9e39f73282e 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -826,13 +826,11 @@ static struct node_entry *nodemgr_create_node(octlet_t guid, memcpy(&ne->device, &nodemgr_dev_template_ne, sizeof(ne->device)); ne->device.parent = &host->device; - snprintf(ne->device.bus_id, BUS_ID_SIZE, "%016Lx", - (unsigned long long)(ne->guid)); + dev_set_name(&ne->device, "%016Lx", (unsigned long long)(ne->guid)); ne->node_dev.parent = &ne->device; ne->node_dev.class = &nodemgr_ne_class; - snprintf(ne->node_dev.bus_id, BUS_ID_SIZE, "%016Lx", - (unsigned long long)(ne->guid)); + dev_set_name(&ne->node_dev, "%016Lx", (unsigned long long)(ne->guid)); if (device_register(&ne->device)) goto fail_devreg; @@ -932,13 +930,11 @@ static void nodemgr_register_device(struct node_entry *ne, ud->device.parent = parent; - snprintf(ud->device.bus_id, BUS_ID_SIZE, "%s-%u", - ne->device.bus_id, ud->id); + dev_set_name(&ud->device, "%s-%u", dev_name(&ne->device), ud->id); ud->unit_dev.parent = &ud->device; ud->unit_dev.class = &nodemgr_ud_class; - snprintf(ud->unit_dev.bus_id, BUS_ID_SIZE, "%s-%u", - ne->device.bus_id, ud->id); + dev_set_name(&ud->unit_dev, "%s-%u", dev_name(&ne->device), ud->id); if (device_register(&ud->device)) goto fail_devreg; @@ -953,7 +949,7 @@ static void nodemgr_register_device(struct node_entry *ne, fail_classdevreg: device_unregister(&ud->device); fail_devreg: - HPSB_ERR("Failed to create unit %s", ud->device.bus_id); + HPSB_ERR("Failed to create unit %s", dev_name(&ud->device)); } -- cgit v1.2.3-18-g5258 From 638570b54346f140bc09b986d93e76025d35180f Mon Sep 17 00:00:00 2001 From: Stefan 
Richter Date: Sun, 26 Oct 2008 12:03:37 +0100 Subject: ieee1394: raw1394: fix possible deadlock in multithreaded clients Regression in 2.6.28-rc1: When I added the new state_mutex which prevents corruption of raw1394's internal state when accessed by multithreaded client applications, the following possible though highly unlikely deadlock slipped in: Thread A: Thread B: - acquire mmap_sem - raw1394_write() or raw1394_ioctl() - raw1394_mmap() - acquire state_mutex - acquire state_mutex - copy_to/from_user(), possible page fault: acquire mmap_sem The simplest fix is to use mutex_trylock() instead of mutex_lock() in raw1394_mmap(). This changes the behavior under contention in a way which is visible to userspace clients. However, since multithreaded access was entirely buggy before state_mutex was added and libraw1394's documentation advised application programmers to use a handle only in a single thread, this change in behaviour should not be an issue in practice at all. Since we have to use mutex_trylock() in raw1394_mmap() regardless whether /dev/raw1394 was opened with O_NONBLOCK or not, we now use mutex_trylock() unconditionally everywhere for state_mutex, just to have consistent behavior. 
Reported-by: Johannes Weiner Signed-off-by: Stefan Richter --- drivers/ieee1394/raw1394.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 2cf4ae75bec..4bdfff0a919 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -2268,7 +2268,8 @@ static ssize_t raw1394_write(struct file *file, const char __user * buffer, return -EFAULT; } - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; switch (fi->state) { case opened: @@ -2548,7 +2549,8 @@ static int raw1394_mmap(struct file *file, struct vm_area_struct *vma) struct file_info *fi = file->private_data; int ret; - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; if (fi->iso_state == RAW1394_ISO_INACTIVE) ret = -EINVAL; @@ -2669,7 +2671,8 @@ static long raw1394_ioctl(struct file *file, unsigned int cmd, break; } - mutex_lock(&fi->state_mutex); + if (!mutex_trylock(&fi->state_mutex)) + return -EAGAIN; switch (fi->iso_state) { case RAW1394_ISO_INACTIVE: -- cgit v1.2.3-18-g5258 From 8449fc3ae58bf8ee5acbd2280754cde67b5db128 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 26 Oct 2008 12:02:03 +0100 Subject: ieee1394: dv1394: fix possible deadlock in multithreaded clients Fix a possible though highly unlikely deadlock: Thread A: Thread B: - acquire mmap_sem - dv1394_ioctl/read/write() - dv1394_mmap() - acquire video->mtx - acquire video->mtx - copy_to/from_user(), possible page fault: acquire mmap_sem The simplest fix is to use mutex_trylock() instead of mutex_lock() in dv1394_mmap(). This changes the behavior under contention in a way which is visible to userspace clients. However, my guess is that no clients exist which use mmap vs. ioctl/read/write on the dv1394 character device file interface in concurrent threads. 
Reported-by: Johannes Weiner Signed-off-by: Stefan Richter --- drivers/ieee1394/dv1394.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index df70f51279d..53329972c7d 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1270,8 +1270,14 @@ static int dv1394_mmap(struct file *file, struct vm_area_struct *vma) struct video_card *video = file_to_video_card(file); int retval = -EINVAL; - /* serialize mmap */ - mutex_lock(&video->mtx); + /* + * We cannot use the blocking variant mutex_lock here because .mmap + * is called with mmap_sem held, while .ioctl, .read, .write acquire + * video->mtx and subsequently call copy_to/from_user which will + * grab mmap_sem in case of a page fault. + */ + if (!mutex_trylock(&video->mtx)) + return -EAGAIN; if ( ! video_card_initialized(video) ) { retval = do_dv1394_init_default(video); -- cgit v1.2.3-18-g5258 From b27cf88e9592953ae292d05324887f2f44979433 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 31 Oct 2008 14:52:24 +0000 Subject: [JFFS2] Fix lack of locking in thread_should_wake() The thread_should_wake() function trawls through the list of 'very dirty' eraseblocks, determining whether the background GC thread should wake. Doing this without holding the appropriate locks is a bad idea. 
OLPC Trac #8615 Signed-off-by: David Woodhouse Cc: stable@kernel.org --- fs/jffs2/background.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 8adebd3e43c..3cceef4ad2b 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c) for (;;) { allow_signal(SIGHUP); again: + spin_lock(&c->erase_completion_lock); if (!jffs2_thread_should_wake(c)) { set_current_state (TASK_INTERRUPTIBLE); + spin_unlock(&c->erase_completion_lock); D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); - /* Yes, there's a race here; we checked jffs2_thread_should_wake() - before setting current->state to TASK_INTERRUPTIBLE. But it doesn't - matter - We don't care if we miss a wakeup, because the GC thread - is only an optimisation anyway. */ schedule(); - } + } else + spin_unlock(&c->erase_completion_lock); + /* This thread is purely an optimisation. But if it runs when other things could be running, it actually makes things a -- cgit v1.2.3-18-g5258 From 22cffe494b6b5773b44fe8bb8f41b471c1734b53 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 31 Oct 2008 16:09:47 +0100 Subject: Revert "Cell OProfile: Incorrect local array size in activate spu profiling function" This reverts commit fa448d6008cc81a3537e5db168fa0490e0caba68. 
--- arch/powerpc/oprofile/op_model_cell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 6b2d974880b..35141a8bc3d 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -830,7 +830,7 @@ static int calculate_lfsr(int n) static int pm_rtas_activate_spu_profiling(u32 node) { int ret, i; - struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE]; + struct pm_signal pm_signal_local[NR_PHYS_CTRS]; /* * Set up the rtas call to configure the debug bus to -- cgit v1.2.3-18-g5258 From 99219b4f3e0772a11fc0d98213b00e89fee7b049 Mon Sep 17 00:00:00 2001 From: Carl Love Date: Wed, 29 Oct 2008 08:06:45 -0700 Subject: Cell OProfile: Incorrect local array size in activate spu profiling function Updated the patch to address comments by Michael Ellerman. Specifically, changed upper limit in for loop to ARRAY_SIZE() macro and added a check to make sure the number of events specified by the user, which is used as the max for indexing various arrays, is no bigger than the declared size of the arrays. The size of the pm_signal_local array should be equal to the number of SPUs being configured in the array. Currently, the array is of size 4 (NR_PHYS_CTRS) but being indexed by a for loop from 0 to 7 (NUM_SPUS_PER_NODE).
Signed-off-by: Carl Love --- arch/powerpc/oprofile/op_model_cell.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 35141a8bc3d..25a4ec2514a 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -582,6 +582,13 @@ static int cell_reg_setup(struct op_counter_config *ctr, num_counters = num_ctrs; + if (unlikely(num_ctrs > NR_PHYS_CTRS)) { + printk(KERN_ERR + "%s: Oprofile, number of specified events " \ + "exceeds number of physical counters\n", + __func__); + return -EIO; + } pm_regs.group_control = 0; pm_regs.debug_bus_control = 0; @@ -830,13 +837,13 @@ static int calculate_lfsr(int n) static int pm_rtas_activate_spu_profiling(u32 node) { int ret, i; - struct pm_signal pm_signal_local[NR_PHYS_CTRS]; + struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE]; /* * Set up the rtas call to configure the debug bus to * route the SPU PCs. Setup the pm_signal for each SPU */ - for (i = 0; i < NUM_SPUS_PER_NODE; i++) { + for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) { pm_signal_local[i].cpu = node; pm_signal_local[i].signal_group = 41; /* spu i on word (i/2) */ @@ -848,7 +855,7 @@ static int pm_rtas_activate_spu_profiling(u32 node) ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, pm_signal_local, - (NUM_SPUS_PER_NODE + (ARRAY_SIZE(pm_signal_local) * sizeof(struct pm_signal))); if (unlikely(ret)) { -- cgit v1.2.3-18-g5258 From 70d9d15833864e7120c3ffcfdbd6fa61f5f9726a Mon Sep 17 00:00:00 2001 From: Will Newton Date: Tue, 28 Oct 2008 10:52:36 +0000 Subject: drivers/net/smc911x.c: Fix lockdep warning on xmit. dev_kfree_skb should not be called with irqs disabled, use dev_kfree_skb_irq instead. 
The warning caused looks like this: ====================================================== [ INFO: hard-safe -> hard-unsafe lock order detected ] 2.6.28-rc1 #273 ------------------------------------------------------ swapper/0 [HC0[0]:SC1[2]:HE0:SE0] is trying to acquire: (clock-AF_INET){-..+}, at: [<4015c17c>] _sock_def_write_space+0x28/0xd8 and this task is already holding: (&lp->lock){++..}, at: [<4013f230>] _smc911x_hard_start_xmit+0x30/0x4b8 which would create a new lock dependency: (&lp->lock){++..} -> (clock-AF_INET){-..+} Signed-off-by: Will Newton Signed-off-by: Jeff Garzik --- drivers/net/smc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index f59c7772f34..5051554ff05 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -499,7 +499,7 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) #else SMC_PUSH_DATA(lp, buf, len); dev->trans_start = jiffies; - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); #endif if (!lp->tx_throttle) { netif_wake_queue(dev); -- cgit v1.2.3-18-g5258 From e219cca082f52e7dfea41f3be264b7b5eb204227 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Nov 2008 22:37:59 -0500 Subject: jbd: don't give up looking for space so easily in __log_wait_for_space Commit be07c4ed introduced a regression because it assumed that if there were no transactions ready to be checkpointed, that no progress could be made on making space available in the journal, and so the journal should be aborted. This assumption is false; it could be the case that simply calling cleanup_journal_tail() will recover the necessary space, or, for small journals, the currently committing transaction could be responsible for chewing up the required space in the log, so we need to wait for the currently committing transaction to finish before trying to force a checkpoint operation.
This patch fixes the bug reported by Meelis Roos at: http://bugzilla.kernel.org/show_bug.cgi?id=11937 Signed-off-by: "Theodore Ts'o" Cc: Duane Griffin Cc: Toshiyuki Okajima --- fs/jbd/checkpoint.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 1bd8d4acc6f..61f32f3868c 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -115,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh) */ void __log_wait_for_space(journal_t *journal) { - int nblocks; + int nblocks, space_left; assert_spin_locked(&journal->j_state_lock); nblocks = jbd_space_needed(journal); @@ -128,25 +128,42 @@ void __log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we * were waiting for the checkpoint lock. If there are no - * outstanding transactions there is nothing to checkpoint and - * we can't make progress. Abort the journal in this case. + * transactions ready to be checkpointed, try to recover + * journal space by calling cleanup_journal_tail(), and if + * that doesn't work, by waiting for the currently committing + * transaction to complete. If there is absolutely no way + * to make progress, this is either a BUG or corrupted + * filesystem, so abort the journal and leave a stack + * trace for forensic evidence. */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); - if (__log_space_left(journal) < nblocks) { + space_left = __log_space_left(journal); + if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; + tid_t tid = 0; + if (journal->j_committing_transaction) + tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); if (chkpt) { log_do_checkpoint(journal); + } else if (cleanup_journal_tail(journal) == 0) { + /* We were able to recover space; yay! 
*/ + ; + } else if (tid) { + log_wait_commit(journal, tid); } else { - printk(KERN_ERR "%s: no transactions\n", - __func__); + printk(KERN_ERR "%s: needed %d blocks and " + "only had %d space available\n", + __func__, nblocks, space_left); + printk(KERN_ERR "%s: no way to get more " + "journal space\n", __func__); + WARN_ON(1); journal_abort(journal, 0); } - spin_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); -- cgit v1.2.3-18-g5258 From 8c3f25d8950c3e9fe6c9849f88679b3f2a071550 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Nov 2008 22:38:07 -0500 Subject: jbd2: don't give up looking for space so easily in __jbd2_log_wait_for_space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 23f8b79e introduced a regression because it assumed that if there were no transactions ready to be checkpointed, that no progress could be made on making space available in the journal, and so the journal should be aborted. This assumption is false; it could be the case that simply calling jbd2_cleanup_journal_tail() will recover the necessary space, or, for small journals, the currently committing transaction could be responsible for chewing up the required space in the log, so we need to wait for the currently committing transaction to finish before trying to force a checkpoint operation.
This patch fixes a bug reported by Mihai Harpau at: https://bugzilla.redhat.com/show_bug.cgi?id=469582 This patch fixes a bug reported by François Valenduc at: http://bugzilla.kernel.org/show_bug.cgi?id=11840 Signed-off-by: "Theodore Ts'o" Cc: Duane Griffin Cc: Toshiyuki Okajima --- fs/jbd2/checkpoint.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9203c3332f1..9497718fe92 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh) */ void __jbd2_log_wait_for_space(journal_t *journal) { - int nblocks; + int nblocks, space_left; assert_spin_locked(&journal->j_state_lock); nblocks = jbd_space_needed(journal); @@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we * were waiting for the checkpoint lock. If there are no - * outstanding transactions there is nothing to checkpoint and - * we can't make progress. Abort the journal in this case. + * transactions ready to be checkpointed, try to recover + * journal space by calling cleanup_journal_tail(), and if + * that doesn't work, by waiting for the currently committing + * transaction to complete. If there is absolutely no way + * to make progress, this is either a BUG or corrupted + * filesystem, so abort the journal and leave a stack + * trace for forensic evidence. 
*/ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); - if (__jbd2_log_space_left(journal) < nblocks) { + space_left = __jbd2_log_space_left(journal); + if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; + tid_t tid = 0; + if (journal->j_committing_transaction) + tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); if (chkpt) { jbd2_log_do_checkpoint(journal); + } else if (jbd2_cleanup_journal_tail(journal) == 0) { + /* We were able to recover space; yay! */ + ; + } else if (tid) { + jbd2_log_wait_commit(journal, tid); } else { - printk(KERN_ERR "%s: no transactions\n", - __func__); + printk(KERN_ERR "%s: needed %d blocks and " + "only had %d space available\n", + __func__, nblocks, space_left); + printk(KERN_ERR "%s: no way to get more " + "journal space in %s\n", __func__, + journal->j_devname); + WARN_ON(1); jbd2_journal_abort(journal, 0); } - spin_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); -- cgit v1.2.3-18-g5258 From 2423840ded13e6d3b52d88aff8d033bb78fafd08 Mon Sep 17 00:00:00 2001 From: Sami Liedes Date: Sun, 2 Nov 2008 19:23:30 -0500 Subject: jbd2: deregister proc on failure in jbd2_journal_init_inode jbd2_journal_init_inode() does not call jbd2_stats_proc_exit() on all failure paths after calling jbd2_stats_proc_init(). This leaves dangling references to the fs in proc. 
This patch fixes a bug reported by Sami Liedes at: http://bugzilla.kernel.org/show_bug.cgi?id=11493 Signed-off-by: Sami Liedes Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 783de118de9..e70d657a19f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1089,6 +1089,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); + jbd2_stats_proc_exit(journal); kfree(journal); return NULL; } @@ -1098,6 +1099,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", __func__); + jbd2_stats_proc_exit(journal); kfree(journal); return NULL; } -- cgit v1.2.3-18-g5258 From f26ba1751145edbf52b2c89a40e389f2fbdfc1af Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:11:01 +0000 Subject: udp: Fix the SNMP counter of UDP_MIB_INDATAGRAMS If UDP echo is sent to xinetd/echo-dgram, the UDP reply will be received at the sender. But the SNMP counter of UDP_MIB_INDATAGRAMS will not be increased, UDP6_MIB_INDATAGRAMS will be increased instead. Endpoint A Endpoint B UDP Echo request -----------> (IPv4, Dst port=7) <---------- UDP Echo Reply (IPv4, Src port=7) This bug comes from patch cb75994ec311b2cd50e5205efdcc0696abd6675d. It defers counting UDP[6]_MIB_INDATAGRAMS until udp[v6]_recvmsg. Because xinetd used IPv6 socket to receive UDP messages, thus, when received UDP packet, the UDP6_MIB_INDATAGRAMS will be increased in function udpv6_recvmsg() even if the packet is an IPv4 UDP packet. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: David S.
Miller --- net/ipv6/udp.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 71e259e866a..18696af106d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -138,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -158,6 +159,8 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial @@ -180,9 +183,14 @@ try_again: if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -196,7 +204,7 @@ try_again: sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -207,7 +215,7 @@ try_again: } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { -- cgit v1.2.3-18-g5258 From 0856f93958c488f0cc656be53c26dfd20663bdb3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:14:27 +0000 Subject: udp: Fix the SNMP counter of UDP_MIB_INERRORS UDP packets received in udpv6_recvmsg() are not only IPv6 UDP packets, but also have IPv4 UDP packets, so when do the counter of UDP_MIB_INERRORS in udpv6_recvmsg(), we should check whether the packet is a IPv6 UDP packet or a IPv4 UDP packet. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- net/ipv6/udp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 18696af106d..8b48512ebf6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -236,8 +236,14 @@ out: csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) -- cgit v1.2.3-18-g5258 From 55c8eb6c8eaa5009eed1557b296da5d4ea9c369a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Nov 2008 00:04:24 -0800 Subject: SMC91x: Fix compilation on some platforms. This reverts 51ac3beffd4afaea4350526cf01fe74aaff25eff ('SMC91x: delete unused local variable "lp"') and adds __maybe_unused markers to these (potentially) unused variables. The issue is that in some configurations SMC_IO_SHIFT evaluates to '(lp->io_shift)', but in some others it's plain '0'. Based upon a build failure report from Manuel Lauss. Signed-off-by: David S. 
Miller --- drivers/net/smc91x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 6f9895d4e5b..fc80f250da3 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -2060,6 +2060,7 @@ static int smc_request_attrib(struct platform_device *pdev, struct net_device *ndev) { struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-attrib"); + struct smc_local *lp __maybe_unused = netdev_priv(ndev); if (!res) return 0; @@ -2074,6 +2075,7 @@ static void smc_release_attrib(struct platform_device *pdev, struct net_device *ndev) { struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-attrib"); + struct smc_local *lp __maybe_unused = netdev_priv(ndev); if (res) release_mem_region(res->start, ATTRIB_SIZE); -- cgit v1.2.3-18-g5258 From a1caa32295d67284ecba18cd8db692c7166f0706 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Mon, 3 Nov 2008 01:30:23 -0800 Subject: XFRM: copy_to_user_kmaddress() reports local address twice While adding support for MIGRATE/KMADDRESS in strongSwan (as specified in draft-ebalard-mext-pfkey-enhanced-migrate-00), Andreas Steffen noticed that XFRMA_KMADDRESS attribute passed to userland contains the local address twice (remote provides local address instead of remote one). This bug in copy_to_user_kmaddress() affects only key managers that use native XFRM interface (key managers that use PF_KEY are not affected). For the record, the bug was in the initial changeset I posted which added support for KMADDRESS (13c1d18931ebb5cf407cb348ef2cd6284d68902d 'xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate)'). Signed-off-by: Arnaud Ebalard Reported-by: Andreas Steffen Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4a8a1abb59e..a278a6f3b99 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1816,7 +1816,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); - memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -- cgit v1.2.3-18-g5258 From 73557af5bf32c3db973050de1fb73423e8fc873e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 31 Oct 2008 13:59:49 -0400 Subject: x86, voyager: fix smp_intr_init() compile breakage Impact: fix x86/Voyager build Looks like this became static on the rest of x86. Fix it up by adding an external definition to mach-voyager/setup.c Signed-off-by: Ingo Molnar --- arch/x86/include/asm/voyager.h | 1 + arch/x86/mach-voyager/setup.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index 9c811d2e6f9..b3e64730762 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -520,6 +520,7 @@ extern void voyager_restart(void); extern void voyager_cat_power_off(void); extern void voyager_cat_do_common_interrupt(void); extern void voyager_handle_nmi(void); +extern void voyager_smp_intr_init(void); /* Commands for the following are */ #define VOYAGER_PSI_READ 0 #define VOYAGER_PSI_WRITE 1 diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 6bbdd633864..a580b9562e7 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -27,7 +27,7 @@ static struct irqaction irq2 = { void __init intr_init_hook(void) { #ifdef CONFIG_SMP - smp_intr_init(); + voyager_smp_intr_init(); 
#endif setup_irq(2, &irq2); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7f4c6af1435..0e331652681 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1258,7 +1258,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) #define QIC_SET_GATE(cpi, vector) \ set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) -void __init smp_intr_init(void) +void __init voyager_smp_intr_init(void) { int i; -- cgit v1.2.3-18-g5258 From ae6884a9da56f8921e432e663b4ccb4a1851b2ea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 3 Nov 2008 14:05:08 -0500 Subject: cifs: fix renaming one hardlink on top of another cifs: fix renaming one hardlink on top of another POSIX says that renaming one hardlink on top of another to the same inode is a no-op. We had the logic mostly right, but forgot to clear the return code. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d54fa8aeaea..ff8c68de4a9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == - info_buf_target->UniqueId)) + info_buf_target->UniqueId)) { /* same file, POSIX says that this is a noop */ + rc = 0; goto cifs_rename_exit; + } } /* else ... BB we could add the same check for Windows by checking the UniqueId via FILE_INTERNAL_INFO */ -- cgit v1.2.3-18-g5258 From 7385d595751874854a6729fbaaa7f793480bbb67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 17:49:59 +0300 Subject: fs_enet: fix polling 1. compile fix for irqreturn_t type change 2. 
restore ->poll_controller after CONFIG_PPC_CPM_NEW_BINDING transition Signed-off-by: Alexey Dobriyan Signed-off-by: Jeff Garzik --- drivers/net/fs_enet/fs_enet-main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index cb51c1fb033..a6f49d02578 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -1099,7 +1099,9 @@ static int __devinit fs_enet_probe(struct of_device *ofdev, ndev->stop = fs_enet_close; ndev->get_stats = fs_enet_get_stats; ndev->set_multicast_list = fs_set_multicast_list; - +#ifdef CONFIG_NET_POLL_CONTROLLER + ndev->poll_controller = fs_enet_netpoll; +#endif if (fpi->use_napi) netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); @@ -1209,7 +1211,7 @@ static void __exit fs_cleanup(void) static void fs_enet_netpoll(struct net_device *dev) { disable_irq(dev->irq); - fs_enet_interrupt(dev->irq, dev, NULL); + fs_enet_interrupt(dev->irq, dev); enable_irq(dev->irq); } #endif -- cgit v1.2.3-18-g5258 From 1d19ecfc65ed01bac7a58f83004057ad704ee7cc Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Sun, 2 Nov 2008 20:30:33 -0800 Subject: net: kconfig cleanup The bool kconfig option added to ixgbe and myri10ge for DCA is ambiguous, so this patch adds a description to the kconfig option. Signed-off-by: Jeff Kirsher Signed-off-by: Jeff Garzik --- drivers/net/Kconfig | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f749b40f954..11f143f4adf 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2010,9 +2010,13 @@ config IGB_LRO If in doubt, say N. config IGB_DCA - bool "Enable DCA" + bool "Direct Cache Access (DCA) Support" default y depends on IGB && DCA && !(IGB=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver.
DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. source "drivers/net/ixp2000/Kconfig" @@ -2437,9 +2441,13 @@ config IXGBE will be called ixgbe. config IXGBE_DCA - bool + bool "Direct Cache Access (DCA) Support" default y depends on IXGBE && DCA && !(IXGBE=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver. DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. config IXGB tristate "Intel(R) PRO/10GbE support" @@ -2489,9 +2497,13 @@ config MYRI10GE will be called myri10ge. config MYRI10GE_DCA - bool + bool "Direct Cache Access (DCA) Support" default y depends on MYRI10GE && DCA && !(MYRI10GE=y && DCA=m) + ---help--- + Say Y here if you want to use Direct Cache Access (DCA) in the + driver. DCA is a method for warming the CPU cache before data + is used, with the intent of lessening the impact of cache misses. config NETXEN_NIC tristate "NetXen Multi port (1/10) Gigabit Ethernet NIC" -- cgit v1.2.3-18-g5258 From ee04448d8871e71f55520d62cf6adbf5dd403c99 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 1 Nov 2008 06:32:20 +0100 Subject: mv643xx_eth: fix SMI bus access timeouts The mv643xx_eth mii bus implementation uses wait_event_timeout() to wait for SMI completion interrupts. If wait_event_timeout() would return zero, mv643xx_eth would conclude that the SMI access timed out, but this is not necessarily true -- wait_event_timeout() can also return zero in the case where the SMI completion interrupt did happen in time but where it took longer than the requested timeout for the process performing the SMI access to be scheduled again. This would lead to occasional SMI access timeouts when the system would be under heavy load. 
The fix is to ignore the return value of wait_event_timeout(), and to re-check the SMI done bit after wait_event_timeout() returns to determine whether or not the SMI access timed out. Signed-off-by: Lennert Buytenhek Signed-off-by: Jeff Garzik --- drivers/net/mv643xx_eth.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index a9c8c08044b..b9dcdbd369f 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1066,9 +1066,12 @@ static int smi_wait_ready(struct mv643xx_eth_shared_private *msp) return 0; } - if (!wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), - msecs_to_jiffies(100))) - return -ETIMEDOUT; + if (!smi_is_done(msp)) { + wait_event_timeout(msp->smi_busy_wait, smi_is_done(msp), + msecs_to_jiffies(100)); + if (!smi_is_done(msp)) + return -ETIMEDOUT; + } return 0; } -- cgit v1.2.3-18-g5258 From bffadffd43d438c3143b8d172a463de89345b836 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 28 Oct 2008 14:44:11 +0800 Subject: PCI: fix VPD limit quirk for Broadcom 5708S VPD quirks need to be called after the VPD capability is initialized. Since VPD initialization now runs after pci_fixup_header (due to the capabilities consolidation), VPD quirks should be done at pci_fixup_final stage correspondingly. 
Tested-by: Eric Dumazet Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index bbf66ea8fd8..5049a47030a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1692,24 +1692,24 @@ static void __devinit quirk_brcm_570x_limit_vpd(struct pci_dev *dev) } } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5706, - quirk_brcm_570x_limit_vpd); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5706S, - quirk_brcm_570x_limit_vpd); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5708, - quirk_brcm_570x_limit_vpd); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5708S, - quirk_brcm_570x_limit_vpd); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5709, - quirk_brcm_570x_limit_vpd); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, - PCI_DEVICE_ID_NX2_5709S, - quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5706, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5706S, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5708, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5708S, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5709, + quirk_brcm_570x_limit_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, + PCI_DEVICE_ID_NX2_5709S, + quirk_brcm_570x_limit_vpd); #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. 
We cannot easily rely on setting -- cgit v1.2.3-18-g5258 From a7b930cdf8ec790c85f81416c87f7c066679d373 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 2 Nov 2008 13:32:43 -0800 Subject: PCI: annotate return value of pci_ioremap_bar with __iomem Was missing from the initial patch. Acked-by: Arjan van de Ven Signed-off-by: Harvey Harrison Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci.h b/include/linux/pci.h index c75b82bda32..feb4657bb04 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1136,7 +1136,7 @@ static inline void pci_mmcfg_late_init(void) { } #endif #ifdef CONFIG_HAS_IOMEM -static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar) +static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { /* * Make sure the BAR is actually a memory resource, not an IO resource -- cgit v1.2.3-18-g5258 From f5dafca52d366ef8c6c86cbdfecc71a9a78b63a6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Oct 2008 22:35:12 -0700 Subject: PCI: remove excess kernel-doc notation Fix pci/rom.c kernel-doc function notation: Warning(drivers/pci/rom.c:110): Excess function parameter or struct member 'return' description in 'pci_map_rom' Warning(drivers/pci/rom.c:177): Excess function parameter or struct member 'return' description in 'pci_map_rom_copy' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/rom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 1f5f6143f35..132a78159b6 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -100,7 +100,8 @@ size_t pci_get_rom_size(void __iomem *rom, size_t size) * pci_map_rom - map a PCI ROM to kernel space * @pdev: pointer to pci device struct * @size: pointer to receive size of pci window over ROM - * @return: kernel virtual pointer to image of ROM + * + * Return: kernel virtual pointer to image of ROM * * 
Map a PCI ROM into kernel space. If ROM is boot video ROM, * the shadow BIOS copy will be returned instead of the @@ -167,7 +168,8 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) * pci_map_rom_copy - map a PCI ROM to kernel space, create a copy * @pdev: pointer to pci device struct * @size: pointer to receive size of pci window over ROM - * @return: kernel virtual pointer to image of ROM + * + * Return: kernel virtual pointer to image of ROM * * Map a PCI ROM into kernel space. If ROM is boot video ROM, * the shadow BIOS copy will be returned instead of the -- cgit v1.2.3-18-g5258 From 88e7df0b7ee717f9db3333fb1248827bbdb2d4d3 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Mon, 3 Nov 2008 14:41:16 -0800 Subject: PCI: fix range check on mmapped sysfs resource files pci_mmap_fits() returns the wrong answer if the sysfs resource file size is not a multiple of the page size. vm_end and vm_start are already page-aligned, so size - start < nr, causing mmap() to return EINVAL. Signed-off-by: Ed Swierk Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 110022d7868..5d72866897a 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -575,7 +575,7 @@ static int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; start = vma->vm_pgoff; - size = pci_resource_len(pdev, resno) >> PAGE_SHIFT; + size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; if (start < size && size - start >= nr) return 1; WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", -- cgit v1.2.3-18-g5258 From ae2d9fb18e575ed37ffc241ece4bf68f0be4ae32 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 4 Nov 2008 09:10:50 -0500 Subject: ext4: fix missing ext4_unlock_group in error path If we try to free a block 
which is already freed, the code was returning without first unlocking the group. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dfe17a13405..444ad998f72 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, else if (block >= (entry->start_blk + entry->count)) n = &(*n)->rb_right; else { + ext4_unlock_group(sb, group); ext4_error(sb, __func__, "Double free of blocks %d (%d %d)\n", block, entry->start_blk, entry->count); -- cgit v1.2.3-18-g5258 From d94e99a64c3beece22dbfb2b335771a59184eb0a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 4 Nov 2008 09:11:26 -0500 Subject: ext4: Convert to host order before using the values. Use le16_to_cpu to read the s_reserved_gdt_blocks values from super block. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 994859df010..e27acd18b4b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1458,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb) /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + - ((sbi->s_es->s_reserved_gdt_blocks +1 ) << - EXT4_DESC_PER_BLOCK_BITS(sb))) / - groups_per_flex; + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << + EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { -- cgit v1.2.3-18-g5258 From 14ce0cb411c88681ab8f3a4c9caa7f42e97a3184 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 3 Nov 2008 18:10:55 -0500 Subject: ext4: wait on all pending commits in ext4_sync_fs() In ext4_sync_fs, we only wait for a commit 
to finish if we started it, but there may be one already in progress which will not be synced. In the case of a data=ordered umount with pending long symlinks which are delayed due to a long list of other I/O on the backing block device, this causes the buffer associated with the long symlinks to not be moved to the inode dirty list in the second phase of fsync_super. Then, before they can be dirtied again, kjournald exits, seeing the UMOUNT flag and the dirty pages are never written to the backing block device, causing long symlink corruption and exposing new or previously freed block data to userspace. To ensure all commits are synced, we flush all journal commits now when sync_fs'ing ext4. Signed-off-by: Arthur Jones Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Eric Sandeen Cc: --- fs/ext4/super.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e27acd18b4b..e4a241c65db 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2884,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb) /* * Ext4 always journals updates to the superblock itself, so we don't * have to propagate any other updates to the superblock on disk at this - * point. Just start an async writeback to get the buffers on their way - * to the disk. - * - * This implicitly triggers the writebehind on sync(). + * point. (We can probably nuke this function altogether, and remove + * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) 
*/ - static void ext4_write_super(struct super_block *sb) { if (mutex_trylock(&sb->s_lock) != 0) @@ -2899,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb) static int ext4_sync_fs(struct super_block *sb, int wait) { - tid_t target; + int ret = 0; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); sb->s_dirt = 0; - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); - } - return 0; + if (wait) + ret = ext4_force_commit(sb); + else + jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); + return ret; } /* -- cgit v1.2.3-18-g5258 From 9a0354405feb0f8bd460349a93db05e4cca8d166 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:45:55 -0800 Subject: bnx2x: Removing the PMF indication when unloading When the PMF flag is set, the driver can access the HW freely. When the driver is unloaded, it should not access the HW. The problem caused fatal errors when "ethtool -i" was called after the calling instance was unloaded and another instance was already loaded Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index fce745148ff..61152e149e6 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -6481,6 +6481,7 @@ load_int_disable: bnx2x_free_irq(bp); load_error: bnx2x_free_mem(bp); + bp->port.pmf = 0; /* TBD we really need to reset the chip if we want to recover from this */ @@ -6791,6 +6792,7 @@ unload_error: /* Report UNLOAD_DONE to MCP */ if (!BP_NOMCP(bp)) bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE); + bp->port.pmf = 0; /* Free SKBs, SGEs, TPA pool and driver internals */ bnx2x_free_skbs(bp); -- cgit v1.2.3-18-g5258 From 7d96567ac0527703cf1b80043fc0ebd7f21a10ad Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:19 -0800 Subject: bnx2x: PCI configuration bug on big-endian The current code read nothing but zeros on big-endian (wrong part of the 32bits). This caused poor performance on big-endian machines. Though this issue did not cause the system to crash, the performance is significantly better with the fix so I view it as critical bug fix. Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_init.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/bnx2x_init.h b/drivers/net/bnx2x_init.h index 130927cfc75..a6c0b3abba2 100644 --- a/drivers/net/bnx2x_init.h +++ b/drivers/net/bnx2x_init.h @@ -564,14 +564,15 @@ static const struct arb_line write_arb_addr[NUM_WR_Q-1] = { static void bnx2x_init_pxp(struct bnx2x *bp) { + u16 devctl; int r_order, w_order; u32 val, i; pci_read_config_word(bp->pdev, - bp->pcie_cap + PCI_EXP_DEVCTL, (u16 *)&val); - DP(NETIF_MSG_HW, "read 0x%x from devctl\n", (u16)val); - w_order = ((val & PCI_EXP_DEVCTL_PAYLOAD) >> 5); - r_order = ((val & PCI_EXP_DEVCTL_READRQ) >> 12); + bp->pcie_cap + PCI_EXP_DEVCTL, &devctl); + DP(NETIF_MSG_HW, "read 0x%x from devctl\n", devctl); + w_order = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); + r_order = ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12); if (r_order > MAX_RD_ORD) { DP(NETIF_MSG_HW, "read order of %d order adjusted to %d\n", -- cgit v1.2.3-18-g5258 From 12b56ea89e70d4b04f2f5199750310e82894ebbd Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:40 -0800 Subject: bnx2x: Calling netif_carrier_off at the end of the probe netif_carrier_off was called too early at the probe. In case of failure or simply bad timing, this can cause a fatal error since linkwatch_event might run too soon. Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 61152e149e6..5b013d8457c 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -10206,8 +10206,6 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, return -ENOMEM; } - netif_carrier_off(dev); - bp = netdev_priv(dev); bp->msglevel = debug; @@ -10231,6 +10229,8 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, goto init_one_exit; } + netif_carrier_off(dev); + bp->common.name = board_info[ent->driver_data].name; printk(KERN_INFO "%s: %s (%c%d) PCI-E x%d %s found at mem %lx," " IRQ %d, ", dev->name, bp->common.name, -- cgit v1.2.3-18-g5258 From ca8eac55fa554043c57fd18d595ca356e752833e Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Mon, 3 Nov 2008 16:46:58 -0800 Subject: bnx2x: Version Update Updating the version Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 5b013d8457c..600210d7eff 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -59,8 +59,8 @@ #include "bnx2x.h" #include "bnx2x_init.h" -#define DRV_MODULE_VERSION "1.45.22" -#define DRV_MODULE_RELDATE "2008/09/09" +#define DRV_MODULE_VERSION "1.45.23" +#define DRV_MODULE_RELDATE "2008/11/03" #define BNX2X_BC_VER 0x040200 /* Time in jiffies before concluding the transmitter is hung */ -- cgit v1.2.3-18-g5258 From 19ecb6ba800765743bb4525c66562f0d30993f8d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Nov 2008 17:05:16 -0800 Subject: niu: Use pci_ioremap_bar(). Signed-off-by: David S. 
Miller --- drivers/net/niu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index ebc81270290..9acb5d70a3a 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -8667,7 +8667,6 @@ static void __devinit niu_device_announce(struct niu *np) static int __devinit niu_pci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - unsigned long niureg_base, niureg_len; union niu_parent_id parent_id; struct net_device *dev; struct niu *np; @@ -8758,10 +8757,7 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM); - niureg_base = pci_resource_start(pdev, 0); - niureg_len = pci_resource_len(pdev, 0); - - np->regs = ioremap_nocache(niureg_base, niureg_len); + np->regs = pci_ioremap_bar(pdev, 0); if (!np->regs) { dev_err(&pdev->dev, PFX "Cannot map device registers, " "aborting.\n"); -- cgit v1.2.3-18-g5258 From bbb770e7ab9a436752babfc8765e422d7481be1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 19:11:29 -0800 Subject: xfrm: Fix xfrm_policy_gc_lock handling. From: Alexey Dobriyan Based upon a lockdep trace by Simon Arlott. xfrm_policy_kill() can be called from both BH and non-BH contexts, so we have to grab xfrm_policy_gc_lock with BH disabling. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 25872747762..058f04f54b9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } -- cgit v1.2.3-18-g5258 From cadef677e4a9b9c1d069675043767df486782986 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Fri, 31 Oct 2008 08:03:55 +0100 Subject: sata_promise: add ATA engine reset to reset ops Promise ATA engines need to be reset when errors occur. That's currently done for errors detected by sata_promise itself, but it's not done for errors like timeouts detected outside of the low-level driver. The effect of this omission is that a timeout tends to result in a sequence of failed COMRESETs after which libata EH gives up and disables the port. At that point the port's ATA engine hangs and even reloading the driver will not resume it. To fix this, make sata_promise override ->hardreset on SATA ports with code which calls pdc_reset_port() on the port in question before calling libata's hardreset. PATA ports don't use ->hardreset, so for those we override ->softreset instead. 
Signed-off-by: Mikael Pettersson Signed-off-by: Jeff Garzik --- drivers/ata/sata_promise.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 750d8cdc00c..ba9a2570a74 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -153,6 +153,10 @@ static void pdc_freeze(struct ata_port *ap); static void pdc_sata_freeze(struct ata_port *ap); static void pdc_thaw(struct ata_port *ap); static void pdc_sata_thaw(struct ata_port *ap); +static int pdc_pata_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); +static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); static void pdc_error_handler(struct ata_port *ap); static void pdc_post_internal_cmd(struct ata_queued_cmd *qc); static int pdc_pata_cable_detect(struct ata_port *ap); @@ -186,6 +190,7 @@ static struct ata_port_operations pdc_sata_ops = { .scr_read = pdc_sata_scr_read, .scr_write = pdc_sata_scr_write, .port_start = pdc_sata_port_start, + .hardreset = pdc_sata_hardreset, }; /* First-generation chips need a more restrictive ->check_atapi_dma op */ @@ -200,6 +205,7 @@ static struct ata_port_operations pdc_pata_ops = { .freeze = pdc_freeze, .thaw = pdc_thaw, .port_start = pdc_common_port_start, + .softreset = pdc_pata_softreset, }; static const struct ata_port_info pdc_port_info[] = { @@ -693,6 +699,20 @@ static void pdc_sata_thaw(struct ata_port *ap) readl(host_mmio + hotplug_offset); /* flush */ } +static int pdc_pata_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + pdc_reset_port(link->ap); + return ata_sff_softreset(link, class, deadline); +} + +static int pdc_sata_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + pdc_reset_port(link->ap); + return sata_sff_hardreset(link, class, deadline); +} + static void pdc_error_handler(struct ata_port *ap) { if (!(ap->pflags & 
ATA_PFLAG_FROZEN)) -- cgit v1.2.3-18-g5258 From 554d491de112a378b4d1a705bb93b58bcd444a70 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 2 Nov 2008 22:18:52 +0100 Subject: sata_via: restore vt*_prepare_host error handling commit b9d5b89b487517cbd4cb4702da829e07ef9e4432 (sata_via: fix support for 5287) accidentally (?) removed vt*_prepare_host error handling - restore it. Caught by gcc: drivers/ata/sata_via.c: In function 'svia_init_one': drivers/ata/sata_via.c:567: warning: 'host' may be used uninitialized in this function Signed-off-by: Marcin Slusarz Cc: Tejun Heo Cc: Joseph Chan Cc: Jeff Garzik Signed-off-by: Jeff Garzik --- drivers/ata/sata_via.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 62367fe4d5d..c18935f0bda 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -602,8 +602,10 @@ static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = vt8251_prepare_host(pdev, &host); break; default: - return -EINVAL; + rc = -EINVAL; } + if (rc) + return rc; svia_configure(pdev); -- cgit v1.2.3-18-g5258 From 3c324283e6cdb79210cf7975c3e40d3ba3e672b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 12:37:49 +0900 Subject: sata_nv: fix generic, nf2/3 detection regression All three flavors of sata_nv's are different in how their hardreset behaves. * generic: Hardreset is not reliable. Link often doesn't come online after hardreset. * nf2/3: A little bit better - link comes online with longer debounce timing. However, nf2/3 can't reliably wait for the first D2H Register FIS, so it can't wait for device readiness or classify the device after hardreset. Follow-up SRST required. * ck804: Hardreset finally works. The core layer change to prefer hardreset and follow up changes exposed the above issues and caused various detection regressions for all three flavors. 
This patch, hopefully, fixes all the known issues and should make sata_nv error handling more reliable. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/sata_nv.c | 53 ++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index fae3841de0d..6f146061432 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -307,10 +307,10 @@ static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); +static int nv_nf2_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); static void nv_ck804_freeze(struct ata_port *ap); static void nv_ck804_thaw(struct ata_port *ap); -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); static int nv_adma_slave_config(struct scsi_device *sdev); static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc); static void nv_adma_qc_prep(struct ata_queued_cmd *qc); @@ -405,17 +405,8 @@ static struct scsi_host_template nv_swncq_sht = { .slave_configure = nv_swncq_slave_config, }; -/* OSDL bz3352 reports that some nv controllers can't determine device - * signature reliably and nv_hardreset is implemented to work around - * the problem. This was reported on nf3 and it's unclear whether any - * other controllers are affected. However, the workaround has been - * applied to all variants and there isn't much to gain by trying to - * find out exactly which ones are affected at this point especially - * because NV has moved over to ahci for newer controllers. 
- */ static struct ata_port_operations nv_common_ops = { .inherits = &ata_bmdma_port_ops, - .hardreset = nv_hardreset, .scr_read = nv_scr_read, .scr_write = nv_scr_write, }; @@ -429,12 +420,22 @@ static struct ata_port_operations nv_generic_ops = { .hardreset = ATA_OP_NULL, }; +/* OSDL bz3352 reports that nf2/3 controllers can't determine device + * signature reliably. Also, the following thread reports detection + * failure on cold boot with the standard debouncing timing. + * + * http://thread.gmane.org/gmane.linux.ide/34098 + * + * Debounce with hotplug timing and request follow-up SRST. + */ static struct ata_port_operations nv_nf2_ops = { .inherits = &nv_common_ops, .freeze = nv_nf2_freeze, .thaw = nv_nf2_thaw, + .hardreset = nv_nf2_hardreset, }; +/* CK804 finally gets hardreset right */ static struct ata_port_operations nv_ck804_ops = { .inherits = &nv_common_ops, .freeze = nv_ck804_freeze, @@ -443,7 +444,7 @@ static struct ata_port_operations nv_ck804_ops = { }; static struct ata_port_operations nv_adma_ops = { - .inherits = &nv_common_ops, + .inherits = &nv_ck804_ops, .check_atapi_dma = nv_adma_check_atapi_dma, .sff_tf_read = nv_adma_tf_read, @@ -467,7 +468,7 @@ static struct ata_port_operations nv_adma_ops = { }; static struct ata_port_operations nv_swncq_ops = { - .inherits = &nv_common_ops, + .inherits = &nv_generic_ops, .qc_defer = ata_std_qc_defer, .qc_prep = nv_swncq_qc_prep, @@ -1553,6 +1554,17 @@ static void nv_nf2_thaw(struct ata_port *ap) iowrite8(mask, scr_addr + NV_INT_ENABLE); } +static int nv_nf2_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + bool online; + int rc; + + rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, + &online, NULL); + return online ? 
-EAGAIN : rc; +} + static void nv_ck804_freeze(struct ata_port *ap) { void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR]; @@ -1605,21 +1617,6 @@ static void nv_mcp55_thaw(struct ata_port *ap) ata_sff_thaw(ap); } -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline) -{ - int rc; - - /* SATA hardreset fails to retrieve proper device signature on - * some controllers. Request follow up SRST. For more info, - * see http://bugzilla.kernel.org/show_bug.cgi?id=3352 - */ - rc = sata_sff_hardreset(link, class, deadline); - if (rc) - return rc; - return -EAGAIN; -} - static void nv_adma_error_handler(struct ata_port *ap) { struct nv_adma_port_priv *pp = ap->private_data; -- cgit v1.2.3-18-g5258 From a464189de350b050aa8f334bd4cc53ed406e56dd Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Mon, 3 Nov 2008 19:01:08 +0900 Subject: libata: Fix a potential race condition in ata_scsi_park_show() Peter Moulder has pointed out that there is a slight chance that a negative value might be passed to jiffies_to_msecs() in ata_scsi_park_show(). This is fixed by saving the value of jiffies in a local variable, thus also reducing code since the volatile variable jiffies is accessed only once. 
Signed-off-by: Elias Oltmanns Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index bbb30d882f0..3fa75eac135 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -190,7 +190,7 @@ static ssize_t ata_scsi_park_show(struct device *device, struct ata_port *ap; struct ata_link *link; struct ata_device *dev; - unsigned long flags; + unsigned long flags, now; unsigned int uninitialized_var(msecs); int rc = 0; @@ -208,10 +208,11 @@ static ssize_t ata_scsi_park_show(struct device *device, } link = dev->link; + now = jiffies; if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS && link->eh_context.unloaded_mask & (1 << dev->devno) && - time_after(dev->unpark_deadline, jiffies)) - msecs = jiffies_to_msecs(dev->unpark_deadline - jiffies); + time_after(dev->unpark_deadline, now)) + msecs = jiffies_to_msecs(dev->unpark_deadline - now); else msecs = 0; -- cgit v1.2.3-18-g5258 From 6a87e42e955ff27e07a77f65f8f077dc7c4171e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:01:09 +0900 Subject: libata: implement ATA_HORKAGE_ATAPI_MOD16_DMA and apply it libata always uses PIO for ATAPI commands when the number of bytes to transfer isn't multiple of 16 but quantum DAT72 chokes on odd bytes PIO transfers. Implement a horkage to skip the mod16 check and apply it to the quantum device. This is reported by John Clark in the following thread. 
http://thread.gmane.org/gmane.linux.ide/34748 Signed-off-by: Tejun Heo Cc: John Clark Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 +++- include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 82af7011f2d..91b478f2055 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4024,6 +4024,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, + { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, /* Devices we expect to fail diagnostics */ @@ -4444,7 +4445,8 @@ int atapi_check_dma(struct ata_queued_cmd *qc) /* Don't allow DMA if it isn't multiple of 16 bytes. Quite a * few ATAPI devices choke on such DMA requests. */ - if (unlikely(qc->nbytes & 15)) + if (!(qc->dev->horkage & ATA_HORKAGE_ATAPI_MOD16_DMA) && + unlikely(qc->nbytes & 15)) return 1; if (ap->ops->check_atapi_dma) diff --git a/include/linux/libata.h b/include/linux/libata.h index f5441edee55..c7665a4134c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -373,6 +373,8 @@ enum { ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ + ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands + not multiple of 16 bytes */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-18-g5258 From 299246f9a2a4c5c531863d72bad7ebd0de213de9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:27:07 +0900 Subject: libata: mask off DET when restoring SControl for detach libata restores SControl on detach; however, trying to restore non-zero DET can cause nondeterministic behavior including PMP device going offline till power cycling. Mask off DET when restoring SControl. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 91b478f2055..622350d9b2e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5936,7 +5936,7 @@ static void ata_port_detach(struct ata_port *ap) * to us. Restore SControl and disable all existing devices. */ __ata_port_for_each_link(link, ap) { - sata_scr_write(link, SCR_CONTROL, link->saved_scontrol); + sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); ata_link_for_each_dev(dev, link) ata_dev_disable(dev); } -- cgit v1.2.3-18-g5258 From 70de9a97049e0ba79dc040868564408d5ce697f9 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 11:18:47 -0800 Subject: x86: don't use tsc_khz to calculate lpj if notsc is passed Impact: fix udelay when "notsc" boot parameter is passed With notsc passed on commandline, tsc may not be used for udelays, make sure that we do not use tsc_khz to calculate the lpj value in such cases. 
Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Alok N Kataria Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 62348e4fd8d..2ef80e30192 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -813,10 +813,6 @@ void __init tsc_init(void) cpu_khz = calibrate_cpu(); #endif - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -836,6 +832,10 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + use_tsc_delay(); /* Check and install the TSC clocksource */ dmi_check_system(bad_tsc_dmi_table); -- cgit v1.2.3-18-g5258 From c4dc5071859bf666a5a9d6565f16c51a261a88b7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Nov 2008 13:30:57 +0100 Subject: ALSA: hda - Limit the number of GPIOs show in proc Limit the number of GPIOs shown in proc. Otherwise it gets too long unnecessarily, and hard to analyze. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index 743d77922bc..c39af986bff 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -483,6 +483,8 @@ static void print_gpio(struct snd_info_buffer *buffer, (gpio & AC_GPIO_UNSOLICITED) ? 1 : 0, (gpio & AC_GPIO_WAKE) ? 
1 : 0); max = gpio & AC_GPIO_IO_COUNT; + if (!max || max > 8) + return; enable = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_GPIO_MASK, 0); direction = snd_hda_codec_read(codec, nid, 0, -- cgit v1.2.3-18-g5258 From e4ab1b3cbb8042f1653471c6333931134105d455 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 4 Nov 2008 12:46:03 +0000 Subject: x86/docs: remove noirqbalance param docs Impact: documentation fix irqbalance was removed by: commit 8b8e8c1bf7275eca859fe551dfa484134eaf013b Author: Yinghai Lu Date: Tue Aug 19 20:50:23 2008 -0700 Remove the associated documentation for noirqbalance. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b..de4de3e7bc1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1470,8 +1470,6 @@ and is between 256 and 4096 characters. It is defined in the file Valid arguments: on, off Default: on - noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing - noirqdebug [X86-32] Disables the code which attempts to detect and disable unhandled interrupt sources. -- cgit v1.2.3-18-g5258 From d2ed5cb80a241518dd71f467c884bfabbe15f68c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 2 Nov 2008 09:16:50 +0000 Subject: [ARM] fix VFP+softfloat binaries 2.6.28-rc tightened up the ELF architecture checks on ARM. For non-EABI it only allows VFP if the hardware supports it. However, the kernel fails to also inspect the soft-float flag, so it incorrectly rejects binaries using soft-VFP. The fix is simple: also check that EF_ARM_SOFT_FLOAT isn't set before rejecting VFP binaries on non-VFP hardware. 
Acked-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/kernel/elf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index 513f332f040..84849098c8e 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -21,12 +21,16 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) { + unsigned int flt_fmt; + /* APCS26 is only allowed if the CPU supports it */ if ((eflags & EF_ARM_APCS_26) && !(elf_hwcap & HWCAP_26BIT)) return 0; + flt_fmt = eflags & (EF_ARM_VFP_FLOAT | EF_ARM_SOFT_FLOAT); + /* VFP requires the supporting code */ - if ((eflags & EF_ARM_VFP_FLOAT) && !(elf_hwcap & HWCAP_VFP)) + if (flt_fmt == EF_ARM_VFP_FLOAT && !(elf_hwcap & HWCAP_VFP)) return 0; } return 1; -- cgit v1.2.3-18-g5258 From 54074d59320581a6d7e4f4dd405e8cac1d174b75 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Tue, 4 Nov 2008 21:47:07 +0800 Subject: drivers: remove duplicated #include Signed-off-by: Jianjun Kong Signed-off-by: Linus Torvalds --- drivers/mtd/onenand/omap2.c | 1 - drivers/net/atl1e/atl1e.h | 1 - drivers/net/ucc_geth_ethtool.c | 1 - drivers/pnp/interface.c | 1 - drivers/sbus/char/jsflash.c | 1 - drivers/staging/echo/echo.c | 1 - drivers/staging/me4000/me4000.c | 1 - drivers/xen/balloon.c | 1 - 8 files changed, 8 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 8387e05daae..e39b21d3e16 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/atl1e/atl1e.h b/drivers/net/atl1e/atl1e.h index b645fa0f3f6..c49550d507a 100644 --- a/drivers/net/atl1e/atl1e.h +++ b/drivers/net/atl1e/atl1e.h @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index 
cfbbfee5583..85f38a6b6a4 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -37,7 +37,6 @@ #include #include #include -#include #include "ucc_geth.h" #include "ucc_geth_mii.h" diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 478a4a739c0..c3f1c8e9d25 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 2bec9ccc029..a9a9893a5f9 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/staging/echo/echo.c b/drivers/staging/echo/echo.c index b8f2c5e9dee..fd4007e329e 100644 --- a/drivers/staging/echo/echo.c +++ b/drivers/staging/echo/echo.c @@ -106,7 +106,6 @@ #include /* We're doing kernel work */ #include -#include #include #include "bit_operations.h" diff --git a/drivers/staging/me4000/me4000.c b/drivers/staging/me4000/me4000.c index cf8b01bcac8..0394e270927 100644 --- a/drivers/staging/me4000/me4000.c +++ b/drivers/staging/me4000/me4000.c @@ -39,7 +39,6 @@ #include #include #include -#include /* Include-File for the Meilhaus ME-4000 I/O board */ #include "me4000.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 8c83abc7340..a0fb5eac407 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3-18-g5258 From 9ac8d3fb22b593d39d161dcd716af0f1f7546837 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Mon, 3 Nov 2008 16:58:51 -0600 Subject: [IA64] Simplify SGI uv vs. sn2 driver issues Add partition id, coherence id, and region size to UV to make life simpler for drivers shared between sn2 & uv. 
Signed-off-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/uv/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c index cf5f28ae96c..7a5ae633198 100644 --- a/arch/ia64/uv/kernel/setup.c +++ b/arch/ia64/uv/kernel/setup.c @@ -19,6 +19,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); #ifdef CONFIG_IA64_SGI_UV int sn_prom_type; +long sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +long sn_coherency_id; +EXPORT_SYMBOL_GPL(sn_coherency_id); +long sn_region_size; +EXPORT_SYMBOL(sn_region_size); #endif struct redir_addr { -- cgit v1.2.3-18-g5258 From 7576f684494e927b901eee25a44ce52c82f9f60e Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Mon, 3 Nov 2008 13:29:41 -0800 Subject: [IA64] Add UV watchlist support. This is used by SGI xp drivers (drivers/misc/sgi-xp). Signed-off-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/include/asm/sn/sn_sal.h | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h index 57e649d388b..e310fc0135d 100644 --- a/arch/ia64/include/asm/sn/sn_sal.h +++ b/arch/ia64/include/asm/sn/sn_sal.h @@ -90,6 +90,8 @@ #define SN_SAL_SET_CPU_NUMBER 0x02000068 #define SN_SAL_KERNEL_LAUNCH_EVENT 0x02000069 +#define SN_SAL_WATCHLIST_ALLOC 0x02000070 +#define SN_SAL_WATCHLIST_FREE 0x02000071 /* * Service-specific constants @@ -1185,4 +1187,47 @@ ia64_sn_kernel_launch_event(void) SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0); return rv.status; } + +union sn_watchlist_u { + u64 val; + struct { + u64 blade : 16, + size : 32, + filler : 16; + }; +}; + +static inline int +sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, + unsigned long *intr_mmr_offset) +{ + struct ia64_sal_retval rv; + unsigned long addr; + union sn_watchlist_u size_blade; + int watchlist; + + addr = (unsigned long)mq; + size_blade.size = mq_size; + 
size_blade.blade = blade; + + /* + * bios returns watchlist number or negative error number. + */ + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr, + size_blade.val, (u64)intr_mmr_offset, + (u64)&watchlist, 0, 0, 0); + if (rv.status < 0) + return rv.status; + + return watchlist; +} + +static inline int +sn_mq_watchlist_free(int blade, int watchlist_num) +{ + struct ia64_sal_retval rv; + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade, + watchlist_num, 0, 0, 0, 0, 0); + return rv.status; +} #endif /* _ASM_IA64_SN_SN_SAL_H */ -- cgit v1.2.3-18-g5258 From d6e15199d1784df90b7535e625f7617bd343d202 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Mon, 3 Nov 2008 13:32:23 -0800 Subject: [IA64] Add error_recovery_info field to SAL section header Add the error_recovery_info field to the SAL section header, as defined in the SAL Spec. Signed-off-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/include/asm/sal.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h index ea310c0812a..966797a97c9 100644 --- a/arch/ia64/include/asm/sal.h +++ b/arch/ia64/include/asm/sal.h @@ -337,11 +337,24 @@ typedef struct sal_log_record_header { #define sal_log_severity_fatal 1 #define sal_log_severity_corrected 2 +/* + * Error Recovery Info (ERI) bit decode. From SAL Spec section B.2.2 Table B-3 + * Error Section Error_Recovery_Info Field Definition. 
+ */ +#define ERI_NOT_VALID 0x0 /* Error Recovery Field is not valid */ +#define ERI_NOT_ACCESSIBLE 0x30 /* Resource not accessible */ +#define ERI_CONTAINMENT_WARN 0x22 /* Corrupt data propagated */ +#define ERI_UNCORRECTED_ERROR 0x20 /* Uncorrected error */ +#define ERI_COMPONENT_RESET 0x24 /* Component must be reset */ +#define ERI_CORR_ERROR_LOG 0x21 /* Corrected error, needs logging */ +#define ERI_CORR_ERROR_THRESH 0x29 /* Corrected error threshold exceeded */ + /* Definition of log section header structures */ typedef struct sal_log_sec_header { efi_guid_t guid; /* Unique Section ID */ sal_log_revision_t revision; /* Major and Minor revision of Section */ - u16 reserved; + u8 error_recovery_info; /* Platform error recovery status */ + u8 reserved; u32 len; /* Section length */ } sal_log_section_hdr_t; -- cgit v1.2.3-18-g5258 From aca14f33104bb7b101df23cdd36c520b7c66bcfd Mon Sep 17 00:00:00 2001 From: Ken'ichi Ohmichi Date: Wed, 29 Oct 2008 14:17:57 -0700 Subject: [IA64] fix the difference between node_mem_map and node_start_pfn makedumpfile[1] cannot run on ia64 discontigmem kernel, because the member node_mem_map of struct pgdat_list has invalid value. This patch fixes it. node_start_pfn shows the start pfn of each node, and node_mem_map should point 'struct page' of each node's node_start_pfn. On my machine, node0's node_start_pfn shows 0x400 and its node_mem_map points 0xa0007fffbf000000. This address is the same as vmem_map, so the node_mem_map points 'struct page' of pfn 0, even if its node_start_pfn shows 0x400. The cause is due to the round down of min_pfn in count_node_pages() and node0's node_mem_map points 'struct page' of inactive pfn (0x0). This patch fixes it. 
makedumpfile[1]: dump filtering command https://sourceforge.net/projects/makedumpfile/ Signed-off-by: Ken'ichi Ohmichi Cc: Bernhard Walle Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/include/asm/meminit.h | 1 - arch/ia64/mm/discontig.c | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 6bc96ee5432..c0cea375620 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -48,7 +48,6 @@ extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end); */ #define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1)) #define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1)) -#define ORDERROUNDDOWN(n) ((n) & ~((PAGE_SIZE<>PAGE_SHIFT; #endif start = GRANULEROUNDDOWN(start); - start = ORDERROUNDDOWN(start); end = GRANULEROUNDUP(end); mem_data[node].max_pfn = max(mem_data[node].max_pfn, end >> PAGE_SHIFT); -- cgit v1.2.3-18-g5258 From 9979aa7778e664bb31efb4281a9e118a9909c35c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 29 Oct 2008 14:17:59 -0700 Subject: [IA64] use common header for software IO/TLB Remove the swiotlb prototypes from the architecture code and use the common header file instead. 
Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- arch/ia64/hp/common/hwsw_iommu.c | 9 +-------- arch/ia64/include/asm/machvec.h | 22 +--------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 88b6e6f3fd8..2769dbfd03b 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -13,19 +13,12 @@ */ #include +#include #include /* swiotlb declarations & definitions: */ extern int swiotlb_late_init_with_default_size (size_t size); -extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent; -extern ia64_mv_dma_free_coherent swiotlb_free_coherent; -extern ia64_mv_dma_map_single_attrs swiotlb_map_single_attrs; -extern ia64_mv_dma_unmap_single_attrs swiotlb_unmap_single_attrs; -extern ia64_mv_dma_map_sg_attrs swiotlb_map_sg_attrs; -extern ia64_mv_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs; -extern ia64_mv_dma_supported swiotlb_dma_supported; -extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error; /* hwiommu declarations & definitions: */ diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 1ea28bcee33..59c17e44668 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -11,6 +11,7 @@ #define _ASM_IA64_MACHVEC_H #include +#include /* forward declarations: */ struct device; @@ -297,27 +298,6 @@ extern void machvec_init_from_cmdline(const char *cmdline); # error Unknown configuration. Update arch/ia64/include/asm/machvec.h. 
# endif /* CONFIG_IA64_GENERIC */ -/* - * Declare default routines which aren't declared anywhere else: - */ -extern ia64_mv_dma_init swiotlb_init; -extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent; -extern ia64_mv_dma_free_coherent swiotlb_free_coherent; -extern ia64_mv_dma_map_single swiotlb_map_single; -extern ia64_mv_dma_map_single_attrs swiotlb_map_single_attrs; -extern ia64_mv_dma_unmap_single swiotlb_unmap_single; -extern ia64_mv_dma_unmap_single_attrs swiotlb_unmap_single_attrs; -extern ia64_mv_dma_map_sg swiotlb_map_sg; -extern ia64_mv_dma_map_sg_attrs swiotlb_map_sg_attrs; -extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg; -extern ia64_mv_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs; -extern ia64_mv_dma_sync_single_for_cpu swiotlb_sync_single_for_cpu; -extern ia64_mv_dma_sync_sg_for_cpu swiotlb_sync_sg_for_cpu; -extern ia64_mv_dma_sync_single_for_device swiotlb_sync_single_for_device; -extern ia64_mv_dma_sync_sg_for_device swiotlb_sync_sg_for_device; -extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error; -extern ia64_mv_dma_supported swiotlb_dma_supported; - /* * Define default versions so we can extend machvec for new platforms without having * to update the machvec files for all existing platforms. -- cgit v1.2.3-18-g5258 From 6a2d26fd3fd1129824ffe53778832f0794d99cc2 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 26 Oct 2008 23:09:19 +0800 Subject: [IA64] remove duplicated #include from pci-dma.c Removed duplicated #include and in arch/ia64/kernel/pci-dma.c. 
Signed-off-by: Huang Weiyi Signed-off-by: Tony Luck --- arch/ia64/kernel/pci-dma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 031abbf9c87..dbdb778efa0 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -12,13 +12,11 @@ #include #include -#include #include #ifdef CONFIG_DMAR #include -#include #include #include -- cgit v1.2.3-18-g5258 From d8d54b0252280f519ad4f9268d2612b0463b3f9e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 24 Oct 2008 14:41:09 +0900 Subject: [IA64] remove dead BIO_VMERGE_BOUNDARY definition The block layer dropped the virtual merge feature (b8b3e16cfe6435d961f6aaebcfd52a1ff2a988c5). BIO_VMERGE_BOUNDARY definition is meaningless now (For IA64, BIO_VMERGE_BOUNDARY has been meaningless for a long time since IA64 disables the virtual merge feature). Signed-off-by: FUJITA Tomonori Signed-off-by: Tony Luck --- arch/ia64/include/asm/io.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 7f257507cd8..0d9d16e2d94 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -434,28 +434,4 @@ extern void memset_io(volatile void __iomem *s, int c, long n); # endif /* __KERNEL__ */ -/* - * Enabling BIO_VMERGE_BOUNDARY forces us to turn off I/O MMU bypassing. It is said that - * BIO-level virtual merging can give up to 4% performance boost (not verified for ia64). - * On the other hand, we know that I/O MMU bypassing gives ~8% performance improvement on - * SPECweb-like workloads on zx1-based machines. Thus, for now we favor I/O MMU bypassing - * over BIO-level virtual merging. - */ -extern unsigned long ia64_max_iommu_merge_mask; -#if 1 -#define BIO_VMERGE_BOUNDARY 0 -#else -/* - * It makes no sense at all to have this BIO_VMERGE_BOUNDARY macro here. Should be - * replaced by dma_merge_mask() or something of that sort. 
Note: the only way - * BIO_VMERGE_BOUNDARY is used is to mask off bits. Effectively, our definition gets - * expanded into: - * - * addr & ((ia64_max_iommu_merge_mask + 1) - 1) == (addr & ia64_max_iommu_vmerge_mask) - * - * which is precisely what we want. - */ -#define BIO_VMERGE_BOUNDARY (ia64_max_iommu_merge_mask + 1) -#endif - #endif /* _ASM_IA64_IO_H */ -- cgit v1.2.3-18-g5258 From f2b3fdc8873629d154a1ef052141ebaf3e5a231d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 3 Nov 2008 13:54:52 -0800 Subject: [IA64] Build VT-D iommu support into generic kernel Now that all the ia64 mmu pieces are in the tree we can build support into the generic kernel. Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 27eec71429b..9f481ba59a4 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -148,6 +148,7 @@ config IA64_GENERIC select ACPI_NUMA select SWIOTLB select PCI_MSI + select DMAR help This selects the system type of your hardware. A "generic" kernel will run on any supported IA-64 system. 
However, if you configure -- cgit v1.2.3-18-g5258 From 85d7a070264272ceffec0c7ce0e9af1e37c62b6e Mon Sep 17 00:00:00 2001 From: Sanjeev Premi Date: Tue, 4 Nov 2008 13:35:06 -0800 Subject: ARM: OMAP: Fix compiler warnings in gpmc.c Fix these compiler warnings: gpmc.c: In function 'gpmc_init': gpmc.c:432: warning: 'return' with a value, in function returning void gpmc.c:439: warning: 'return' with a value, in function returning void Signed-off-by: Sanjeev Premi Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 763bdbeaf68..2249049c1d5 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -429,18 +429,16 @@ void __init gpmc_init(void) gpmc_l3_clk = clk_get(NULL, ck); if (IS_ERR(gpmc_l3_clk)) { printk(KERN_ERR "Could not get GPMC clock %s\n", ck); - return -ENODEV; + BUG(); } gpmc_base = ioremap(l, SZ_4K); if (!gpmc_base) { clk_put(gpmc_l3_clk); printk(KERN_ERR "Could not get GPMC register memory\n"); - return -ENOMEM; + BUG(); } - BUG_ON(IS_ERR(gpmc_l3_clk)); - l = gpmc_read_reg(GPMC_REVISION); printk(KERN_INFO "GPMC revision %d.%d\n", (l >> 4) & 0x0f, l & 0x0f); /* Set smart idle mode and automatic L3 clock gating */ -- cgit v1.2.3-18-g5258 From e621f266d4cd18a07a833877c3995d2ccb35b951 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Tue, 4 Nov 2008 13:35:07 -0800 Subject: ARM: OMAP: Fix debugfs_create_*'s error checking method for arm/plat-omap debugfs_create_*() returns NULL if an error occurs, returns -ENODEV when debugfs is not enabled in the kernel. Comparing to PATCH v1, because clk_debugfs_init is included in "#if defined CONFIG_DEBUG_FS", we only need to check NULL return. Thanks Li Zefan debugfs_create_u8() and other function's return value's checking method are also fixed in this patch. 
Signed-off-by: Zhao Lei Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/clock.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index bf6a10c5fc4..be6aab9c683 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -428,23 +428,23 @@ static int clk_debugfs_register_one(struct clk *c) if (c->id != 0) sprintf(p, ":%d", c->id); d = debugfs_create_dir(s, pa ? pa->dent : clk_debugfs_root); - if (IS_ERR(d)) - return PTR_ERR(d); + if (!d) + return -ENOMEM; c->dent = d; d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); - if (IS_ERR(d)) { - err = PTR_ERR(d); + if (!d) { + err = -ENOMEM; goto err_out; } return 0; @@ -483,8 +483,8 @@ static int __init clk_debugfs_init(void) int err; d = debugfs_create_dir("clock", NULL); - if (IS_ERR(d)) - return PTR_ERR(d); + if (!d) + return -ENOMEM; clk_debugfs_root = d; list_for_each_entry(c, &clocks, node) { -- cgit v1.2.3-18-g5258 From 52414739ca3df12f6d1e78d4dc670e97af0e845f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 4 Nov 2008 13:35:07 -0800 Subject: ARM: OMAP: Fix get_irqnr_and_base to clear spurious interrupt bits On omap24xx, INTCPS_SIR_IRQ_OFFSET bits [6:0] contains the current active interrupt number. However, on 34xx INTCPS_SIR_IRQ_OFFSET bits [31:7] also contains the SPURIOUSIRQFLAG, which gets set if the interrupt sorting information is invalid. 
If the SPURIOUSIRQFLAG bits are not ignored, the interrupt code will occasionally produce a bunch of confusing errors: irq -33, desc: c02ddcc8, depth: 0, count: 0, unhandled: 0 ->handle_irq(): c006f23c, handle_bad_irq+0x0/0x22c ->chip(): 00000000, 0x0 ->action(): 00000000 Fix this by masking out only the ACTIVEIRQ bits. Also fix a confusing comment. Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/mach/entry-macro.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/mach/entry-macro.S b/arch/arm/plat-omap/include/mach/entry-macro.S index 030118ee204..2276f89671d 100644 --- a/arch/arm/plat-omap/include/mach/entry-macro.S +++ b/arch/arm/plat-omap/include/mach/entry-macro.S @@ -65,7 +65,8 @@ #include #endif -#define INTCPS_SIR_IRQ_OFFSET 0x0040 /* Active interrupt number */ +#define INTCPS_SIR_IRQ_OFFSET 0x0040 /* Active interrupt offset */ +#define ACTIVEIRQ_MASK 0x7f /* Active interrupt bits */ .macro disable_fiq .endm @@ -88,6 +89,7 @@ cmp \irqnr, #0x0 2222: ldrne \irqnr, [\base, #INTCPS_SIR_IRQ_OFFSET] + and \irqnr, \irqnr, #ACTIVEIRQ_MASK /* Clear spurious bits */ .endm -- cgit v1.2.3-18-g5258 From 5c32f62b97d62bec097c09e54e6602d0fce2af07 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 4 Nov 2008 13:35:08 -0800 Subject: ARM: OMAP: Fix define for twl4030 irqs Otherwise twl4030 gpios won't work. 
Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/include/mach/irqs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/include/mach/irqs.h b/arch/arm/plat-omap/include/mach/irqs.h index a2929ac8c68..bed5274c910 100644 --- a/arch/arm/plat-omap/include/mach/irqs.h +++ b/arch/arm/plat-omap/include/mach/irqs.h @@ -372,7 +372,7 @@ /* External TWL4030 gpio interrupts are optional */ #define TWL4030_GPIO_IRQ_BASE TWL4030_PWR_IRQ_END -#ifdef CONFIG_TWL4030_GPIO +#ifdef CONFIG_GPIO_TWL4030 #define TWL4030_GPIO_NR_IRQS 18 #else #define TWL4030_GPIO_NR_IRQS 0 -- cgit v1.2.3-18-g5258 From 79654a7698195fa043063092f5c1ca5245276fba Mon Sep 17 00:00:00 2001 From: Andreas Steffen Date: Tue, 4 Nov 2008 14:49:19 -0800 Subject: xfrm: Have af-specific init_tempsel() initialize family field of temporary selector While adding MIGRATE support to strongSwan, Andreas Steffen noticed that the selectors provided in XFRM_MSG_ACQUIRE have their family field uninitialized (those in MIGRATE do have their family set). Looking at the code, this is because the af-specific init_tempsel() (called via afinfo->init_tempsel() in xfrm_init_tempsel()) do not set the value. 
Reported-by: Andreas Steffen Acked-by: Herbert Xu Signed-off-by: Arnaud Ebalard --- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_state.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d..55dc6beab9a 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 89884a4f23a..60c78cfc273 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -34,6 +34,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; -- cgit v1.2.3-18-g5258 From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... 
[ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 46 +++++++++++++++++++++++++++++++++------------- net/core/dev.c | 3 +++ 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062..a5cb0c3f6dc 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b..68ced4bf158 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN 
rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. */ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? 
netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc..9174c77d311 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; -- cgit v1.2.3-18-g5258 From fce4d58353e449a1ac637fc8d2b994e0fcc55312 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 07:26:51 +0000 Subject: powerpc/ps3: Fix compile error in ps3-lpm.c Compiling with CONFIG_SMP = n and CONFIG_PS3_LPM != n gives this error: drivers/ps3/ps3-lpm.c:838: error: implicit declaration of function 'get_hard_smp_processor_id' This fixes it. We have to include <asm/smp.h> rather than <linux/smp.h> because the UP definition of get_hard_smp_processor_id() is in <asm/smp.h>, and <linux/smp.h> only includes <asm/smp.h> if CONFIG_SMP = y. Signed-off-by: Alexey Dobriyan Acked-by: Geoff Levand Signed-off-by: Paul Mackerras --- drivers/ps3/ps3-lpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 85edf945ab8..204158cf7a5 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From 9c8b4aff18b59cd0c2d9a77b3df1f9d7077df90c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 2 Nov 2008 10:21:57 +0000 Subject: powerpc/cell: Fix compile error in ras.c This fixes this error on Cell when CONFIG_KEXEC = n: arch/powerpc/platforms/cell/ras.c:299: error: implicit declaration of function 'crash_shutdown_register' We have to include <asm/kexec.h> because it contains the dummy definition of crash_shutdown_register that is used when CONFIG_KEXEC=n, but <linux/kexec.h> doesn't include <asm/kexec.h> in that case.
Signed-off-by: Alexey Dobriyan Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index fdf088f2430..7b4cefa2199 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3-18-g5258 From 454666eb78d890f5740ea1901f8b01a43c77c67c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 2 Nov 2008 21:18:24 +0000 Subject: powerpc: Fix "unused variable" warning in pci_dlpar.c This gets rid of this build warning: arch/powerpc/platforms/pseries/pci_dlpar.c: In function 'init_phb_dynamic': arch/powerpc/platforms/pseries/pci_dlpar.c:192: warning: unused variable 'b' This is one of the very few warnings left in a ppc64_defconfig build and getting rid of it will make it easier to see future introduced ones (in fact this was introduced very recently). 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/pci_dlpar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 31481dc485d..7190493e9bd 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -189,7 +189,6 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; int primary; - struct pci_bus *b; primary = list_empty(&hose_list); phb = pcibios_alloc_controller(dn); -- cgit v1.2.3-18-g5258 From f4b6755fb37595da3630d1d6fc130ea6888cd48f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:07 +0100 Subject: sched: cleanup fair task selection Impact: cleanup Clean up task selection Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce514afd78f..6167336a237 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -347,17 +347,17 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) -{ - return cfs_rq->rb_leftmost; -} - static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) { - return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); + struct rb_node *left = cfs_rq->rb_leftmost; + + if (!left) + return NULL; + + return rb_entry(left, struct sched_entity, run_node); } -static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -794,28 +794,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct 
sched_entity *se) static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -static struct sched_entity * -pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) +static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { + struct sched_entity *se = __pick_next_entity(cfs_rq); + if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) return se; return cfs_rq->next; } -static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) -{ - struct sched_entity *se = NULL; - - if (first_fair(cfs_rq)) { - se = __pick_next_entity(cfs_rq); - se = pick_next(cfs_rq, se); - set_next_entity(cfs_rq, se); - } - - return se; -} - static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) { /* @@ -1396,6 +1384,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) do { se = pick_next_entity(cfs_rq); + set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); -- cgit v1.2.3-18-g5258 From d95f98d0691d3aba5e35850011946a08c9b36428 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:08 +0100 Subject: sched: fix fair preempt check Impact: fix cross-class preemption Inter-class wakeup preemptions should go on class order. 
Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6167336a237..ebd6de8d17f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1329,6 +1329,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; } + if (unlikely(p->sched_class != &fair_sched_class)) + return; + if (unlikely(se == pse)) return; -- cgit v1.2.3-18-g5258 From 4793241be408b3926ee00c704d7da3b3faf3a05f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:09 +0100 Subject: sched: backward looking buddy Impact: improve/change/fix wakeup-buddy scheduling Currently we only have a forward looking buddy, that is, we prefer to schedule to the task we last woke up, under the presumption that its going to consume the data we just produced, and therefore will have cache hot benefits. This allows co-waking producer/consumer task pairs to run ahead of the pack for a little while, keeping their cache warm. Without this, we would interleave all pairs, utterly trashing the cache. This patch introduces a backward looking buddy, that is, suppose that in the above scenario, the consumer preempts the producer before it can go to sleep, we will therefore miss the wakeup from consumer to producer (its already running, after all), breaking the cycle and reverting to the cache-trashing interleaved schedule pattern. The backward buddy will try to schedule back to the task that woke us up in case the forward buddy is not available, under the assumption that the last task will be the one with the most cache hot task around barring current. This will basically allow a task to continue after it got preempted. In order to avoid starvation, we allow either buddy to get wakeup_gran ahead of the pack. 
Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++++-- kernel/sched_fair.c | 32 +++++++++++++++++++++++++------- kernel/sched_features.h | 1 + 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f46..82cc839c921 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -397,7 +397,7 @@ struct cfs_rq { * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ - struct sched_entity *curr, *next; + struct sched_entity *curr, *next, *last; unsigned long nr_spread_over; @@ -1805,7 +1805,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) /* * Buddy candidates are cache hot: */ - if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next)) + if (sched_feat(CACHE_HOT_BUDDY) && + (&p->se == cfs_rq_of(&p->se)->next || + &p->se == cfs_rq_of(&p->se)->last)) return 1; if (p->sched_class != &fair_sched_class) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ebd6de8d17f..a6b1db8a0bd 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -341,9 +341,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->rb_leftmost = next_node; } - if (cfs_rq->next == se) - cfs_rq->next = NULL; - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } @@ -741,6 +738,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); @@ -798,10 +801,13 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_next_entity(cfs_rq); - if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) - return se; + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) + return 
cfs_rq->next; - return cfs_rq->next; + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) + return cfs_rq->last; + + return se; } static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) @@ -1319,10 +1325,11 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; if (unlikely(rt_prio(p->prio))) { + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + update_rq_clock(rq); update_curr(cfs_rq); resched_task(curr); @@ -1335,6 +1342,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) if (unlikely(se == pse)) return; + /* + * Only set the backward buddy when the current task is still on the + * rq. This can happen when a wakeup gets interleaved with schedule on + * the ->pre_schedule() or idle_balance() point, either of which can + * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, for + * obvious reasons its a bad idea to schedule back to the idle thread. + */ + if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) + cfs_rq_of(se)->last = se; cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/sched_features.h b/kernel/sched_features.h index fda01621829..da5d93b5d2c 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) SCHED_FEAT(WAKEUP_OVERLAP, 0) +SCHED_FEAT(LAST_BUDDY, 1) -- cgit v1.2.3-18-g5258 From 02479099c286894644f8e96c6bbb535ab64662fd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Nov 2008 21:25:10 +0100 Subject: sched: fix buddies for group scheduling Impact: scheduling order fix for group scheduling For each level in the hierarchy, set the buddy to point to the right entity. 
Therefore, when we do the hierarchical schedule, we have a fair chance of ending up where we meant to. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a6b1db8a0bd..51aa3e102ac 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1319,6 +1319,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) return 0; } +static void set_last_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; +} + +static void set_next_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; +} + /* * Preempt the current task with a newly woken task if needed: */ @@ -1352,8 +1364,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) * obvious reasons its a bad idea to schedule back to the idle thread. */ if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) - cfs_rq_of(se)->last = se; - cfs_rq_of(pse)->next = pse; + set_last_buddy(se); + set_next_buddy(pse); /* * We can come here with TIF_NEED_RESCHED already set from new task -- cgit v1.2.3-18-g5258 From b22cecdd8fa4667ebab02def0866387e709927ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Nov 2008 01:35:55 -0800 Subject: net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. 
net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long 
long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412..0a04faa2211 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< 
name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; -- cgit v1.2.3-18-g5258 From e3ec6cfc260e2322834e200c2fa349cdf104fd13 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 5 Nov 2008 01:43:57 -0800 Subject: ipv6: fix run pending DAD when interface becomes ready With some net devices types, an IPv6 address configured while the interface was down can stay 'tentative' forever, even after the interface is set up. In some case, pending IPv6 DADs are not executed when the device becomes ready. I observed this while doing some tests with kvm. If I assign an IPv6 address to my interface eth0 (kvm driver rtl8139) when it is still down then the address is flagged tentative (IFA_F_TENTATIVE). Then, I set eth0 up, and to my surprise, the address stays 'tentative', no DAD is executed and the address can't be pinged. I also observed the same behaviour, without kvm, with virtual interfaces types macvlan and veth. 
Some easy steps to reproduce the issue with macvlan: 1. ip link add link eth0 type macvlan 2. ip -6 addr add 2003::ab32/64 dev macvlan0 3. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... 4. ip link set macvlan0 up 5. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... Address is still tentative. I think there's a bug in net/ipv6/addrconf.c, addrconf_notify(): addrconf_dad_run() is not always run when the interface is flagged IF_READY. Currently it is only run when receiving a NETDEV_CHANGE event. Looks like some (virtual) devices don't send this event when coming up. For both NETDEV_UP and NETDEV_CHANGE events, when the interface becomes ready, run_pending should be set to 1. Patch below. 'run_pending = 1' could be moved below the if/else block but it makes the code less readable. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eea9542728c..d9da5eb9dcb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2483,8 +2483,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) + if (idev) { idev->if_flags |= IF_READY; + run_pending = 1; + } } else { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ -- cgit v1.2.3-18-g5258 From 959973b92d3ba235edfa5dcb5df1be1e5d1deba2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Nov 2008 11:30:56 +0100 Subject: ALSA: hda - Add a quirk for MEDION MD96630 Use model=lenovo-ms7195-dig for MEDION MD96630 laptop (17c0:4085) with ALC888 codec.
Reference: Novell bnc#412548 https://bugzilla.novell.com/show_bug.cgi?id=412528 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a4666c96a44..a378c014512 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8469,6 +8469,7 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3bfd, "Lenovo NB0763", ALC883_LENOVO_NB0763), SND_PCI_QUIRK(0x17aa, 0x101d, "Lenovo Sky", ALC888_LENOVO_SKY), SND_PCI_QUIRK(0x17c0, 0x4071, "MEDION MD2", ALC883_MEDION_MD2), + SND_PCI_QUIRK(0x17c0, 0x4085, "MEDION MD96630", ALC888_LENOVO_MS7195_DIG), SND_PCI_QUIRK(0x17f2, 0x5000, "Albatron KI690-AM2", ALC883_6ST_DIG), SND_PCI_QUIRK(0x1991, 0x5625, "Haier W66", ALC883_HAIER_W66), SND_PCI_QUIRK(0x8086, 0x0001, "DG33BUC", ALC883_3ST_6ch_INTEL), -- cgit v1.2.3-18-g5258 From efb9a8c28ca0edd9e2572117105ebad9bbc0c368 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 5 Nov 2008 03:03:18 -0800 Subject: netfilter: netns ct: walk netns list under RTNL netns list (just list) is under RTNL. But helper and proto unregistration happen during rmmod when RTNL is not held, and that's how it was tested: modprobe/rmmod vs clone(CLONE_NEWNET)/exit. 
BUG: unable to handle kernel paging request at 0000000000100100 <=== IP: [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] PGD 15e300067 PUD 15e1d8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/kernel/uevent_seqnum CPU 0 Modules linked in: nf_conntrack_proto_sctp(-) nf_conntrack_proto_dccp(-) af_packet iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 iptable_filter ip_tables xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 sr_mod cdrom [last unloaded: nf_conntrack_proto_sctp] Pid: 16758, comm: rmmod Not tainted 2.6.28-rc2-netns-xfrm #3 RIP: 0010:[] [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP: 0018:ffff88015dc1fec8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: 00000000001000f8 RCX: 0000000000000000 RDX: ffffffffa009575c RSI: 0000000000000003 RDI: ffffffffa00956b5 RBP: ffff88015dc1fed8 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88015dc1fe48 R12: ffffffffa0458f60 R13: 0000000000000880 R14: 00007fff4c361d30 R15: 0000000000000880 FS: 00007f624435a6f0(0000) GS:ffffffff80521580(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000100100 CR3: 0000000168969000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 16758, threadinfo ffff88015dc1e000, task ffff880179864218) Stack: ffffffffa0459100 0000000000000000 ffff88015dc1fee8 ffffffffa0457934 ffff88015dc1ff78 ffffffff80253fef 746e6e6f635f666e 6f72705f6b636172 00707463735f6f74 ffffffff8024cb30 00000000023b8010 0000000000000000 Call Trace: [] nf_conntrack_proto_sctp_fini+0x10/0x1e [nf_conntrack_proto_sctp] [] sys_delete_module+0x19f/0x1fe [] ? trace_hardirqs_on_caller+0xf0/0x114 [] ? 
trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: 13 35 e0 e8 c4 6c 1a e0 48 8b 1d 6d c6 46 e0 eb 16 48 89 df 4c 89 e2 48 c7 c6 fc 85 09 a0 e8 61 cd ff ff 48 8b 5b 08 48 83 eb 08 <48> 8b 43 08 0f 18 08 48 8d 43 08 48 3d 60 4f 50 80 75 d3 5b 41 RIP [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP CR2: 0000000000100100 ---[ end trace bde8ac82debf7192 ]--- Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_helper.c | 3 +++ net/netfilter/nf_conntrack_proto.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c06b9f86ad..c39b6a99413 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -167,10 +168,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); + rtnl_lock(); spin_lock_bh(&nf_conntrack_lock); for_each_net(net) __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a59a307e685..592d73344d4 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -221,8 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l3proto, proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -333,8 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + 
rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); -- cgit v1.2.3-18-g5258 From 518a09ef11f8454f4676125d47c3e775b300c6a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Nov 2008 03:36:01 -0800 Subject: tcp: Fix recvmsg MSG_PEEK influence of blocking behavior. Vito Caputo noticed that tcp_recvmsg() returns immediately from partial reads when MSG_PEEK is used. In particular, this means that SO_RCVLOWAT is not respected. Simply remove the test. And this matches the behavior of several other systems, including BSD. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eccb7165a80..c5aca0bb116 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) -- cgit v1.2.3-18-g5258 From 467622ef2acb01986eab37ef96c3632b3ea35999 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 1 Nov 2008 04:19:11 -0700 Subject: [MTD] [NOR] Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) For "unlock" cycles to 16bit devices in 8bit compatibility mode we need to use the byte addresses 0xaaa and 0x555. These effectively match the word address 0x555 and 0x2aa, except the latter has its low bit set. Most chips don't care about the value of the 'A-1' pin in x8 mode, but some -- like the ST M29W320D -- do. So we need to be careful to set it where appropriate. cfi_send_gen_cmd is only ever passed addresses where the low byte is 0x00, 0x55 or 0xaa. 
Of those, only addresses ending 0xaa are affected by this patch, by masking in the extra low bit when the device is known to be in compatibility mode. [dwmw2: Do it only when (cmd_ofs & 0xff) == 0xaa] v4: Fix stupid typo in cfi_build_cmd_addr that failed to compile. I'm writing this patch way too late at night. v3: Bring all of the work back into cfi_build_cmd_addr including calling of map_bankwidth(map) and cfi_interleave(cfi) So every caller doesn't need to. v2: Only modified the address if our device_type is larger than our bus width. Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 13 ------------- drivers/mtd/chips/jedec_probe.c | 10 ++++------ include/linux/mtd/cfi.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3e6f5d8609e..d74ec46aa03 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -406,19 +406,6 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) /* Set the default CFI lock/unlock addresses */ cfi->addr_unlock1 = 0x555; cfi->addr_unlock2 = 0x2aa; - /* Modify the unlock address if we are in compatibility mode */ - if ( /* x16 in x8 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X8) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X8_BY_X16_ASYNC)) || - /* x32 in x16 mode */ - ((cfi->device_type == CFI_DEVICETYPE_X16) && - (cfi->cfiq->InterfaceDesc == - CFI_INTERFACE_X16_BY_X32_ASYNC))) - { - cfi->addr_unlock1 = 0xaaa; - cfi->addr_unlock2 = 0x555; - } } /* CFI mode */ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index f84ab618214..2f3f2f719ba 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1808,9 +1808,7 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base,
* several first banks can contain 0x7f instead of actual ID */ do { - uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), - cfi_interleave(cfi), - cfi->device_type); + uint32_t ofs = cfi_build_cmd_addr(0 + (bank << 8), map, cfi); mask = (1 << (cfi->device_type * 8)) - 1; result = map_read(map, base + ofs); bank++; @@ -1824,7 +1822,7 @@ static inline u32 jedec_read_id(struct map_info *map, uint32_t base, { map_word result; unsigned long mask; - u32 ofs = cfi_build_cmd_addr(1, cfi_interleave(cfi), cfi->device_type); + u32 ofs = cfi_build_cmd_addr(1, map, cfi); mask = (1 << (cfi->device_type * 8)) -1; result = map_read(map, base + ofs); return result.x[0] & mask; @@ -2067,8 +2065,8 @@ static int jedec_probe_chip(struct map_info *map, __u32 base, } /* Ensure the unlock addresses we try stay inside the map */ - probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, cfi_interleave(cfi), cfi->device_type); - probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, cfi_interleave(cfi), cfi->device_type); + probe_offset1 = cfi_build_cmd_addr(cfi->addr_unlock1, map, cfi); + probe_offset2 = cfi_build_cmd_addr(cfi->addr_unlock2, map, cfi); if ( ((base + probe_offset1 + map_bankwidth(map)) >= map->size) || ((base + probe_offset2 + map_bankwidth(map)) >= map->size)) goto retry; diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index ee5124ec319..00e2b575021 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -282,9 +282,25 @@ struct cfi_private { /* * Returns the command address according to the given geometry. 
*/ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) +static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi) { - return (cmd_ofs * type) * interleave; + unsigned bankwidth = map_bankwidth(map); + unsigned interleave = cfi_interleave(cfi); + unsigned type = cfi->device_type; + uint32_t addr; + + addr = (cmd_ofs * type) * interleave; + + /* Modify the unlock address if we are in compatiblity mode. + * For 16bit devices on 8 bit busses + * and 32bit devices on 16 bit busses + * set the low bit of the alternating bit sequence of the address. + */ + if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa)) + addr |= (type >> 1)*interleave; + + return addr; } /* @@ -430,7 +446,7 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t int type, map_word *prev_val) { map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); + uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); val = cfi_build_cmd(cmd, map, cfi); if (prev_val) -- cgit v1.2.3-18-g5258 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. 
| 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 7 ++++--- include/linux/topology.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac7718469..4850e4b02b6 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/include/linux/topology.h b/include/linux/topology.h index 2158fc0d5a5..34a7ee0ebed 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -146,10 +146,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = 
SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3-18-g5258 From 517ac45af4b55913587279d89001171c222f22e7 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:30:13 -0500 Subject: 9p: rdma: Set trans prior to requesting async connection ops The RDMA connection manager is fundamentally asynchronous. Since the async callback context is the client pointer, the transport in the client struct needs to be set prior to calling the first async op. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 8d6cc4777aa..4e9d2e673cf 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,6 +589,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (IS_ERR(rdma->cm_id)) goto error; + /* Associate the client with the transport */ + client->trans = rdma; + /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); @@ -669,7 +672,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; - client->trans = rdma; client->status = Connected; return 0; -- cgit v1.2.3-18-g5258 From cac23d6505546f4cfa42d949ec46d431a44bd39c Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:31:02 -0500 Subject: 9p: Make all client spin locks IRQ safe The client lock must be IRQ safe. Some of the lock acquisition paths took regular spin locks. 
Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412..f4e6c05b3c6 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -613,6 +613,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) { int err; struct p9_fid *fid; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); @@ -632,9 +633,9 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->aux = NULL; - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); return fid; @@ -646,13 +647,14 @@ error: static void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_del(&fid->flist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); kfree(fid); } -- cgit v1.2.3-18-g5258 From 82b189eaaf6186b7694317632255fa87460820a0 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:32:28 -0500 Subject: 9p: Remove unneeded free of fcall for Flush T and R fcall are reused until the client is destroyed. 
There does not need to be a special case for Flush Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index f4e6c05b3c6..26ca8ab4519 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -311,12 +311,6 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) p9_idpool_put(tag, c->tagpool); - - /* if this was a flush request we have to free response fcall */ - if (r->rc->id == P9_RFLUSH) { - kfree(r->tc); - kfree(r->rc); - } } /** -- cgit v1.2.3-18-g5258 From 45abdf1c7be80d6ec3b0b14e59ee75a0d5d9fb37 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:33:25 -0500 Subject: p9: Fix leak of waitqueue in request allocation path If a T or R fcall cannot be allocated, the function returns an error but neglects to free the wait queue that was successfully allocated. If it comes through again a second time this wq will be overwritten with a new allocation and the old allocation will be leaked. Also, if the client is subsequently closed, the close path will attempt to clean up these allocations, so set the req fields to NULL to avoid duplicate free. 
Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 26ca8ab4519..b56d808e63a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -189,6 +189,9 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); -- cgit v1.2.3-18-g5258 From 1558c6214904c636d5a37f05f84202d6cdd9cff8 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Mon, 27 Oct 2008 13:15:16 -0500 Subject: 9p: rdma: remove duplicated #include Removed duplicated #include in net/9p/trans_rdma.c. Signed-off-by: Huang Weiyi Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 4e9d2e673cf..2f1fe5fc122 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -45,7 +45,6 @@ #include #include #include -#include #define P9_PORT 5640 #define P9_RDMA_SQ_DEPTH 32 -- cgit v1.2.3-18-g5258 From 9f3e9bbe62b0bdbbaa7c689a68a22a7d3c1670f0 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Oct 2008 14:22:43 -0500 Subject: unsigned fid->fid cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index b56d808e63a..6e800dd51f0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -608,7 +608,7 @@ reterr: static struct p9_fid *p9_fid_create(struct p9_client *clnt) { - int err; + int ret; struct p9_fid *fid; unsigned long flags; @@ -617,11 +617,12 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) if (!fid) return ERR_PTR(-ENOMEM); - fid->fid = p9_idpool_get(clnt->fidpool); + ret = 
p9_idpool_get(clnt->fidpool); if (fid->fid < 0) { - err = -ENOSPC; + ret = -ENOSPC; goto error; } + fid->fid = ret; memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; @@ -638,7 +639,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) error: kfree(fid); - return ERR_PTR(err); + return ERR_PTR(ret); } static void p9_fid_destroy(struct p9_fid *fid) -- cgit v1.2.3-18-g5258 From b0d5fdef521b1eadb3fc2c1283000af7ef0297bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Nov 2008 20:46:46 -0800 Subject: net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' 
net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 6e800dd51f0..4b529454616 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct 
p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; -- cgit v1.2.3-18-g5258 From dc8a0843a435b2c0891e7eaea64faaf1ebec9b11 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2008 23:21:16 +0100 Subject: [JFFS2] fix race condition in jffs2_lzo_compress() deflate_mutex protects the 
globals lzo_mem and lzo_compress_buf. However, jffs2_lzo_compress() unlocks deflate_mutex _before_ it has copied out the compressed data from lzo_compress_buf. Correct this by moving the mutex unlock after the copy. In addition, document what deflate_mutex actually protects. Cc: stable@kernel.org Signed-off-by: Geert Uytterhoeven Acked-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- fs/jffs2/compr_lzo.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c index 47b045797e4..90cb60d0978 100644 --- a/fs/jffs2/compr_lzo.c +++ b/fs/jffs2/compr_lzo.c @@ -19,7 +19,7 @@ static void *lzo_mem; static void *lzo_compress_buf; -static DEFINE_MUTEX(deflate_mutex); +static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */ static void free_workspace(void) { @@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out, mutex_lock(&deflate_mutex); ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); - mutex_unlock(&deflate_mutex); - if (ret != LZO_E_OK) - return -1; + goto fail; if (compress_size > *dstlen) - return -1; + goto fail; memcpy(cpage_out, lzo_compress_buf, compress_size); - *dstlen = compress_size; + mutex_unlock(&deflate_mutex); + *dstlen = compress_size; return 0; + + fail: + mutex_unlock(&deflate_mutex); + return -1; } static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, -- cgit v1.2.3-18-g5258 From c78d0cf2925bffae8a6f00e7d9b8e971b0392edd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Nov 2008 12:04:46 +0000 Subject: x86: don't allow nr_irqs > NR_IRQS Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can return a value larger than NR_IRQS. This will lead to probe_irq_on() overrunning the irq_desc array. 
I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a Supermicro dual Xeon system. NR_IRQS is 224 but probe_nr_irqs() detects 5 IOAPICs and returns 240. Here are the log messages: Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0]) Tue Nov 4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24]) Tue Nov 4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48]) Tue Nov 4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72]) Tue Nov 4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96]) Tue Nov 4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119 Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge) Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) Tue Nov 4 16:53:47 2008 Enabling APIC mode: Flat. Using 5 I/O APICs Signed-off-by: Ben Hutchings Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b764d7429c6..7a3f2028e2e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void) /* something wrong ? 
*/ if (nr < nr_min) nr = nr_min; + if (WARN_ON(nr > NR_IRQS)) + nr = NR_IRQS; return nr; } -- cgit v1.2.3-18-g5258 From 1b4897688011cd05e07f00dcfe6af3331eb36a3c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Nov 2008 14:10:13 -0800 Subject: x86: size NR_IRQS on 32-bit systems the same way as 64-bit Impact: make NR_IRQS big enough for system with lots of apic/pins If lots of IO_APIC's are there (or can be there), size the same way as 64-bit, depending on MAX_IO_APICS and NR_CPUS. This fixes the boot problem reported by Ben Hutchings on a 32-bit server with 5 IO-APICs and 240 IO-APIC pins. Signed-off-by: Yinghai Tested-by: Ben Hutchings Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d843ed0e9b2..503aadc4ad3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,30 +101,22 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif !defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) -# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) - -# define NR_IRQS 224 - -# else /* IO_APIC || PARAVIRT */ - -# define NR_IRQS 16 - -# endif +# define NR_IRQS 224 -#else /* !VISWS && !VOYAGER */ +#else /* IO_APIC || PARAVIRT */ -# define NR_IRQS 224 +# define NR_IRQS 16 -#endif /* VISWS */ +#endif /* Voyager specific defines */ /* These define the CPIs we use in linux */ -- cgit v1.2.3-18-g5258 From 
cb3ac42b8af357fdd9ad838234245b39e5bdb7fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Nov 2008 17:28:01 +1100 Subject: md: revert the recent addition of a call to the BLKRRPART ioctl. It turns out that it is only safe to call blkdev_ioctl when the device is actually open (as ->bd_disk is set to NULL on last close). And it is quite possible for do_md_stop to be called when the device is not open. So discard the call to blkdev_ioctl(BLKRRPART) which was added in commit 934d9c23b4c7e31840a895ba4b7e88d6413c81f3 It is just as easy to call this ioctl from userspace when needed (on mdadm -S) so leave it out of the kernel Signed-off-by: NeilBrown --- drivers/md/md.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9abf6ed1653..1b1d32694f6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3884,7 +3884,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) if (mode == 0) { mdk_rdev_t *rdev; struct list_head *tmp; - struct block_device *bdev; printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); @@ -3941,11 +3940,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->degraded = 0; mddev->barriers_work = 0; mddev->safemode = 0; - bdev = bdget_disk(mddev->gendisk, 0); - if (bdev) { - blkdev_ioctl(bdev, 0, BLKRRPART, 0); - bdput(bdev); - } kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); } else if (mddev->pers) -- cgit v1.2.3-18-g5258 From a53a6c85756339f82ff19e001e90cfba2d6299a8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 6 Nov 2008 17:28:20 +1100 Subject: md: fix bug in raid10 recovery. Adding a spare to a raid10 doesn't cause recovery to start. This is due to a silly typo in commit 6c2fce2ef6b4821c21b5c42c7207cb9cf8c87eda and so is a bug in 2.6.27 and .28-rc. Thanks to Thomas Backlund for bisecting to find this.
Cc: Thomas Backlund Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index da5129a24b1..970a96ef9b1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1137,7 +1137,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) if (!enough(conf)) return -EINVAL; - if (rdev->raid_disk) + if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= 0 && -- cgit v1.2.3-18-g5258 From da85f865b1dcec0853c48b763ed312441ce0c7df Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 13:37:27 -0600 Subject: x86: mention ACPI in top-level Kconfig menu Impact: clarify menuconfig text Mention ACPI in the top-level menu to give a clue as to where it lives. This matches what ia64 does. Signed-off-by: Bjorn Helgaas Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d315..5d6aa4013dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1494,7 +1494,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -menu "Power management options" +menu "Power management and ACPI options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER -- cgit v1.2.3-18-g5258 From 43381785a5ba1cb424b36812373a6a04054b5c3c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 20 Oct 2008 15:43:43 +0200 Subject: block: remove unused ll_new_mergeable() Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-merge.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 8681cd6f991..b92f5b0866b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -222,27 +222,6 @@ new_segment: } EXPORT_SYMBOL(blk_rq_map_sg); -static inline int ll_new_mergeable(struct request_queue *q, - struct request *req, - struct bio *bio) -{ - int 
nr_phys_segs = bio_phys_segments(q, bio); - - if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } - - /* - * A hw segment is just getting larger, bump just the phys - * counter. - */ - req->nr_phys_segments += nr_phys_segs; - return 1; -} - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) -- cgit v1.2.3-18-g5258 From f92131c3dd567fc6df18ce3f46fcf57ecbdefbe0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 29 Oct 2008 14:10:51 +0100 Subject: bio: define __BIOVEC_PHYS_MERGEABLE Define __BIOVEC_PHYS_MERGEABLE as the default implementation of BIOVEC_PHYS_MERGEABLE, so that it's available for reuse within an arch-specific definition of BIOVEC_PHYS_MERGEABLE. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9a..6a642098e5c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ -- cgit v1.2.3-18-g5258 From 2920ebbd65f3e80c318adf5191ac0987142bda80 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 30 Oct 2008 08:32:29 +0100 Subject: block: add timer on
blkdev_dequeue_request() not elv_next_request() Block queue supports two usage models - one where block driver peeks at the front of queue using elv_next_request(), processes it and finishes it and the other where block driver peeks at the front of queue, dequeues the request using blkdev_dequeue_request() and finishes it. The latter is more flexible as it allows the driver to process multiple commands concurrently. These two inconsistent usage models make the block layer implementation confusing. For some, elv_next_request() is considered the issue point while others consider blkdev_dequeue_request() the issue point. Till now the inconsistency mostly affected only accounting, so it didn't really break anything seriously; however, with block layer timeout, this inconsistency hits hard. Block layer considers elv_next_request() the issue point and adds timer but SCSI layer thinks it was just peeking and when the request can't process the command right away, it's just left there without further processing. This leaves the request dangling on the timer list and, when the timer goes off, the request which the SCSI layer and below think is still on the block queue ends up in the EH queue, causing various problems - EH hang (failed count goes over busy count and EH never wakes up), WARN_ON() and oopses as the low level driver tries to handle the unknown command, etc. depending on the timing. As SCSI midlayer is the only user of block layer timer at the moment, moving blk_add_timer() to elv_dequeue_request() fixes the problem; however, these two usage models definitely need to be cleaned up in the future.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/elevator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index 59173a69ebd..9ac82dde99d 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -773,12 +773,6 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); - - /* - * We are now handing the request to the hardware, - * add the timeout handler - */ - blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -850,6 +844,12 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) q->in_flight++; + + /* + * We are now handing the request to the hardware, add the + * timeout handler. + */ + blk_add_timer(rq); } EXPORT_SYMBOL(elv_dequeue_request); -- cgit v1.2.3-18-g5258 From e78042e5b83936b1d12a4b5bbb492bdd88ad76c6 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Thu, 30 Oct 2008 02:16:20 -0700 Subject: blk: move blk_delete_timer call in end_that_request_last Move the call to blk_delete_timer to later in end_that_request_last to address an issue where blkdev_dequeue_request may have added a timer for the request. Signed-off-by: Mike Anderson Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c3df30cfb3f..10e8a64a5a5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1770,8 +1770,6 @@ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; - blk_delete_timer(req); - if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1781,6 +1779,8 @@ static void end_that_request_last(struct request *req, int error) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); + blk_delete_timer(req); + /* * Account IO completion.
bar_rq isn't accounted as a normal * IO on queueing nor completion. Accounting the containing -- cgit v1.2.3-18-g5258 From 561920a0d2bb6d63343e83acfd784c0a77bd28d1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 30 Oct 2008 18:28:41 +0100 Subject: generic-ipi: fix the smp_mb() placement smp_mb() is needed (to make the memory operations visible globally) before sending the ipi on the sender and the receiver (on Alpha at least) needs smp_read_barrier_depends() in the handler before reading the call_single_queue list in a lock-free fashion. On x86, x2apic mode register accesses for sending IPI's don't have serializing semantics. So the need for smp_mb() before sending the IPI becomes more critical in x2apic mode. Remove the unnecessary smp_mb() in csd_flag_wait(), as the presence of that smp_mb() doesn't mean anything on the sender, when the ipi receiver is not doing anything special (like memory fence) after clearing the CSD_FLAG_WAIT. Signed-off-by: Suresh Siddha Signed-off-by: Jens Axboe --- kernel/smp.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index f362a855377..75c8dde58c5 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data) { /* Wait for response */ do { - /* - * We need to see the flags store in the IPI handler - */ - smp_mb(); if (!(data->flags & CSD_FLAG_WAIT)) break; cpu_relax(); @@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data) list_add_tail(&data->list, &dst->list); spin_unlock_irqrestore(&dst->lock, flags); + /* + * Make the list addition visible before sending the ipi.
+ */ + smp_mb(); + if (ipi) arch_send_call_function_single_ipi(cpu); @@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void) * Need to see other stores to list head for checking whether * list is empty without holding q->lock */ - smp_mb(); + smp_read_barrier_depends(); while (!list_empty(&q->list)) { unsigned int data_flags; @@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void) /* * See comment on outer loop */ - smp_mb(); + smp_read_barrier_depends(); } } @@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, list_add_tail_rcu(&data->csd.list, &call_function_queue); spin_unlock_irqrestore(&call_function_lock, flags); + /* + * Make the list addition visible before sending the ipi. + */ + smp_mb(); + /* Send a message to all CPUs in the map */ arch_send_call_function_ipi(mask); -- cgit v1.2.3-18-g5258 From 89f97496e81d2112b5e41416fe3020688c443818 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 5 Nov 2008 10:21:06 +0100 Subject: block: fix __blkdev_get() for removable devices Commit 0762b8bde9729f10f8e6249809660ff2ec3ad735 moved disk_get_part() in front of recursive get on the whole disk, which caused removable devices to try disk_get_part() before rescanning after a new media is inserted, which might fail legit open attempts or give the old partition. This patch fixes the problem by moving disk_get_part() after __blkdev_get() on the whole disk. This problem was spotted by Borislav Petkov. 
Signed-off-by: Tejun Heo Tested-by: Borislav Petkov Signed-off-by: Jens Axboe --- fs/block_dev.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 88a776fa0ef..db831efbdbb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret) @@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1079,7 @@ static int 
__blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); -- cgit v1.2.3-18-g5258 From 9c133c469d38043d5aadaa03f2fb840d88d1cf4f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:48 +0100 Subject: Add round_jiffies_up and related routines This patch (as1158b) adds round_jiffies_up() and friends. These routines work like the analogous round_jiffies() functions, except that they will never round down. The new routines will be useful for timeouts where we don't care exactly when the timer expires, provided it doesn't expire too soon. Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/timer.h | 5 ++ kernel/timer.c | 129 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 30 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a2..daf9685b861 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c5..dbd50fabe4c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base) tbase_get_deferrable(timer->base)); } 
-/** - * __round_jiffies - function to round jiffies to a full second - * @j: the time in (absolute) jiffies that should be rounded - * @cpu: the processor number on which the timeout will happen - * - * __round_jiffies() rounds an absolute time in the future (in jiffies) - * up or down to (approximately) full seconds. This is useful for timers - * for which the exact time they fire does not matter too much, as long as - * they fire approximately every X seconds. - * - * By rounding these timers to whole seconds, all such timers will fire - * at the same time, rather than at various times spread out. The goal - * of this is to have the CPU wake up less, which saves power. - * - * The exact rounding is skewed for each processor to avoid all - * processors firing at the exact same time, which could lead - * to lock contention or spurious cache line bouncing. - * - * The return value is the rounded version of the @j parameter. - */ -unsigned long __round_jiffies(unsigned long j, int cpu) +static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) { int rem; unsigned long original = j; @@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu) * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. + * But never round down if @force_up is set. 
*/ - if (rem < HZ/4) /* round down */ + if (rem < HZ/4 && !force_up) /* round down */ j = j - rem; else /* round up */ j = j - rem + HZ; @@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu) return original; return j; } + +/** + * __round_jiffies - function to round jiffies to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * __round_jiffies() rounds an absolute time in the future (in jiffies) + * up or down to (approximately) full seconds. This is useful for timers + * for which the exact time they fire does not matter too much, as long as + * they fire approximately every X seconds. + * + * By rounding these timers to whole seconds, all such timers will fire + * at the same time, rather than at various times spread out. The goal + * of this is to have the CPU wake up less, which saves power. + * + * The exact rounding is skewed for each processor to avoid all + * processors firing at the exact same time, which could lead + * to lock contention or spurious cache line bouncing. + * + * The return value is the rounded version of the @j parameter. + */ +unsigned long __round_jiffies(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, false); +} EXPORT_SYMBOL_GPL(__round_jiffies); /** @@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies); */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { - /* - * In theory the following code can skip a jiffy in case jiffies - * increments right between the addition and the later subtraction. - * However since the entire point of this function is to use approximate - * timeouts, it's entirely ok to not handle that. 
- */ - return __round_jiffies(j + jiffies, cpu) - jiffies; + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); @@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative); */ unsigned long round_jiffies(unsigned long j) { - return __round_jiffies(j, raw_smp_processor_id()); + return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); @@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_relative); +/** + * __round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long __round_jiffies_up(unsigned long j, int cpu) +{ + return round_jiffies_common(j, cpu, true); +} +EXPORT_SYMBOL_GPL(__round_jiffies_up); + +/** + * __round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * @cpu: the processor number on which the timeout will happen + * + * This is the same as __round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. 
+ */ +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) +{ + unsigned long j0 = jiffies; + + /* Use j0 because jiffies might change while we run */ + return round_jiffies_common(j + j0, cpu, true) - j0; +} +EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); + +/** + * round_jiffies_up - function to round jiffies up to a full second + * @j: the time in (absolute) jiffies that should be rounded + * + * This is the same as round_jiffies() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up(unsigned long j) +{ + return round_jiffies_common(j, raw_smp_processor_id(), true); +} +EXPORT_SYMBOL_GPL(round_jiffies_up); + +/** + * round_jiffies_up_relative - function to round jiffies up to a full second + * @j: the time in (relative) jiffies that should be rounded + * + * This is the same as round_jiffies_relative() except that it will never + * round down. This is useful for timeouts for which the exact time + * of firing does not matter too much, as long as they don't fire too + * early. + */ +unsigned long round_jiffies_up_relative(unsigned long j) +{ + return __round_jiffies_up_relative(j, raw_smp_processor_id()); +} +EXPORT_SYMBOL_GPL(round_jiffies_up_relative); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) -- cgit v1.2.3-18-g5258 From 7838c15b8dd18e78a523513749e5b54bda07b0cb Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:49 +0100 Subject: Block: use round_jiffies_up() This patch (as1159b) changes the timeout routines in the block core to use round_jiffies_up(). There's no point in rounding the timer deadline down, since if it expires too early we will have to restart it. The patch also removes some unnecessary tests when a request is removed from the queue's timer list. 
Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- block/blk-timeout.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 972a63f848f..69185ea9fae 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -75,14 +75,7 @@ void blk_delete_timer(struct request *req) { struct request_queue *q = req->q; - /* - * Nothing to detach - */ - if (!q->rq_timed_out_fn || !req->deadline) - return; - list_del_init(&req->timeout_list); - if (list_empty(&q->timeout_list)) del_timer(&q->timeout); } @@ -142,7 +135,7 @@ void blk_rq_timed_out_timer(unsigned long data) } if (next_set && !list_empty(&q->timeout_list)) - mod_timer(&q->timeout, round_jiffies(next)); + mod_timer(&q->timeout, round_jiffies_up(next)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -198,17 +191,10 @@ void blk_add_timer(struct request *req) /* * If the timer isn't already pending or this timeout is earlier - * than an existing one, modify the timer. Round to next nearest + * than an existing one, modify the timer. Round up to next nearest * second. */ - expiry = round_jiffies(req->deadline); - - /* - * We use ->deadline == 0 to detect whether a timer was added or - * not, so just increase to next jiffy for that specific case - */ - if (unlikely(!req->deadline)) - req->deadline = 1; + expiry = round_jiffies_up(req->deadline); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) -- cgit v1.2.3-18-g5258 From 2d3854a37e8b767a51aba38ed6d22817b0631e33 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Nov 2008 13:39:10 +1100 Subject: cpumask: introduce new API, without changing anything Impact: introduce new APIs We want to deprecate cpumasks on the stack, as we are headed for gynormous numbers of CPUs. Eventually, we want to head towards an undefined 'struct cpumask' so they can never be declared on stack. 1) New cpumask functions which take pointers instead of copies. 
(cpus_* -> cpumask_*) 2) Several new helpers to reduce requirements for temporary cpumasks (cpumask_first_and, cpumask_next_and, cpumask_any_and) 3) Helpers for declaring cpumasks on or offstack for large NR_CPUS (cpumask_var_t, alloc_cpumask_var and free_cpumask_var) 4) 'struct cpumask' for explicitness and to mark new-style code. 5) Make iterator functions stop at nr_cpu_ids (a runtime constant), not NR_CPUS for time efficiency and for smaller dynamic allocations in future. 6) cpumask_copy() so we can allocate less than a full cpumask eventually (for alloc_cpumask_var), and so we can eliminate the 'struct cpumask' definition eventually. 7) work_on_cpu() helper for doing task on a CPU, rather than saving old cpumask for current thread and manipulating it. 8) smp_call_function_many() which is smp_call_function_mask() except taking a cpumask pointer. Note that this patch simply introduces the new functions and leaves the obsolescent ones in place. This is to simplify the transition patches. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 502 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/smp.h | 9 + include/linux/workqueue.h | 8 + kernel/cpu.c | 3 + kernel/workqueue.c | 45 +++++ lib/cpumask.c | 73 +++++++ 6 files changed, 638 insertions(+), 2 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d3219d73f8e..c8e66619097 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -5,6 +5,9 @@ * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. * + * The new cpumask_ ops take a "struct cpumask *"; the old ones + * use cpumask_t. + * * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * @@ -31,7 +34,7 @@ * will span the entire range of NR_CPUS. * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
* - * The available cpumask operations are: + * The obsolescent cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask @@ -138,7 +141,7 @@ #include #include -typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; extern cpumask_t _unused_cpumask_arg_; #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -527,4 +530,499 @@ extern cpumask_t cpu_active_map; #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) +/* These are the new versions of the cpumask operators: passed by pointer. + * The older versions will be implemented in terms of these, then deleted. */ +#define cpumask_bits(maskp) ((maskp)->bits) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#define nr_cpumask_bits nr_cpu_ids +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* verify cpu argument to cpumask_* operators */ +static inline unsigned int cpumask_check(unsigned int cpu) +{ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + WARN_ON_ONCE(cpu >= nr_cpumask_bits); +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + return cpu; +} + +#if NR_CPUS == 1 +/* Uniprocessor. 
*/ +#define cpumask_first(src) ({ (void)(src); 0; }) +#define cpumask_next(n, src) ({ (void)(src); 1; }) +#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) +#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) +#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) +#else +/** + * cpumask_first - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +/** + * cpumask_next_zero - get the next unset cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus unset. + */ +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. 
*/ + if (n != -1) + cpumask_check(n); + return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next_and((cpu), (mask), (and)), \ + (cpu) < nr_cpu_ids;) +#endif /* SMP */ + +#define CPU_BITS_NONE \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} + +#define CPU_BITS_CPU0 \ +{ \ + [0] = 1UL \ +} + +/** + * cpumask_set_cpu - set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + set_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_clear_cpu - clear a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_test_cpu - test for a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * No static inline type checking - see Subtlety (1) above. + */ +#define cpumask_test_cpu(cpu, cpumask) \ + test_bit(cpumask_check(cpu), (cpumask)->bits) + +/** + * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * test_and_set_bit wrapper for cpumasks. 
+ */ +static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +{ + return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); +} + +/** + * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_setall(struct cpumask *dstp) +{ + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear(struct cpumask *dstp) +{ + bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_and - *dstp = *src1p & *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_and(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_or - *dstp = *src1p | *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_xor - *dstp = *src1p ^ *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_xor(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_andnot - *dstp = *src1p & ~*src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_andnot(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + 
cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_complement - *dstp = ~*srcp + * @dstp: the cpumask result + * @srcp: the input to invert + */ +static inline void cpumask_complement(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), + nr_cpumask_bits); +} + +/** + * cpumask_equal - *src1p == *src2p + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_equal(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_intersects - (*src1p & *src2p) != 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_intersects(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_subset - (*src1p & ~*src2p) == 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline int cpumask_subset(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_empty - *srcp == 0 + * @srcp: the cpumask to check that all cpus < nr_cpu_ids are clear. + */ +static inline bool cpumask_empty(const struct cpumask *srcp) +{ + return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_full - *srcp == 0xFFFFFFFF... + * @srcp: the cpumask to check that all cpus < nr_cpu_ids are set. + */ +static inline bool cpumask_full(const struct cpumask *srcp) +{ + return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_weight - Count of bits in *srcp + * @srcp: the cpumask to count bits (< nr_cpu_ids) in. 
+ */ +static inline unsigned int cpumask_weight(const struct cpumask *srcp) +{ + return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_shift_right - *dstp = *srcp >> n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_right(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_shift_left - *dstp = *srcp << n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_left(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_copy - *dstp = *srcp + * @dstp: the result + * @srcp: the input cpumask + */ +static inline void cpumask_copy(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_any - pick a "random" cpu from *srcp + * @srcp: the input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any(srcp) cpumask_first(srcp) + +/** + * cpumask_first_and - return the first cpu from *src1p & *src2p + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + */ +#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) + +/** + * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. 
+ */ +#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/** + * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * + * This will eventually be a runtime variable, depending on nr_cpu_ids. + */ +static inline size_t cpumask_size(void) +{ + /* FIXME: Once all cpumask assignments are eliminated, this + * can be nr_cpumask_bits */ + return BITS_TO_LONGS(NR_CPUS) * sizeof(long); +} + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * ie. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... 
+ * + * free_cpumask_var(tmpmask); + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; + +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +void alloc_bootmem_cpumask_var(cpumask_var_t *mask); +void free_cpumask_var(cpumask_var_t mask); + +#else +typedef struct cpumask cpumask_var_t[1]; + +static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return true; +} + +static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ +} + +static inline void free_cpumask_var(cpumask_var_t mask) +{ +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +/* The pointer versions of the maps, these will become the primary versions. */ +#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) +#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) +#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) +#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) + +/* It's common to want to use cpu_all_mask in struct member initializers, + * so it has to refer to an address rather than a pointer. */ +extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); +#define cpu_all_mask to_cpumask(cpu_all_bits) + +/* First bits of cpu_bit_bitmap are in fact unset. 
*/ +#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) + +/* Wrappers for arch boot code to manipulate normally-constant masks */ +static inline void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, &cpu_possible_map); + else + cpumask_clear_cpu(cpu, &cpu_possible_map); +} + +static inline void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, &cpu_present_map); + else + cpumask_clear_cpu(cpu, &cpu_present_map); +} + +static inline void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, &cpu_online_map); + else + cpumask_clear_cpu(cpu, &cpu_online_map); +} + +static inline void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, &cpu_active_map); + else + cpumask_clear_cpu(cpu, &cpu_active_map); +} + +static inline void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(&cpu_present_map, src); +} + +static inline void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(&cpu_possible_map, src); +} + +static inline void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(&cpu_online_map, src); +} #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 2e4d58b26c0..3f9a60043a9 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -64,8 +64,17 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); +/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. 
*/ int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); + +static inline void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, + int wait) +{ + smp_call_function_mask(*mask, func, info, wait); +} + int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 89a5a1231ff..b36291130f2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,4 +240,12 @@ void cancel_rearming_delayed_work(struct delayed_work *work) cancel_delayed_work_sync(work); } +#ifndef CONFIG_SMP +static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} +#else +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +#endif /* CONFIG_SMP */ #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 86d49045dae..5a732c5ef08 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9..d4dc69ddebd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -970,6 +970,51 @@ undo: return ret; } +#ifdef CONFIG_SMP +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void do_work_for_cpu(struct work_struct *w) +{ + struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function arg + * + * This will return -EINVAL if the cpu is not 
online, or the return value + * of @fn otherwise. + */ +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc; + + INIT_WORK(&wfc.work, do_work_for_cpu); + wfc.fn = fn; + wfc.arg = arg; + get_online_cpus(); + if (unlikely(!cpu_online(cpu))) + wfc.ret = -EINVAL; + else { + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + } + put_online_cpus(); + + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + void __init init_workqueues(void) { cpu_populated_map = cpu_online_map; diff --git a/lib/cpumask.c b/lib/cpumask.c index 5f97dc25ef9..5ceb4211c83 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -2,6 +2,7 @@ #include #include #include +#include int __first_cpu(const cpumask_t *srcp) { @@ -35,3 +36,75 @@ int __any_online_cpu(const cpumask_t *mask) return cpu; } EXPORT_SYMBOL(__any_online_cpu); + +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; +} +EXPORT_SYMBOL(cpumask_next_and); + +/** + * cpumask_any_but - return a "random" cpu in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} + +/* These are not inline because of header tangles. 
*/ +#ifdef CONFIG_CPUMASK_OFFSTACK +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + if (likely(slab_is_available())) + *mask = kmalloc(cpumask_size(), flags); + else { +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + printk(KERN_ERR + "=> alloc_cpumask_var: kmalloc not available!\n"); + dump_stack(); +#endif + *mask = NULL; + } +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (!*mask) { + printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); + dump_stack(); + } +#endif + return *mask != NULL; +} +EXPORT_SYMBOL(alloc_cpumask_var); + +void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ + *mask = alloc_bootmem(cpumask_size()); +} + +void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} +EXPORT_SYMBOL(free_cpumask_var); +#endif -- cgit v1.2.3-18-g5258 From 7db282fa67b58daff8a57f9e1c93d4474b5908ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 5 Nov 2008 23:36:48 -0800 Subject: x86: remove VISWS and PARAVIRT around NR_IRQS puzzle Impact: fix warning message when PARAVIRT is set in config Remove stale #ifdef components from our IRQ sizing logic. x86/Voyager is the only holdout. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 503aadc4ad3..0005adb0f94 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,18 +101,18 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_X86_VOYAGER) # define NR_IRQS 224 -#else /* IO_APIC || PARAVIRT */ +#else /* IO_APIC || VOYAGER */ # define NR_IRQS 16 -- cgit v1.2.3-18-g5258 From f1cd14ae52985634d0389e934eba25b5ecf24565 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 6 Nov 2008 19:41:24 +1100 Subject: md: linear: Fix a division by zero bug for very small arrays. We currently oops with a divide error on starting a linear software raid array consisting of at least two very small (< 500K) devices. The bug is caused by the calculation of the hash table size which tries to compute sector_div(sz, base) with "base" being zero due to the small size of the component devices of the array. Fix this by requiring the hash spacing to be at least one which implies that also "base" is non-zero. This bug has existed since about 2.6.14. 
Cc: stable@kernel.org Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 190147c79e7..3b90c5c924e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -148,6 +148,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) min_sectors = conf->array_sectors; sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); + if (min_sectors == 0) + min_sectors = 1; /* min_sectors is the minimum spacing that will fit the hash * table in one PAGE. This may be much smaller than needed. -- cgit v1.2.3-18-g5258 From d6f0f39b7d05e62b347c4352d070e4afb3ade4b5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Nov 2008 13:53:04 -0800 Subject: x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR Impact: fix rare x2apic hang On x86, x2apic mode accesses for sending IPI's don't have serializing semantics. If the IPI receiver refers (in lock-free fashion) to some memory set up by the sender, the need for smp_mb() before sending the IPI becomes critical in x2apic mode. Add the smp_mb() in native_flush_tlb_others() before sending the IPI. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 6 ++++++ arch/x86/kernel/tlb_64.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e00534b3353..f4049f3513b 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, flush_mm = mm; flush_va = va; cpus_or(flush_cpumask, cpumask, flush_cpumask); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. 
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index dcbf7a1159e..8f919ca6949 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -182,6 +182,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_va = va; cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.3-18-g5258 From b954f6f63e7938a11de5bd15cb5cbcac7423cf97 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Nov 2008 22:18:41 +0200 Subject: [WATCHDOG] SAM9 watchdog - update for moved headers The architecture header files were recently moved from include/asm-arm/mach-at91/ to arch/arm/mach-at91/include/mach/. The SAM9 watchdog driver still includes a header from the old location. Signed-off-by: Andrew Victor Signed-off-by: Wim Van Sebroeck Signed-off-by: Andrew Morton --- drivers/watchdog/at91sam9_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index b4babfc3158..b1da287f90e 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -30,7 +30,7 @@ #include #include -#include +#include #define DRV_NAME "AT91SAM9 Watchdog" -- cgit v1.2.3-18-g5258 From f0e625c1aa24e861c224fb778c377b2ddb443d2b Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Nov 2008 22:36:35 +0200 Subject: [WATCHDOG] SAM9 watchdog - supported on all SAM9 and CAP9 processors The SAM9 watchdog driver is usable on the whole family of AT91SAM9 and CAP9 processors. Update the configuration to indicate this and allow the driver to be selected. 
Signed-off-by: Andrew Victor Signed-off-by: Wim Van Sebroeck Signed-off-by: Andrew Morton --- drivers/watchdog/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 1a22fe782a2..4fd3fa5546b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -67,11 +67,11 @@ config AT91RM9200_WATCHDOG system when the timeout is reached. config AT91SAM9X_WATCHDOG - tristate "AT91SAM9X watchdog" - depends on WATCHDOG && (ARCH_AT91SAM9260 || ARCH_AT91SAM9261) + tristate "AT91SAM9X / AT91CAP9 watchdog" + depends on ARCH_AT91 && !ARCH_AT91RM9200 help - Watchdog timer embedded into AT91SAM9X chips. This will reboot your - system when the timeout is reached. + Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will + reboot your system when the timeout is reached. config 21285_WATCHDOG tristate "DC21285 watchdog" -- cgit v1.2.3-18-g5258 From 80be308dfa3798c7bad0fc81760b2faf83870e91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 6 Nov 2008 14:59:05 +0100 Subject: AMD IOMMU: fix lazy IO/TLB flushing in unmap path Lazy flushing needs to take care of the unmap path too which is not yet implemented and leads to stale IO/TLB entries. This is fixed by this patch. 
Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 38e88d40ab1..4755bbc7ae5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -526,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); + + if (address + pages >= dom->next_bit) + dom->need_flush = true; } /**************************************************************************** @@ -981,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (amd_iommu_unmap_flush) + if (amd_iommu_unmap_flush || dma_dom->need_flush) { iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + dma_dom->need_flush = false; + } } /* -- cgit v1.2.3-18-g5258 From b9c3bfc24e1088d260de4091b2b41808c7398355 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 6 Nov 2008 12:05:40 +0000 Subject: x86: align DirectMap in /proc/meminfo Impact: right-align /proc/meminfo consistent with other fields When the split-LRU patches added Inactive(anon) and Inactive(file) lines to /proc/meminfo, all counts were moved two columns rightwards to fit in. Now move x86's DirectMap lines two columns rightwards to line up. 
Signed-off-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f1dc1b75d16..e89d24815f2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -67,18 +67,18 @@ static void split_page_count(int level) void arch_report_meminfo(struct seq_file *m) { - seq_printf(m, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - seq_printf(m, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif } -- cgit v1.2.3-18-g5258 From 8d00450d296dedec9ada38d43b83e79cca6fd5a3 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 4 Nov 2008 12:52:44 -0200 Subject: Revert "x86: default to reboot via ACPI" This reverts commit c7ffa6c26277b403920e2255d10df849bd613380. The assumption of this change was that this would not break any existing machine. Andrey Borzenkov reported troubles with the ACPI reboot method: the system would hang on reboot, necessitating a power cycle. Probably more systems are affected as well. Also, there are patches queued up for v2.6.29 to disable virtualization on emergency_restart() - which was the original motivation of this change.
Reported-by: Andrey Borzenkov Bisected-by: Andrey Borzenkov Signed-off-by: Eduardo Habkost Acked-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f4c93f1cfc1..724adfc63cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -/* - * Keyboard reset and triple fault may result in INIT, not RESET, which - * doesn't work when we're in vmx root mode. Try ACPI first. - */ -enum reboot_type reboot_type = BOOT_ACPI; +enum reboot_type reboot_type = BOOT_KBD; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.3-18-g5258 From ab4f2ee130d5ffcf35616e1f5c6ab75af5b463b6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 6 Nov 2008 17:11:07 +0000 Subject: [ARM] fix naming of MODULE_START / MODULE_END As of 73bdf0a60e607f4b8ecc5aec597105976565a84f, the kernel needs to know where modules are located in the virtual address space. On ARM, we located this region between MODULE_START and MODULE_END. Unfortunately, everyone else calls it MODULES_VADDR and MODULES_END. Update ARM to use the same naming, so is_vmalloc_or_module_addr() can work properly. Also update the comment on mm/vmalloc.c to reflect that ARM also places modules in a separate region from the vmalloc space. 
Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 12 ++++++------ arch/arm/kernel/module.c | 8 ++++---- arch/arm/mm/mmu.c | 4 ++-- mm/vmalloc.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 809ff9ab853..77764301844 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -44,10 +44,10 @@ * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 32MB of the kernel text. */ -#define MODULE_END (PAGE_OFFSET) -#define MODULE_START (MODULE_END - 16*1048576) +#define MODULES_END (PAGE_OFFSET) +#define MODULES_VADDR (MODULES_END - 16*1048576) -#if TASK_SIZE > MODULE_START +#if TASK_SIZE > MODULES_VADDR #error Top of user space clashes with start of module space #endif @@ -56,7 +56,7 @@ * Since we use sections to map it, this macro replaces the physical address * with its virtual address while keeping offset from the base section. */ -#define XIP_VIRT_ADDR(physaddr) (MODULE_START + ((physaddr) & 0x000fffff)) +#define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) /* * Allow 16MB-aligned ioremap pages @@ -94,8 +94,8 @@ /* * The module can be at any place in ram in nommu mode. */ -#define MODULE_END (END_MEM) -#define MODULE_START (PHYS_OFFSET) +#define MODULES_END (END_MEM) +#define MODULES_VADDR (PHYS_OFFSET) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 9203ba7d58e..b8d965dcd6f 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -26,12 +26,12 @@ /* * The XIP kernel text is mapped in the module area for modules and * some other stuff to work without any indirect relocations. - * MODULE_START is redefined here and not in asm/memory.h to avoid + * MODULES_VADDR is redefined here and not in asm/memory.h to avoid * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. 
*/ extern void _etext; -#undef MODULE_START -#define MODULE_START (((unsigned long)&_etext + ~PGDIR_MASK) & PGDIR_MASK) +#undef MODULES_VADDR +#define MODULES_VADDR (((unsigned long)&_etext + ~PGDIR_MASK) & PGDIR_MASK) #endif #ifdef CONFIG_MMU @@ -43,7 +43,7 @@ void *module_alloc(unsigned long size) if (!size) return NULL; - area = __get_vm_area(size, VM_ALLOC, MODULE_START, MODULE_END); + area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); if (!area) return NULL; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 8ba75406455..34e53596ff1 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -654,7 +654,7 @@ static inline void prepare_page_table(struct meminfo *mi) /* * Clear out all the mappings below the kernel image. */ - for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE) + for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE) pmd_clear(pmd_off_k(addr)); #ifdef CONFIG_XIP_KERNEL @@ -766,7 +766,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) */ #ifdef CONFIG_XIP_KERNEL map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); - map.virtual = MODULE_START; + map.virtual = MODULES_VADDR; map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; map.type = MT_ROM; create_mapping(&map); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f1cc03bbf6a..66fad3fc02b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end, static inline int is_vmalloc_or_module_addr(const void *x) { /* - * x86-64 and sparc64 put modules in a special place, + * ARM, x86-64 and sparc64 put modules in a special place, * and fall back on vmalloc() if that fails. Others * just put it in the vmalloc space. 
*/ -- cgit v1.2.3-18-g5258 From 1547a01226bcc3fd575fd5383b8b55476a02b518 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 6 Nov 2008 10:53:54 -0600 Subject: [IA64] reorder Kconfig options to match x86 No functional change, just reorder some config options and update the "Power management and ACPI" label to match the defacto x86 standard. Signed-off-by: Bjorn Helgaas Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 9f481ba59a4..6bd91ed7cd0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -586,7 +586,7 @@ source "fs/Kconfig.binfmt" endmenu -menu "Power management and ACPI" +menu "Power management and ACPI options" source "kernel/power/Kconfig" @@ -642,6 +642,8 @@ source "net/Kconfig" source "drivers/Kconfig" +source "arch/ia64/hp/sim/Kconfig" + config MSPEC tristate "Memory special operations driver" depends on IA64 @@ -653,6 +655,12 @@ config MSPEC source "fs/Kconfig" +source "arch/ia64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + source "arch/ia64/kvm/Kconfig" source "lib/Kconfig" @@ -679,11 +687,3 @@ config IRQ_PER_CPU config IOMMU_HELPER def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) - -source "arch/ia64/hp/sim/Kconfig" - -source "arch/ia64/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" -- cgit v1.2.3-18-g5258 From 62ee0540f5e5a804b79cae8b3c0185a85f02436b Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Wed, 5 Nov 2008 17:57:52 -0500 Subject: [IA64] fix boot panic caused by offline CPUs This fixes a regression introduced by 2c6e6db41f01b6b4eb98809350827c9678996698 "Minimize per_cpu reservations." That patch incorrectly used information about what CPUs are possible that was not yet initialized by ACPI. The end result was that per_cpu structures for offline CPUs were not initialized causing a NULL pointer reference. 
Since we cannot do the full acpi_boot_init() call any earlier, the simplest fix is to just parse the MADT for SAPIC entries early to find the CPU info. This should also allow for some cleanup of the code added by the "Minimize per_cpu reservations". This patch just fixes the regressions, the cleanup will come in a later patch. Signed-off-by: Doug Chapman Signed-off-by: Alex Chiang CC: Robin Holt Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 29 ++++++++++++++++++++++++----- arch/ia64/kernel/setup.c | 7 ++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 0635015d0aa..bd7acc71e8a 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -678,6 +678,30 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) return 0; } +int __init early_acpi_boot_init(void) +{ + int ret; + + /* + * do a partial walk of MADT to determine how many CPUs + * we have including offline CPUs + */ + if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + return 0; + } + + ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, + acpi_parse_lsapic, NR_CPUS); + if (ret < 1) + printk(KERN_ERR PREFIX + "Error parsing MADT - no LAPIC entries\n"); + + return 0; +} + + + int __init acpi_boot_init(void) { @@ -701,11 +725,6 @@ int __init acpi_boot_init(void) printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_lsapic, NR_CPUS) - < 1) - printk(KERN_ERR PREFIX - "Error parsing MADT - no LAPIC entries\n"); - if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0) < 0) printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ae7911702bf..bf441f49682 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -561,8 +561,12 @@ setup_arch (char **cmdline_p) 
#ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); + early_acpi_boot_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? 32 : cpus_weight(early_cpu_possible_map)), additional_cpus > 0 ? additional_cpus : 0); @@ -853,9 +857,6 @@ void __init setup_per_cpu_areas (void) { /* start_kernel() requires this... */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU - prefill_possible_map(); -#endif } /* -- cgit v1.2.3-18-g5258 From b1cce6b1b2785fd61454b47ceacb461815407662 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 4 Nov 2008 10:52:28 +0000 Subject: [ARM] mm: fix page table initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a result of the ptebits changes, we ended up marking device mappings as normal memory on ARMv7 CPUs, resulting in undesirable behaviour with serial ports and the like. While reviewing the section mapping table entries, other errors in the memory type settings for devices were detected and confirmed to prevent Xscale3 platforms booting. 
Tested on: OMAP34xx (ARMv7), OMAP24xx (ARMv6), OMAP16xx (ARM926T, ARMv5), PXA311 (Xscale3), PXA272 (Xscale), PXA255 (Xscale), IXP42x (Xscale), S3C2410 (ARM920T, ARMv4T), ARM720T (ARMv4T) StrongARM-110 (ARMv4) Acked-by: Tony Lindgren Tested-by: Robert Jarzmik Tested-by: Mike Rapoport Tested-by: Ben Dooks Tested-by: Anders Grafström Signed-off-by: Russell King --- arch/arm/include/asm/system.h | 4 ++ arch/arm/mm/mmu.c | 107 ++++++++++++++++++++++++++++++------------ arch/arm/mm/proc-v7.S | 12 ++--- 3 files changed, 87 insertions(+), 36 deletions(-) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 7aad78420f1..568020b34e3 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -42,6 +42,10 @@ #define CR_U (1 << 22) /* Unaligned access operation */ #define CR_XP (1 << 23) /* Extended page tables */ #define CR_VE (1 << 24) /* Vectored interrupts */ +#define CR_EE (1 << 25) /* Exception (Big) Endian */ +#define CR_TRE (1 << 28) /* TEX remap enable */ +#define CR_AFE (1 << 29) /* Access flag enable */ +#define CR_TE (1 << 30) /* Thumb exception enable */ /* * This is used to ensure the compiler did actually allocate the register we diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 34e53596ff1..e63db11f16a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -180,20 +180,20 @@ void adjust_cr(unsigned long mask, unsigned long set) #endif #define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE -#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_XN|PMD_SECT_AP_WRITE +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE static struct mem_type mem_types[] = { [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | L_PTE_SHARED, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_UNCACHED, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, .domain = DOMAIN_IO, }, [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ 
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_TEX(2), + .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, [MT_DEVICE_CACHED] = { /* ioremap_cached */ @@ -205,7 +205,7 @@ static struct mem_type mem_types[] = { [MT_DEVICE_WC] = { /* ioremap_wc */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE, + .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, [MT_CACHECLEAN] = { @@ -273,22 +273,23 @@ static void __init build_mem_type_table(void) #endif /* - * On non-Xscale3 ARMv5-and-older systems, use CB=01 - * (Uncached/Buffered) for ioremap_wc() mappings. On XScale3 - * and ARMv6+, use TEXCB=00100 mappings (Inner/Outer Uncacheable - * in xsc3 parlance, Uncached Normal in ARMv6 parlance). + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. */ - if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) { - mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); - mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE; - } + if (cpu_arch < CPU_ARCH_ARMv5) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; /* - * ARMv5 and lower, bit 4 must be set for page tables. - * (was: cache "update-able on write" bit on ARM610) - * However, Xscale cores require this bit to be cleared. + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. 
*/ - if (cpu_is_xscale()) { + if (cpu_is_xscale() || cpu_is_xsc3()) { for (i = 0; i < ARRAY_SIZE(mem_types); i++) { mem_types[i].prot_sect &= ~PMD_BIT4; mem_types[i].prot_l1 &= ~PMD_BIT4; @@ -302,6 +303,64 @@ static void __init build_mem_type_table(void) } } + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). 
+ */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* + * Now deal with the memory-type mappings + */ cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; @@ -317,12 +376,8 @@ static void __init build_mem_type_table(void) * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) */ - if (arch_is_coherent()) { - if (cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - } - } + if (arch_is_coherent() && cpu_is_xsc3()) + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; /* * ARMv6 and above have extended page tables. @@ -336,11 +391,6 @@ static void __init build_mem_type_table(void) mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - /* - * Mark the device area as "shared device" - */ - mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; - #ifdef CONFIG_SMP /* * Mark memory with the "shared" attribute for SMP systems @@ -360,9 +410,6 @@ static void __init build_mem_type_table(void) mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; - if (cpu_arch < CPU_ARCH_ARMv5) - mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); - pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 07f82db7094..4d3c0a73e7f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -115,7 +115,7 @@ ENTRY(cpu_v7_set_pte_ext) orr r3, r3, r2 orr r3, r3, #PTE_EXT_AP0 | 2 - tst r2, #1 << 4 + tst r1, #1 << 
4 orrne r3, r3, #PTE_EXT_TEX(1) tst r1, #L_PTE_WRITE @@ -192,11 +192,11 @@ __v7_setup: mov pc, lr @ return to head.S:__ret ENDPROC(__v7_setup) - /* - * V X F I D LR - * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM - * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 0 110 0011 1.00 .111 1101 < we want + /* AT + * TFR EV X F I D LR + * .EEE ..EE PUI. .T.T 4RVI ZFRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 1 0 110 0011 1.00 .111 1101 < we want */ .type v7_crval, #object v7_crval: -- cgit v1.2.3-18-g5258 From c7cf72dcadbe39c2077b32460f86c9f8167be3be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 6 Nov 2008 17:43:55 -0700 Subject: [ARM] xsc3: fix xsc3_l2_inv_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 'start' and 'end' are less than a cacheline apart and 'start' is unaligned we are done after cleaning and invalidating the first cacheline. So check for (start < end) which will not walk off into invalid address ranges when (start > end). This issue was caught by drivers/dma/dmatest. 2.6.27 is susceptible. Cc: Cc: Haavard Skinnemoen Cc: Lothar Waßmann Cc: Lennert Buytenhek Cc: Eric Miao Signed-off-by: Dan Williams --- arch/arm/mm/cache-xsc3l2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index 10b1bae1a25..464de893a98 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -98,7 +98,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) /* * Clean and invalidate partial last cache line. */ - if (end & (CACHE_LINE_SIZE - 1)) { + if (start < end && (end & (CACHE_LINE_SIZE - 1))) { xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); end &= ~(CACHE_LINE_SIZE - 1); @@ -107,7 +107,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) /* * Invalidate all full cache lines between 'start' and 'end'.
*/ - while (start != end) { + while (start < end) { xsc3_l2_inv_pa(start); start += CACHE_LINE_SIZE; } -- cgit v1.2.3-18-g5258 From 934f6c3f8e7f5d6a6d07ae2df283fd02393019dd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 6 Nov 2008 15:49:04 -0500 Subject: Revert "ath5k: honor FIF_BCN_PRBRESP_PROMISC in STA mode" Unfortunately, the result was that mac80211 didn't see all the beacons it actually wanted to see. This caused lost associations. Hopefully we can revisit this when mac80211 is less greedy about seeing beacons directly... This reverts commit 063279062a8c530cc90fb77797db16c49c905b26. Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 9e47d727e22..cfd4d052d66 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -2942,8 +2942,10 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw, sc->opmode != NL80211_IFTYPE_MESH_POINT && test_bit(ATH_STAT_PROMISC, sc->status)) rfilt |= AR5K_RX_FILTER_PROM; - if (sc->opmode == NL80211_IFTYPE_ADHOC) + if (sc->opmode == NL80211_IFTYPE_STATION || + sc->opmode == NL80211_IFTYPE_ADHOC) { rfilt |= AR5K_RX_FILTER_BEACON; + } /* Set filters */ ath5k_hw_set_rx_filter(ah,rfilt); -- cgit v1.2.3-18-g5258 From 502c12e1ef14967e08dabb04c674cf0f000e8f7e Mon Sep 17 00:00:00 2001 From: Mohamed Abbas Date: Thu, 23 Oct 2008 23:48:54 -0700 Subject: iwlwifi: clear scanning bits upon failure In iwl_bg_request_scan function, if we could not send a scan command it will go to done. In done it does the right thing to call mac80211 with scan complete, but the problem is STATUS_SCAN_HW is still set causing any future scan to fail. Fix by clearing the scanning status bits if scan fails. Signed-off-by: Mohamed Abbas Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-scan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index 3b0bee331a3..c89365e2ca5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -896,6 +896,13 @@ static void iwl_bg_request_scan(struct work_struct *data) return; done: + /* Cannot perform scan. Make sure we clear scanning + * bits from status so next scan request can be performed. + * If we don't clear scanning status bit here all next scan + * will fail + */ + clear_bit(STATUS_SCAN_HW, &priv->status); + clear_bit(STATUS_SCANNING, &priv->status); /* inform mac80211 scan aborted */ queue_work(priv->workqueue, &priv->scan_completed); mutex_unlock(&priv->mutex); -- cgit v1.2.3-18-g5258 From 964d2777438bf7687324243d38ade538d9bbfe3c Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 30 Oct 2008 14:12:21 -0400 Subject: iwlagn: avoid sleep in softirq context __ieee80211_tasklet_handler -> __ieee80211_rx -> __ieee80211_rx_handle_packet -> ieee80211_invoke_rx_handlers -> ieee80211_rx_h_decrypt -> ieee80211_crypto_tkip_decrypt -> ieee80211_tkip_decrypt_data -> iwl4965_mac_update_tkip_key -> iwl_scan_cancel_timeout -> msleep Ooops! Avoid the sleep by changing iwl_scan_cancel_timeout with iwl_scan_cancel and simply returning on failure if the scan persists. This will cause hardware decryption to fail and we'll handle a few more frames with software decryption. Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 321dbc8c034..8d690a0eb1a 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3252,7 +3252,11 @@ static void iwl4965_mac_update_tkip_key(struct ieee80211_hw *hw, return; } - iwl_scan_cancel_timeout(priv, 100); + if (iwl_scan_cancel(priv)) { + /* cancel scan failed, just live w/ bad key and rely + briefly on SW decryption */ + return; + } key_flags |= (STA_KEY_FLG_TKIP | STA_KEY_FLG_MAP_KEY_MSK); key_flags |= cpu_to_le16(keyconf->keyidx << STA_KEY_FLG_KEYID_POS); -- cgit v1.2.3-18-g5258 From 4a9d916717de0aab4313d43817164577255242fb Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Thu, 30 Oct 2008 22:46:48 +0000 Subject: Fix logic error in rfkill_check_duplicity > I'll have a prod at why the [hso] rfkill stuff isn't working next Ok, I believe this is due to the addition of rfkill_check_duplicity in rfkill and the fact that test_bit actually returns a negative value rather than the positive one expected (which is of course equally true). So when the second WLAN device (the hso device, with the EEE PC WLAN being the first) comes along rfkill_check_duplicity returns a negative value and so rfkill_register returns an error. Patch below fixes this for me. Signed-Off-By: Jonathan McDowell Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- net/rfkill/rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index f949a482b00..25ba3bd57e6 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -603,7 +603,7 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill) } /* 0: first switch of its kind */ - return test_bit(rfkill->type, seen); + return (test_bit(rfkill->type, seen)) ?
1 : 0; } static int rfkill_add_switch(struct rfkill *rfkill) -- cgit v1.2.3-18-g5258 From 0feec9dfe7b8880ab3b4c38d7cc4107dd706ea7f Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sat, 1 Nov 2008 17:03:48 +0000 Subject: zd1211rw: Add 2 device IDs 07fa/1196 Bewan BWIFI-USB54AR: Tested by night1308, this device is a ZD1211B with an AL2230S radio. 0ace/b215 HP 802.11abg: Tested by Robert Philippe Signed-off-by: John W. Linville --- drivers/net/wireless/zd1211rw/zd_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index a60ae86bd5c..a3ccd8c1c71 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -61,6 +61,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0105, 0x145f), .driver_info = DEVICE_ZD1211 }, /* ZD1211B */ { USB_DEVICE(0x0ace, 0x1215), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x0ace, 0xb215), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x157e, 0x300d), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x079b, 0x0062), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x1582, 0x6003), .driver_info = DEVICE_ZD1211B }, @@ -82,6 +83,7 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0cde, 0x001a), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0586, 0x340a), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0471, 0x1237), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x07fa, 0x1196), .driver_info = DEVICE_ZD1211B }, /* "Driverless" devices that need ejecting */ { USB_DEVICE(0x0ace, 0x2011), .driver_info = DEVICE_INSTALLER }, { USB_DEVICE(0x0ace, 0x20ff), .driver_info = DEVICE_INSTALLER }, -- cgit v1.2.3-18-g5258 From c793033945bea23d7a6e0d8d94b2da6603e02af2 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 3 Nov 2008 22:14:00 -0500 Subject: ath5k: correct handling of rx status fields ath5k_rx_status fields rs_antenna and rs_more are u8s, but we were setting them with bitwise ANDs of 32-bit values. 
As a consequence, jumbo frames would not be discarded as intended. Then, because the hw rate value of such frames is zero, and, since "ath5k: rates cleanup", we do not fall back to the basic rate, such packets would trigger the following WARN_ON: ------------[ cut here ]------------ WARNING: at net/mac80211/rx.c:2192 __ieee80211_rx+0x4d/0x57e [mac80211]() Modules linked in: ath5k af_packet sha256_generic aes_i586 aes_generic cbc loop i915 drm binfmt_misc acpi_cpufreq fan container nls_utf8 hfsplus dm_crypt dm_mod kvm_intel kvm fuse sbp2 snd_hda_intel snd_pcm_oss snd_pcm snd_mixer_oss snd_seq_dummy snd_seq_oss arc4 joydev hid_apple ecb snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device usbhid appletouch mac80211 sky2 snd ehci_hcd ohci1394 bitrev crc32 sr_mod cdrom rtc sg uhci_hcd snd_page_alloc cfg80211 ieee1394 thermal ac battery processor button evdev unix [last unloaded: ath5k] Pid: 0, comm: swapper Tainted: G W 2.6.28-rc2-wl #14 Call Trace: [] warn_on_slowpath+0x41/0x5b [] ? sched_debug_show+0x31e/0x9c6 [] ? vprintk+0x369/0x389 [] ? _spin_unlock_irqrestore+0x54/0x58 [] ? try_to_wake_up+0x14f/0x15a [] __ieee80211_rx+0x4d/0x57e [mac80211] [] ath5k_tasklet_rx+0x5a1/0x5e4 [ath5k] [] ? clockevents_program_event+0xd4/0xe3 [] tasklet_action+0x94/0xfd [] __do_softirq+0x8c/0x13e [] do_softirq+0x39/0x55 [] irq_exit+0x46/0x85 [] do_IRQ+0x9a/0xb2 [] common_interrupt+0x28/0x30 [] ? acpi_idle_enter_bm+0x2ad/0x31b [processor] [] cpuidle_idle_call+0x65/0x9a [] cpu_idle+0x76/0xa6 [] rest_init+0x62/0x64 Signed-off-by: Bob Copeland Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath5k/desc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath5k/desc.c b/drivers/net/wireless/ath5k/desc.c index dd1374052ba..5e362a7a362 100644 --- a/drivers/net/wireless/ath5k/desc.c +++ b/drivers/net/wireless/ath5k/desc.c @@ -531,10 +531,10 @@ static int ath5k_hw_proc_5210_rx_status(struct ath5k_hw *ah, AR5K_5210_RX_DESC_STATUS0_RECEIVE_SIGNAL); rs->rs_rate = AR5K_REG_MS(rx_status->rx_status_0, AR5K_5210_RX_DESC_STATUS0_RECEIVE_RATE); - rs->rs_antenna = rx_status->rx_status_0 & - AR5K_5210_RX_DESC_STATUS0_RECEIVE_ANTENNA; - rs->rs_more = rx_status->rx_status_0 & - AR5K_5210_RX_DESC_STATUS0_MORE; + rs->rs_antenna = AR5K_REG_MS(rx_status->rx_status_0, + AR5K_5210_RX_DESC_STATUS0_RECEIVE_ANTENNA); + rs->rs_more = !!(rx_status->rx_status_0 & + AR5K_5210_RX_DESC_STATUS0_MORE); /* TODO: this timestamp is 13 bit, later on we assume 15 bit */ rs->rs_tstamp = AR5K_REG_MS(rx_status->rx_status_1, AR5K_5210_RX_DESC_STATUS1_RECEIVE_TIMESTAMP); @@ -607,10 +607,10 @@ static int ath5k_hw_proc_5212_rx_status(struct ath5k_hw *ah, AR5K_5212_RX_DESC_STATUS0_RECEIVE_SIGNAL); rs->rs_rate = AR5K_REG_MS(rx_status->rx_status_0, AR5K_5212_RX_DESC_STATUS0_RECEIVE_RATE); - rs->rs_antenna = rx_status->rx_status_0 & - AR5K_5212_RX_DESC_STATUS0_RECEIVE_ANTENNA; - rs->rs_more = rx_status->rx_status_0 & - AR5K_5212_RX_DESC_STATUS0_MORE; + rs->rs_antenna = AR5K_REG_MS(rx_status->rx_status_0, + AR5K_5212_RX_DESC_STATUS0_RECEIVE_ANTENNA); + rs->rs_more = !!(rx_status->rx_status_0 & + AR5K_5212_RX_DESC_STATUS0_MORE); rs->rs_tstamp = AR5K_REG_MS(rx_status->rx_status_1, AR5K_5212_RX_DESC_STATUS1_RECEIVE_TIMESTAMP); rs->rs_status = 0; -- cgit v1.2.3-18-g5258 From 2420ebc104d38567ee977a3c15dc675a9dd3b07c Mon Sep 17 00:00:00 2001 From: Mohamed Abbas Date: Tue, 4 Nov 2008 12:21:34 -0800 Subject: iwl3945: clear scanning bits upon failure This patch ensures we clear any scan status bit when an error occurs while 
sending the scan command. It is the implementation of patch: "iwlwifi: clear scanning bits upon failure" for iwl3945. Signed-off-by: Mohamed Abbas Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index d15a2c99795..b9eac5551d8 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6273,6 +6273,14 @@ static void iwl3945_bg_request_scan(struct work_struct *data) return; done: + /* can not perform scan make sure we clear scanning + * bits from status so next scan request can be performed. + * if we dont clear scanning status bit here all next scan + * will fail + */ + clear_bit(STATUS_SCAN_HW, &priv->status); + clear_bit(STATUS_SCANNING, &priv->status); + /* inform mac80211 scan aborted */ queue_work(priv->workqueue, &priv->scan_completed); mutex_unlock(&priv->mutex); -- cgit v1.2.3-18-g5258 From 14b5433606289dbc5b6fd70ced11462f80e95003 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 4 Nov 2008 12:21:35 -0800 Subject: iwl3945: do not send scan command if channel count zero Do not send scan command if no channels to scan. This avoids a Microcode error as reported in: http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=1650 http://bugzilla.kernel.org/show_bug.cgi?id=11806 http://marc.info/?l=linux-wireless&m=122437145211886&w=2 Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b9eac5551d8..81dfcb88285 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6256,6 +6256,11 @@ static void iwl3945_bg_request_scan(struct work_struct *data) n_probes, (void *)&scan->data[le16_to_cpu(scan->tx_cmd.len)]); + if (scan->channel_count == 0) { + IWL_DEBUG_SCAN("channel count %d\n", scan->channel_count); + goto done; + } + cmd.len += le16_to_cpu(scan->tx_cmd.len) + scan->channel_count * sizeof(struct iwl3945_scan_channel); cmd.data = scan; -- cgit v1.2.3-18-g5258 From d54bc4e3fc5c56600a13c9ebc0a7e1077ac05d59 Mon Sep 17 00:00:00 2001 From: "Zhu, Yi" Date: Tue, 4 Nov 2008 12:21:36 -0800 Subject: iwl3945: fix deadlock on suspend This patch fixes iwl3945 deadlock during suspend by moving notify_mac out of iwl3945 mutex. This is a portion of the same fix for iwlwifi by Tomas. Signed-off-by: Zhu Yi Signed-off-by: Tomas Winkler Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 81dfcb88285..285b53e7e26 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -5768,7 +5768,6 @@ static void iwl3945_alive_start(struct iwl3945_priv *priv) if (priv->error_recovering) iwl3945_error_recovery(priv); - ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); return; restart: @@ -6013,6 +6012,7 @@ static void iwl3945_bg_alive_start(struct work_struct *data) mutex_lock(&priv->mutex); iwl3945_alive_start(priv); mutex_unlock(&priv->mutex); + ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); } static void iwl3945_bg_rf_kill(struct work_struct *work) -- cgit v1.2.3-18-g5258 From ac51d83705c2a38c71f39cde99708b14e6212a60 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Nov 2008 16:49:36 -0500 Subject: ext4: calculate journal credits correctly This fixes a 2.6.27 regression which was introduced in commit a02908f1. We weren't passing the chunk parameter down to the two subfunctions, ext4_indirect_trans_blocks() and ext4_ext_index_trans_blocks(), with the result that we massively overestimate the amount of credits needed by ext4_da_writepages, especially in the non-extents case. This causes failures especially on /boot partitions, which tend to be small and non-extent using since GRUB doesn't handle extents.
This patch fixes the bug reported by Joseph Fannin at: http://bugzilla.kernel.org/show_bug.cgi?id=11964 Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8dbf6953845..5a130b56f1c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4580,9 +4580,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) - return ext4_indirect_trans_blocks(inode, nrblocks, 0); - return ext4_ext_index_trans_blocks(inode, nrblocks, 0); + return ext4_indirect_trans_blocks(inode, nrblocks, chunk); + return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } + /* * Account for index blocks, block groups bitmaps and block group * descriptor blocks if modify datablocks and index blocks -- cgit v1.2.3-18-g5258 From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. 
Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a711..295b7c756ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103c..33e9986beb8 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a4..ab242cc1acc 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, 
sizeof(*fpl)); -- cgit v1.2.3-18-g5258 From 47cb2ed9df2789fc4a3fe1201e475078f93c4839 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 6 Nov 2008 13:48:24 -0800 Subject: x86, xen: fix use of pgd_page now that it really does return a page Impact: fix 32-bit Xen guest boot crash On 32-bit PAE, pud_page, for no good reason, didn't really return a struct page *. Since Jan Beulich's fix "i386/PAE: fix pud_page()", pud_page does return a struct page *. Because PAE has 3 pagetable levels, the pud level is folded into the pgd level, so pgd_page() is the same as pud_page(), and now returns a struct page *. Update the xen/mmu.c code which uses pgd_page() accordingly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aba77b2b7d1..49697d86c6a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -874,7 +874,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) #else /* CONFIG_X86_32 */ #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); @@ -991,7 +991,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif -- cgit v1.2.3-18-g5258 From 77ca7286d10b798e4907af941f29672bf484db77 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:14 -0800 Subject: cciss: new hardware support Add support for 2 new SAS/SATA controllers. 
Signed-off-by: Mike Miller Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cciss.txt | 2 ++ drivers/block/cciss.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index 8244c6442fa..48d80d95f0f 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -26,6 +26,8 @@ This driver is known to work with the following cards: * SA P410i * SA P411 * SA P812 + * SA P712m + * SA P711m Detecting drive failures: ------------------------- diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4023885353e..00048bd26e7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -96,6 +96,8 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3245}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3247}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {0,} @@ -133,6 +135,8 @@ static struct board_type products[] = { {0x3245103C, "Smart Array P410i", &SA5_access}, {0x3247103C, "Smart Array P411", &SA5_access}, {0x3249103C, "Smart Array P812", &SA5_access}, + {0x324A103C, "Smart Array P712m", &SA5_access}, + {0x324B103C, "Smart Array P711m", &SA5_access}, {0xFFFF103C, "Unknown Smart Array", &SA5_access}, }; -- cgit v1.2.3-18-g5258 From 2197d18ded232ef6eef63cce57b6b21eddf1b7b6 Mon Sep 17 00:00:00 2001 From: Andrey Borzenkov Date: Thu, 6 Nov 2008 12:53:15 -0800 Subject: cpqarry: fix return value of cpqarray_init() As reported by Dick Gevers on Compaq ProLiant: Oct 13 18:06:51 dvgcpl kernel: Compaq SMART2 Driver (v 2.6.0) Oct 13 18:06:51 dvgcpl kernel: sys_init_module: 'cpqarray'->init suspiciously returned 1, it should follow 0/-E convention Oct 13 18:06:51 
dvgcpl kernel: sys_init_module: loading module anyway... Oct 13 18:06:51 dvgcpl kernel: Pid: 315, comm: modprobe Not tainted 2.6.27-desktop-0.rc8.2mnb #1 Oct 13 18:06:51 dvgcpl kernel: [] ? printk+0x18/0x1e Oct 13 18:06:51 dvgcpl kernel: [] sys_init_module+0x155/0x1c0 Oct 13 18:06:51 dvgcpl kernel: [] syscall_call+0x7/0xb Oct 13 18:06:51 dvgcpl kernel: ======================= Make it return 0 on success and -ENODEV if no array was found. Reported-by: Dick Gevers Signed-off-by: Andrey Borzenkov Cc: Jens Axboe Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cpqarray.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 47d233c6d0b..5d39df14ed9 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -567,7 +567,12 @@ static int __init cpqarray_init(void) num_cntlrs_reg++; } - return(num_cntlrs_reg); + if (num_cntlrs_reg) + return 0; + else { + pci_unregister_driver(&cpqarray_pci_driver); + return -ENODEV; + } } /* Function to find the first free pointer into our hba[] array */ -- cgit v1.2.3-18-g5258 From a564738c1c9c7b9ed696bf4116267789201ac8ac Mon Sep 17 00:00:00 2001 From: Wolfgang Kroworsch Date: Thu, 6 Nov 2008 12:53:16 -0800 Subject: vt: incomplete initialization of vc_tab_stop Problem 1 (see patch below): vc_tab_stop is declared as an array of 8 unsigned ints in struct vc_data in include/linux/console_struct.h . In drivers/char/vt.c only 5 of these 8 unsigned ints get initialized leading to unintended tabulator placement on displays with more than 160 columns text. 
Problem 2 (open): Upcoming displays will have more than 256 columns of text leading to invalid memory access in drivers/char/vt.c during tabulator calculations: if (vc->vc_tab_stop[vc->vc_x >> 5] & (1 << (vc->vc_x & 31))) break; Signed-off-by: Wolfgang Kroworsch Cc: Alan Cox Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d8f83e26e4a..a5af6072e2b 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -1644,7 +1644,10 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = - vc->vc_tab_stop[4] = 0x01010101; + vc->vc_tab_stop[4] = + vc->vc_tab_stop[5] = + vc->vc_tab_stop[6] = + vc->vc_tab_stop[7] = 0x01010101; vc->vc_bell_pitch = DEFAULT_BELL_PITCH; vc->vc_bell_duration = DEFAULT_BELL_DURATION; @@ -1935,7 +1938,10 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc->vc_tab_stop[1] = vc->vc_tab_stop[2] = vc->vc_tab_stop[3] = - vc->vc_tab_stop[4] = 0; + vc->vc_tab_stop[4] = + vc->vc_tab_stop[5] = + vc->vc_tab_stop[6] = + vc->vc_tab_stop[7] = 0; } return; case 'm': -- cgit v1.2.3-18-g5258 From 9e3a4afd5a66f9047e30ba225525e6ff01612dc4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 6 Nov 2008 12:53:18 -0800 Subject: rtc: fix handling of missing tm_year data when reading alarms When fixing up invalid years rtc_read_alarm() was calling rtc_valid_tm() as a boolean but rtc_valid_tm() returns zero on success or a negative number if the time is not valid so the test was inverted. 
Signed-off-by: Mark Brown Acked-by: Alessandro Zummo Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 7af60b98d8a..a04c1b6b157 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -271,7 +271,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); do { alarm->time.tm_year++; - } while (!rtc_valid_tm(&alarm->time)); + } while (rtc_valid_tm(&alarm->time) != 0); break; default: -- cgit v1.2.3-18-g5258 From 6e3530fa241ae759313496f67295c9252691ed04 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:19 -0800 Subject: hwmon: applesmc: add support for iMac 5 Add temperature sensor support for iMac 5. Signed-off-by: Henrik Rydberg Tested-by: Ricky Campbell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index bc011da79e1..80d545d3aa1 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -116,6 +116,8 @@ static const char* temperature_sensors_sets[][36] = { /* Set 9: Macbook Pro 3,1 (Santa Rosa) */ { "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL }, +/* Set 10: iMac 5,1 */ + { "TA0P", "TC0D", "TC0P", "TG0D", "TH0P", "TO0P", "Tm0P", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1276,6 +1278,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 8 }, /* MacBook Pro 3: accelerometer, backlight and temperature set 9 */ { .accelerometer = 1, .light = 1, .temperature_set = 9 }, +/* iMac 5: light sensor only, temperature set 10 */ + { .accelerometer = 0, .light = 0, .temperature_set = 10 
}, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1317,6 +1321,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") }, &applesmc_dmi_data[4]}, + { applesmc_dmi_match, "Apple iMac 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac5") }, + &applesmc_dmi_data[10]}, { applesmc_dmi_match, "Apple iMac", { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"iMac") }, -- cgit v1.2.3-18-g5258 From 181209a1d91756bfd83b1d6ce2008cea3ca225b6 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:20 -0800 Subject: hwmon: applesmc: add support for Macbook 5 Add accelerometer, backlight and temperature sensor support for the new unibody Macbook 5. Signed-off-by: Henrik Rydberg Tested-by: David M. Lary Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 80d545d3aa1..074f7f4719f 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -118,6 +118,9 @@ static const char* temperature_sensors_sets[][36] = { "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL }, /* Set 10: iMac 5,1 */ { "TA0P", "TC0D", "TC0P", "TG0D", "TH0P", "TO0P", "Tm0P", NULL }, +/* Set 11: Macbook 5,1 */ + { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0P", "TN0D", "TN0P", + "TTF0", "Th0H", "Th1H", "ThFH", "Ts0P", "Ts0S", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1280,6 +1283,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 9 }, /* iMac 5: light sensor only, temperature set 10 */ { .accelerometer = 0, .light = 0, .temperature_set = 10 }, +/* MacBook 5: accelerometer, backlight and temperature set 11 */ + { .accelerometer = 1, .light = 1, .temperature_set = 11 }, }; /* Note that 
DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1309,6 +1314,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") }, &applesmc_dmi_data[6]}, + { applesmc_dmi_match, "Apple MacBook 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5") }, + &applesmc_dmi_data[11]}, { applesmc_dmi_match, "Apple MacBook", { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") }, -- cgit v1.2.3-18-g5258 From a66603257bf88bbe2c9fd6a97ee5dc24de15d196 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:21 -0800 Subject: hwmon: applesmc: add support for Macbook Pro 5 Add accelerometer, backlight and temperature sensor support for the new unibody Macbook Pro 5. Signed-off-by: Henrik Rydberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 074f7f4719f..9f04283beae 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -121,6 +121,10 @@ static const char* temperature_sensors_sets[][36] = { /* Set 11: Macbook 5,1 */ { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0P", "TN0D", "TN0P", "TTF0", "Th0H", "Th1H", "ThFH", "Ts0P", "Ts0S", NULL }, +/* Set 12: Macbook Pro 5,1 */ + { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0F", "TC0P", "TG0D", + "TG0F", "TG0H", "TG0P", "TG0T", "TG1H", "TN0D", "TN0P", "TTF0", + "Th2H", "Tm0P", "Ts0P", "Ts0S", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1285,6 +1289,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 0, .light = 0, .temperature_set = 10 }, /* MacBook 5: accelerometer, backlight and temperature set 11 */ { .accelerometer = 1, .light = 1, .temperature_set = 11 }, +/* MacBook Pro 5: accelerometer, backlight and temperature set 12 */ + { .accelerometer = 1, 
.light = 1, .temperature_set = 12 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1294,6 +1300,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") }, &applesmc_dmi_data[7]}, + { applesmc_dmi_match, "Apple MacBook Pro 5", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5") }, + &applesmc_dmi_data[12]}, { applesmc_dmi_match, "Apple MacBook Pro 4", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") }, -- cgit v1.2.3-18-g5258 From eefc488f96cdde6e152b45675b50bf380b95d99f Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 6 Nov 2008 12:53:22 -0800 Subject: hwmon: applesmc: add support for iMac 8 Add temperature sensor support for iMac 8. Signed-off-by: Henrik Rydberg Tested-by: Klaus Doblmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 9f04283beae..be3285912cb 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -125,6 +125,9 @@ static const char* temperature_sensors_sets[][36] = { { "TB0T", "TB1T", "TB2T", "TB3T", "TC0D", "TC0F", "TC0P", "TG0D", "TG0F", "TG0H", "TG0P", "TG0T", "TG1H", "TN0D", "TN0P", "TTF0", "Th2H", "Tm0P", "Ts0P", "Ts0S", NULL }, +/* Set 13: iMac 8,1 */ + { "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P", + "TL0P", "TO0P", "TW0P", "Tm0P", "Tp0P", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1291,6 +1294,8 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 1, .light = 1, .temperature_set = 11 }, /* MacBook Pro 5: accelerometer, backlight and temperature set 12 */ { .accelerometer = 1, .light = 1, .temperature_set = 12 }, +/* iMac 8: light sensor only, temperature set 13 */ + { .accelerometer = 0, .light = 0, 
.temperature_set = 13 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". @@ -1340,6 +1345,10 @@ static __initdata struct dmi_system_id applesmc_whitelist[] = { DMI_MATCH(DMI_BOARD_VENDOR,"Apple"), DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") }, &applesmc_dmi_data[4]}, + { applesmc_dmi_match, "Apple iMac 8", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac8") }, + &applesmc_dmi_data[13]}, { applesmc_dmi_match, "Apple iMac 5", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "iMac5") }, -- cgit v1.2.3-18-g5258 From bc9c4068388eea01d3b5da31016879f2341ecec5 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 6 Nov 2008 12:53:22 -0800 Subject: autofs4: correct offset mount expire check When checking a directory tree in autofs_tree_busy() we can incorrectly decide that the tree isn't busy. This happens for the case of an active offset mount as autofs4_follow_mount() follows past the active offset mount, which has an open file handle used for expires, causing the file handle not to count toward the busyness check. 
Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/expire.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cde2f8e8935..4b6fb3f628c 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!autofs4_follow_mount(&mnt, &dentry)) + if (!follow_down(&mnt, &dentry)) goto done; - /* This is an autofs submount, we can't expire it */ - if (is_autofs4_dentry(dentry)) - goto done; + if (is_autofs4_dentry(dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* This is an autofs submount, we can't expire it */ + if (sbi->type == AUTOFS_TYPE_INDIRECT) + goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(dentry)) + goto done; + } /* Update the expiry counter if fs is busy */ if (!may_umount_tree(mnt)) { -- cgit v1.2.3-18-g5258 From 96b0317906690997c16c7efffbc4c0fafcd6f7f2 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 6 Nov 2008 12:53:23 -0800 Subject: autofs4: collect version check return The function check_dev_ioctl_version() returns an error code upon fail but it isn't captured and returned in validate_dev_ioctl() as it should be. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ian Kent Signed-off-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/dev-ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e..33bf8cbfd05 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { - int err = -EINVAL; + int err; - if (check_dev_ioctl_version(cmd, param)) { + err = check_dev_ioctl_version(cmd, param); + if (err) { AUTOFS_WARN("invalid device control module version " "supplied for cmd(0x%08x)", cmd); goto out; -- cgit v1.2.3-18-g5258 From 404443081ce5e6f68b5f7eda16c959835ff200c0 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:24 -0800 Subject: cciss: fix sysfs broken symlink regression Regression introduced by commit 6ae5ce8e8d4de666f31286808d2285aa6a50fa40 ("cciss: remove redundant code"). This patch fixes a broken symlink in sysfs that was introduced by the above commit. We broke it in 2.6.27-rc on or about 20080804. Some installers are broken if this symlink does not exist and they may not detect the logical drives configured on the controller. It does not require being backported into 2.6.26.x or earlier kernels. 
Signed-off-by: Mike Miller Cc: Jens Axboe Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 00048bd26e7..dc38368435a 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1370,6 +1370,7 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->first_minor = drv_index << NWD_SHIFT; disk->fops = &cciss_fops; disk->private_data = &h->drv[drv_index]; + disk->driverfs_dev = &h->pdev->dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); -- cgit v1.2.3-18-g5258 From 22bece00dc1f28dd3374c55e464c9f02eb642876 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Thu, 6 Nov 2008 12:53:25 -0800 Subject: cciss: fix regression firmware not displayed in procfs This regression was introduced by commit 6ae5ce8e8d4de666f31286808d2285aa6a50fa40 ("cciss: remove redundant code"). This patch fixes a regression where the controller firmware version is not displayed in procfs. The previous patch would be called anytime something changed. This will get called only once for each controller. 
Signed-off-by: Mike Miller Cc: FUJITA Tomonori Cc: Jens Axboe Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index dc38368435a..12de1fdaa6c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3409,7 +3409,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, int i; int j = 0; int rc; - int dac; + int dac, return_code; + InquiryData_struct *inq_buff = NULL; i = alloc_cciss_hba(); if (i < 0) @@ -3515,6 +3516,25 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* Turn the interrupts on so we can service requests */ hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); + /* Get the firmware version */ + inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL); + if (inq_buff == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto clean4; + } + + return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff, + sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD); + if (return_code == IO_OK) { + hba[i]->firm_ver[0] = inq_buff->data_byte[32]; + hba[i]->firm_ver[1] = inq_buff->data_byte[33]; + hba[i]->firm_ver[2] = inq_buff->data_byte[34]; + hba[i]->firm_ver[3] = inq_buff->data_byte[35]; + } else { /* send command failed */ + printk(KERN_WARNING "cciss: unable to determine firmware" + " version of controller\n"); + } + cciss_procinit(i); hba[i]->cciss_max_sectors = 2048; @@ -3525,6 +3545,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return 1; clean4: + kfree(inq_buff); #ifdef CONFIG_CISS_SCSI_TAPE kfree(hba[i]->scsi_rejects.complete); #endif -- cgit v1.2.3-18-g5258 From 69d177c2fc702d402b17fdca2190d5a7e3ca55c5 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 6 Nov 2008 12:53:26 -0800 Subject: hugetlbfs: handle pages higher order than MAX_ORDER When working with hugepages, hugetlbfs assumes that those hugepages are smaller than 
MAX_ORDER. Specifically it assumes that the mem_map is contiguous and uses that to optimise access to the elements of the mem_map that represent the hugepage. Gigantic pages (such as 16GB pages on powerpc) by definition are of greater order than MAX_ORDER (larger than MAX_ORDER_NR_PAGES in size). This means that we can no longer make use of the buddy allocator guarantees for the contiguity of the mem_map, which ensures that the mem_map is at least contiguous for maximally aligned areas of MAX_ORDER_NR_PAGES pages. This patch adds new mem_map accessors and iterator helpers which handle any discontiguity at MAX_ORDER_NR_PAGES boundaries. It then uses these to implement gigantic page versions of copy_huge_page and clear_huge_page, and to allow follow_hugetlb_page to handle gigantic pages. Signed-off-by: Andy Whitcroft Cc: Jon Tollefson Cc: Mel Gorman Cc: Nick Piggin Cc: Christoph Lameter Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 37 ++++++++++++++++++++++++++++++++++++- mm/internal.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 421aee99b84..e6afe527bd0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma) return 0; } +static void clear_gigantic_page(struct page *page, + unsigned long addr, unsigned long sz) +{ + int i; + struct page *p = page; + + might_sleep(); + for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { + cond_resched(); + clear_user_highpage(p, addr + i * PAGE_SIZE); + } +} static void clear_huge_page(struct page *page, unsigned long addr, unsigned long sz) { int i; + if (unlikely(sz > MAX_ORDER_NR_PAGES)) + return clear_gigantic_page(page, addr, sz); + might_sleep(); for (i = 0; i < sz/PAGE_SIZE; i++) { cond_resched(); @@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page, } } +static void copy_gigantic_page(struct page *dst,
struct page *src, + unsigned long addr, struct vm_area_struct *vma) +{ + int i; + struct hstate *h = hstate_vma(vma); + struct page *dst_base = dst; + struct page *src_base = src; + might_sleep(); + for (i = 0; i < pages_per_huge_page(h); ) { + cond_resched(); + copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); + if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) + return copy_gigantic_page(dst, src, addr, vma); + might_sleep(); for (i = 0; i < pages_per_huge_page(h); i++) { cond_resched(); @@ -2130,7 +2165,7 @@ same_page: if (zeropage_ok) pages[i] = ZERO_PAGE(0); else - pages[i] = page + pfn_offset; + pages[i] = mem_map_offset(page, pfn_offset); get_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index e4e728bdf32..f482460de3e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -175,6 +175,34 @@ static inline void free_page_mlock(struct page *page) { } #endif /* CONFIG_UNEVICTABLE_LRU */ +/* + * Return the mem_map entry representing the 'offset' subpage within + * the maximally aligned gigantic page 'base'. Handle any discontiguity + * in the mem_map at MAX_ORDER_NR_PAGES boundaries. + */ +static inline struct page *mem_map_offset(struct page *base, int offset) +{ + if (unlikely(offset >= MAX_ORDER_NR_PAGES)) + return pfn_to_page(page_to_pfn(base) + offset); + return base + offset; +} + +/* + * Iterator over all subpages withing the maximally aligned gigantic + * page 'base'. Handle any discontiguity in the mem_map. 
+ */ +static inline struct page *mem_map_next(struct page *iter, + struct page *base, int offset) +{ + if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { + unsigned long pfn = page_to_pfn(base) + offset; + if (!pfn_valid(pfn)) + return NULL; + return pfn_to_page(pfn); + } + return iter + 1; +} + /* * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, * so all functions starting at paging_init should be marked __init -- cgit v1.2.3-18-g5258 From 18229df5b613ed0732a766fc37850de2e7988e43 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 6 Nov 2008 12:53:27 -0800 Subject: hugetlb: pull gigantic page initialisation out of the default path As we can determine exactly when a gigantic page is in use we can optimise the common regular page cases by pulling out gigantic page initialisation into its own function. As gigantic pages are never released to buddy we do not need a destructor. This effectively reverts the previous change to the main buddy allocator. It also adds a paranoid check to ensure we never release gigantic pages from hugetlbfs to the main buddy. 
Signed-off-by: Andy Whitcroft Cc: Jon Tollefson Cc: Mel Gorman Cc: Nick Piggin Cc: Christoph Lameter Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 12 +++++++++++- mm/internal.h | 1 + mm/page_alloc.c | 28 +++++++++++++++++++++------- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e6afe527bd0..d143ab67be4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -491,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) { int i; + VM_BUG_ON(h->order >= MAX_ORDER); + h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { @@ -1005,6 +1007,14 @@ found: return 1; } +static void prep_compound_huge_page(struct page *page, int order) +{ + if (unlikely(order > (MAX_ORDER - 1))) + prep_compound_gigantic_page(page, order); + else + prep_compound_page(page, order); +} + /* Put bootmem huge pages into the standard lists after mem_map is up */ static void __init gather_bootmem_prealloc(void) { @@ -1015,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void) struct hstate *h = m->hstate; __ClearPageReserved(page); WARN_ON(page_count(page) != 1); - prep_compound_page(page, h->order); + prep_compound_huge_page(page, h->order); prep_new_huge_page(h, page, page_to_nid(page)); } } diff --git a/mm/internal.h b/mm/internal.h index f482460de3e..13333bc2eb6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_gigantic_page(struct page *page, unsigned long order); static inline void set_page_count(struct page *page, int v) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d0a240fbb8b..54069e64e3a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -260,6 +260,23 @@ static void 
free_compound_page(struct page *page) } void prep_compound_page(struct page *page, unsigned long order) +{ + int i; + int nr_pages = 1 << order; + + set_compound_page_dtor(page, free_compound_page); + set_compound_order(page, order); + __SetPageHead(page); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; + + __SetPageTail(p); + p->first_page = page; + } +} + +#ifdef CONFIG_HUGETLBFS +void prep_compound_gigantic_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; @@ -268,19 +285,17 @@ void prep_compound_page(struct page *page, unsigned long order) set_compound_page_dtor(page, free_compound_page); set_compound_order(page, order); __SetPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { __SetPageTail(p); p->first_page = page; } } +#endif static void destroy_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - struct page *p = page + 1; if (unlikely(compound_order(page) != order)) bad_page(page); @@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order) if (unlikely(!PageHead(page))) bad_page(page); __ClearPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; if (unlikely(!PageTail(p) | (p->first_page != page))) -- cgit v1.2.3-18-g5258 From 953a64798d82ee5467da2bdc0e467ef874fbc208 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 6 Nov 2008 12:53:28 -0800 Subject: MAINTAINERS: make IOAT easier to find Searching MAINTAINERS for "ioat" comes up empty. Fix this. 
Cc: "Dan Williams" Cc: "Sosnowski, Maciej" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d643e862b8e..7e6a17e1de0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -721,7 +721,7 @@ W: http://sourceforge.net/projects/acpi4asus W: http://xf.iksaif.net/acpi4asus S: Maintained -ASYNCHRONOUS TRANSFERS/TRANSFORMS API +ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API P: Dan Williams M: dan.j.williams@intel.com P: Maciej Sosnowski -- cgit v1.2.3-18-g5258 From b4416d2bea007f07f2e74cdc4cb64042ec996c83 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 6 Nov 2008 12:53:29 -0800 Subject: oom: do not dump task state for non thread group leaders When /proc/sys/vm/oom_dump_tasks is enabled, it's only necessary to dump task state information for thread group leaders. The kernel log gets quickly overwhelmed on machines with a massive number of threads by dumping non-thread group leaders. Reviewed-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 64e5b4bcd96..2846a58e5de 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -295,6 +295,8 @@ static void dump_tasks(const struct mem_cgroup *mem) continue; if (mem && !task_in_mem_cgroup(p, mem)) continue; + if (!thread_group_leader(p)) + continue; task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", -- cgit v1.2.3-18-g5258 From 17a1217e12d8c8434f8a3deef7bf980c724a6ac7 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 6 Nov 2008 12:53:29 -0800 Subject: fbdev: add new framebuffer driver for Fujitsu MB862xx GDCs Add a framebuffer driver for the Fujitsu Carmine/Coral-P(A)/Lime graphics controllers. Lime GDC support is known to work on PPC440EPx based lwmon5 and MPC8544E based socrates embedded boards, both equipped with Lime GDC. 
Carmine/Coral-P PCI GDC support is known to work on PPC440EPx based Sequoia board and also on x86 platform. Signed-off-by: Anatolij Gustschin Cc: Dmitry Baryshkov Cc: Anton Vorontsov Cc: Matteo Fortini Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/Kconfig | 32 ++ drivers/video/Makefile | 1 + drivers/video/mb862xx/Makefile | 5 + drivers/video/mb862xx/mb862xx_reg.h | 138 +++++ drivers/video/mb862xx/mb862xxfb.c | 1061 +++++++++++++++++++++++++++++++++++ drivers/video/mb862xx/mb862xxfb.h | 83 +++ 6 files changed, 1320 insertions(+) create mode 100644 drivers/video/mb862xx/Makefile create mode 100644 drivers/video/mb862xx/mb862xx_reg.h create mode 100644 drivers/video/mb862xx/mb862xxfb.c create mode 100644 drivers/video/mb862xx/mb862xxfb.h diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 0f13448c6f7..3f3ce13fef4 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -2083,6 +2083,38 @@ config FB_METRONOME controller. The pre-release name for this device was 8track and could also have been called by some vendors as PVI-nnnn. +config FB_MB862XX + tristate "Fujitsu MB862xx GDC support" + depends on FB + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for Fujitsu Carmine/Coral-P(A)/Lime controllers. + +config FB_MB862XX_PCI_GDC + bool "Carmine/Coral-P(A) GDC" + depends on PCI && FB_MB862XX + ---help--- + This enables framebuffer support for Fujitsu Carmine/Coral-P(A) + PCI graphics controller devices. + +config FB_MB862XX_LIME + bool "Lime GDC" + depends on FB_MB862XX + depends on OF && !FB_MB862XX_PCI_GDC + select FB_FOREIGN_ENDIAN + select FB_LITTLE_ENDIAN + ---help--- + Framebuffer support for Fujitsu Lime GDC on host CPU bus. 
+ +config FB_PRE_INIT_FB + bool "Don't reinitialize, use bootloader's GDC/Display configuration" + depends on FB_MB862XX_LIME + ---help--- + Select this option if display contents should be inherited as set by + the bootloader. + source "drivers/video/omap/Kconfig" source "drivers/video/backlight/Kconfig" diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 248bddc8d0b..e39e33e797d 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -122,6 +122,7 @@ obj-$(CONFIG_FB_SH_MOBILE_LCDC) += sh_mobile_lcdcfb.o obj-$(CONFIG_FB_OMAP) += omap/ obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o obj-$(CONFIG_FB_CARMINE) += carminefb.o +obj-$(CONFIG_FB_MB862XX) += mb862xx/ # Platform or fallback drivers go here obj-$(CONFIG_FB_UVESA) += uvesafb.o diff --git a/drivers/video/mb862xx/Makefile b/drivers/video/mb862xx/Makefile new file mode 100644 index 00000000000..07664814bb1 --- /dev/null +++ b/drivers/video/mb862xx/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the MB862xx framebuffer driver +# + +obj-$(CONFIG_FB_MB862XX) := mb862xxfb.o diff --git a/drivers/video/mb862xx/mb862xx_reg.h b/drivers/video/mb862xx/mb862xx_reg.h new file mode 100644 index 00000000000..2ba65e11850 --- /dev/null +++ b/drivers/video/mb862xx/mb862xx_reg.h @@ -0,0 +1,138 @@ +/* + * Fujitsu MB862xx Graphics Controller Registers/Bits + */ + +#ifndef _MB862XX_REG_H +#define _MB862XX_REG_H + +#ifdef MB862XX_MMIO_BOTTOM +#define MB862XX_MMIO_BASE 0x03fc0000 +#else +#define MB862XX_MMIO_BASE 0x01fc0000 +#endif +#define MB862XX_I2C_BASE 0x0000c000 +#define MB862XX_DISP_BASE 0x00010000 +#define MB862XX_CAP_BASE 0x00018000 +#define MB862XX_DRAW_BASE 0x00030000 +#define MB862XX_GEO_BASE 0x00038000 +#define MB862XX_PIO_BASE 0x00038000 +#define MB862XX_MMIO_SIZE 0x40000 + +/* Host interface/pio registers */ +#define GC_IST 0x00000020 +#define GC_IMASK 0x00000024 +#define GC_SRST 0x0000002c +#define GC_CCF 0x00000038 +#define GC_CID 0x000000f0 +#define GC_REVISION 0x00000084 + +#define 
GC_CCF_CGE_100 0x00000000 +#define GC_CCF_CGE_133 0x00040000 +#define GC_CCF_CGE_166 0x00080000 +#define GC_CCF_COT_100 0x00000000 +#define GC_CCF_COT_133 0x00010000 +#define GC_CID_CNAME_MSK 0x0000ff00 +#define GC_CID_VERSION_MSK 0x000000ff + +/* define enabled interrupts hereby */ +#define GC_INT_EN 0x00000000 + +/* Memory interface mode register */ +#define GC_MMR 0x0000fffc + +/* Display Controller registers */ +#define GC_DCM0 0x00000000 +#define GC_HTP 0x00000004 +#define GC_HDB_HDP 0x00000008 +#define GC_VSW_HSW_HSP 0x0000000c +#define GC_VTR 0x00000010 +#define GC_VDP_VSP 0x00000014 +#define GC_WY_WX 0x00000018 +#define GC_WH_WW 0x0000001c +#define GC_L0M 0x00000020 +#define GC_L0OA0 0x00000024 +#define GC_L0DA0 0x00000028 +#define GC_L0DY_L0DX 0x0000002c +#define GC_DCM1 0x00000100 +#define GC_L0EM 0x00000110 +#define GC_L0WY_L0WX 0x00000114 +#define GC_L0WH_L0WW 0x00000118 +#define GC_DCM2 0x00000104 +#define GC_DCM3 0x00000108 +#define GC_CPM_CUTC 0x000000a0 +#define GC_CUOA0 0x000000a4 +#define GC_CUY0_CUX0 0x000000a8 +#define GC_CUOA1 0x000000ac +#define GC_CUY1_CUX1 0x000000b0 +#define GC_L0PAL0 0x00000400 + +#define GC_CPM_CEN0 0x00100000 +#define GC_CPM_CEN1 0x00200000 + +#define GC_DCM01_ESY 0x00000004 +#define GC_DCM01_SC 0x00003f00 +#define GC_DCM01_RESV 0x00004000 +#define GC_DCM01_CKS 0x00008000 +#define GC_DCM01_L0E 0x00010000 +#define GC_DCM01_DEN 0x80000000 +#define GC_L0M_L0C_8 0x00000000 +#define GC_L0M_L0C_16 0x80000000 +#define GC_L0EM_L0EC_24 0x40000000 +#define GC_L0M_L0W_UNIT 64 + +#define GC_DISP_REFCLK_400 400 + +/* Carmine specific */ +#define MB86297_DRAW_BASE 0x00020000 +#define MB86297_DISP0_BASE 0x00100000 +#define MB86297_DISP1_BASE 0x00140000 +#define MB86297_WRBACK_BASE 0x00180000 +#define MB86297_CAP0_BASE 0x00200000 +#define MB86297_CAP1_BASE 0x00280000 +#define MB86297_DRAMCTRL_BASE 0x00300000 +#define MB86297_CTRL_BASE 0x00400000 +#define MB86297_I2C_BASE 0x00500000 + +#define GC_CTRL_STATUS 0x00000000 +#define 
GC_CTRL_INT_MASK 0x00000004 +#define GC_CTRL_CLK_ENABLE 0x0000000c +#define GC_CTRL_SOFT_RST 0x00000010 + +#define GC_CTRL_CLK_EN_DRAM 0x00000001 +#define GC_CTRL_CLK_EN_2D3D 0x00000002 +#define GC_CTRL_CLK_EN_DISP0 0x00000020 +#define GC_CTRL_CLK_EN_DISP1 0x00000040 + +#define GC_2D3D_REV 0x000004b4 +#define GC_RE_REVISION 0x24240200 + +/* define enabled interrupts hereby */ +#define GC_CARMINE_INT_EN 0x00000004 + +/* DRAM controller */ +#define GC_DCTL_MODE_ADD 0x00000000 +#define GC_DCTL_SETTIME1_EMODE 0x00000004 +#define GC_DCTL_REFRESH_SETTIME2 0x00000008 +#define GC_DCTL_RSV0_STATES 0x0000000C +#define GC_DCTL_RSV2_RSV1 0x00000010 +#define GC_DCTL_DDRIF2_DDRIF1 0x00000014 +#define GC_DCTL_IOCONT1_IOCONT0 0x00000024 + +#define GC_DCTL_STATES_MSK 0x0000000f +#define GC_DCTL_INIT_WAIT_CNT 3000 +#define GC_DCTL_INIT_WAIT_INTERVAL 1 + +/* DRAM ctrl values for Carmine PCI Eval. board */ +#define GC_EVB_DCTL_MODE_ADD 0x012105c3 +#define GC_EVB_DCTL_MODE_ADD_AFT_RST 0x002105c3 +#define GC_EVB_DCTL_SETTIME1_EMODE 0x47498000 +#define GC_EVB_DCTL_REFRESH_SETTIME2 0x00422a22 +#define GC_EVB_DCTL_RSV0_STATES 0x00200003 +#define GC_EVB_DCTL_RSV0_STATES_AFT_RST 0x00200002 +#define GC_EVB_DCTL_RSV2_RSV1 0x0000000f +#define GC_EVB_DCTL_DDRIF2_DDRIF1 0x00556646 +#define GC_EVB_DCTL_IOCONT1_IOCONT0 0x05550555 + +#define GC_DISP_REFCLK_533 533 + +#endif diff --git a/drivers/video/mb862xx/mb862xxfb.c b/drivers/video/mb862xx/mb862xxfb.c new file mode 100644 index 00000000000..38718d95fbb --- /dev/null +++ b/drivers/video/mb862xx/mb862xxfb.c @@ -0,0 +1,1061 @@ +/* + * drivers/mb862xx/mb862xxfb.c + * + * Fujitsu Carmine/Coral-P(A)/Lime framebuffer driver + * + * (C) 2008 Anatolij Gustschin + * DENX Software Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#if defined(CONFIG_PPC_OF) +#include +#endif +#include "mb862xxfb.h" +#include "mb862xx_reg.h" + +#define NR_PALETTE 256 +#define MB862XX_MEM_SIZE 0x1000000 +#define CORALP_MEM_SIZE 0x4000000 +#define CARMINE_MEM_SIZE 0x8000000 +#define DRV_NAME "mb862xxfb" + +#if defined(CONFIG_LWMON5) +static struct mb862xx_gc_mode lwmon5_gc_mode = { + /* Mode for Sharp LQ104V1DG61 TFT LCD Panel */ + { "640x480", 60, 640, 480, 40000, 48, 16, 32, 11, 96, 2, 0, 0, 0 }, + /* 16 bits/pixel, 32MB, 100MHz, SDRAM memory mode value */ + 16, 0x2000000, GC_CCF_COT_100, 0x414fb7f2 +}; +#endif + +#if defined(CONFIG_SOCRATES) +static struct mb862xx_gc_mode socrates_gc_mode = { + /* Mode for Prime View PM070WL4 TFT LCD Panel */ + { "800x480", 45, 800, 480, 40000, 86, 42, 33, 10, 128, 2, 0, 0, 0 }, + /* 16 bits/pixel, 16MB, 133MHz, SDRAM memory mode value */ + 16, 0x1000000, GC_CCF_COT_133, 0x4157ba63 +}; +#endif + +/* Helpers */ +static inline int h_total(struct fb_var_screeninfo *var) +{ + return var->xres + var->left_margin + + var->right_margin + var->hsync_len; +} + +static inline int v_total(struct fb_var_screeninfo *var) +{ + return var->yres + var->upper_margin + + var->lower_margin + var->vsync_len; +} + +static inline int hsp(struct fb_var_screeninfo *var) +{ + return var->xres + var->right_margin - 1; +} + +static inline int vsp(struct fb_var_screeninfo *var) +{ + return var->yres + var->lower_margin - 1; +} + +static inline int d_pitch(struct fb_var_screeninfo *var) +{ + return var->xres * var->bits_per_pixel / 8; +} + +static inline unsigned int chan_to_field(unsigned int chan, + struct fb_bitfield *bf) +{ + chan &= 0xffff; + chan >>= 16 - bf->length; + return chan << bf->offset; +} + +static int mb862xxfb_setcolreg(unsigned regno, + unsigned red, unsigned green, unsigned blue, + unsigned transp, struct fb_info *info) +{ + struct mb862xxfb_par *par = info->par; + unsigned int val; + + switch 
(info->fix.visual) { + case FB_VISUAL_TRUECOLOR: + if (regno < 16) { + val = chan_to_field(red, &info->var.red); + val |= chan_to_field(green, &info->var.green); + val |= chan_to_field(blue, &info->var.blue); + par->pseudo_palette[regno] = val; + } + break; + case FB_VISUAL_PSEUDOCOLOR: + if (regno < 256) { + val = (red >> 8) << 16; + val |= (green >> 8) << 8; + val |= blue >> 8; + outreg(disp, GC_L0PAL0 + (regno * 4), val); + } + break; + default: + return 1; /* unsupported type */ + } + return 0; +} + +static int mb862xxfb_check_var(struct fb_var_screeninfo *var, + struct fb_info *fbi) +{ + unsigned long tmp; + + if (fbi->dev) + dev_dbg(fbi->dev, "%s\n", __func__); + + /* check if these values fit into the registers */ + if (var->hsync_len > 255 || var->vsync_len > 255) + return -EINVAL; + + if ((var->xres + var->right_margin) >= 4096) + return -EINVAL; + + if ((var->yres + var->lower_margin) > 4096) + return -EINVAL; + + if (h_total(var) > 4096 || v_total(var) > 4096) + return -EINVAL; + + if (var->xres_virtual > 4096 || var->yres_virtual > 4096) + return -EINVAL; + + if (var->bits_per_pixel <= 8) + var->bits_per_pixel = 8; + else if (var->bits_per_pixel <= 16) + var->bits_per_pixel = 16; + else if (var->bits_per_pixel <= 32) + var->bits_per_pixel = 32; + + /* + * can cope with 8,16 or 24/32bpp if resulting + * pitch is divisible by 64 without remainder + */ + if (d_pitch(&fbi->var) % GC_L0M_L0W_UNIT) { + int r; + + var->bits_per_pixel = 0; + do { + var->bits_per_pixel += 8; + r = d_pitch(&fbi->var) % GC_L0M_L0W_UNIT; + } while (r && var->bits_per_pixel <= 32); + + if (d_pitch(&fbi->var) % GC_L0M_L0W_UNIT) + return -EINVAL; + } + + /* line length is going to be 128 bit aligned */ + tmp = (var->xres * var->bits_per_pixel) / 8; + if ((tmp & 15) != 0) + return -EINVAL; + + /* set r/g/b positions and validate bpp */ + switch (var->bits_per_pixel) { + case 8: + var->red.length = var->bits_per_pixel; + var->green.length = var->bits_per_pixel; + var->blue.length = 
var->bits_per_pixel; + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->transp.length = 0; + break; + case 16: + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->transp.length = 0; + break; + case 24: + case 32: + var->transp.length = 8; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 24; + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * set display parameters + */ +static int mb862xxfb_set_par(struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + unsigned long reg, sc; + + dev_dbg(par->dev, "%s\n", __func__); + + if (par->pre_init) + return 0; + + /* disp off */ + reg = inreg(disp, GC_DCM1); + reg &= ~GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + + /* set display reference clock div. */ + sc = par->refclk / (1000000 / fbi->var.pixclock) - 1; + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_CKS | GC_DCM01_RESV | GC_DCM01_SC); + reg |= sc << 8; + outreg(disp, GC_DCM1, reg); + dev_dbg(par->dev, "SC 0x%lx\n", sc); + + /* disp dimension, format */ + reg = pack(d_pitch(&fbi->var) / GC_L0M_L0W_UNIT, + (fbi->var.yres - 1)); + if (fbi->var.bits_per_pixel == 16) + reg |= GC_L0M_L0C_16; + outreg(disp, GC_L0M, reg); + + if (fbi->var.bits_per_pixel == 32) { + reg = inreg(disp, GC_L0EM); + outreg(disp, GC_L0EM, reg | GC_L0EM_L0EC_24); + } + outreg(disp, GC_WY_WX, 0); + reg = pack(fbi->var.yres - 1, fbi->var.xres); + outreg(disp, GC_WH_WW, reg); + outreg(disp, GC_L0OA0, 0); + outreg(disp, GC_L0DA0, 0); + outreg(disp, GC_L0DY_L0DX, 0); + outreg(disp, GC_L0WY_L0WX, 0); + outreg(disp, GC_L0WH_L0WW, reg); + + /* both HW-cursors off */ + reg = inreg(disp, GC_CPM_CUTC); + reg &= ~(GC_CPM_CEN0 | GC_CPM_CEN1); + outreg(disp, GC_CPM_CUTC, reg); + + /* timings */ + reg = pack(fbi->var.xres - 1, fbi->var.xres - 1); 
+ outreg(disp, GC_HDB_HDP, reg); + reg = pack((fbi->var.yres - 1), vsp(&fbi->var)); + outreg(disp, GC_VDP_VSP, reg); + reg = ((fbi->var.vsync_len - 1) << 24) | + pack((fbi->var.hsync_len - 1), hsp(&fbi->var)); + outreg(disp, GC_VSW_HSW_HSP, reg); + outreg(disp, GC_HTP, pack(h_total(&fbi->var) - 1, 0)); + outreg(disp, GC_VTR, pack(v_total(&fbi->var) - 1, 0)); + + /* display on */ + reg = inreg(disp, GC_DCM1); + reg |= GC_DCM01_DEN | GC_DCM01_L0E; + reg &= ~GC_DCM01_ESY; + outreg(disp, GC_DCM1, reg); + return 0; +} + +static int mb862xxfb_pan(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct mb862xxfb_par *par = info->par; + unsigned long reg; + + reg = pack(var->yoffset, var->xoffset); + outreg(disp, GC_L0WY_L0WX, reg); + + reg = pack(var->yres_virtual, var->xres_virtual); + outreg(disp, GC_L0WH_L0WW, reg); + return 0; +} + +static int mb862xxfb_blank(int mode, struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + unsigned long reg; + + dev_dbg(fbi->dev, "blank mode=%d\n", mode); + + switch (mode) { + case FB_BLANK_POWERDOWN: + reg = inreg(disp, GC_DCM1); + reg &= ~GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + break; + case FB_BLANK_UNBLANK: + reg = inreg(disp, GC_DCM1); + reg |= GC_DCM01_DEN; + outreg(disp, GC_DCM1, reg); + break; + case FB_BLANK_NORMAL: + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + return 0; +} + +/* framebuffer ops */ +static struct fb_ops mb862xxfb_ops = { + .owner = THIS_MODULE, + .fb_check_var = mb862xxfb_check_var, + .fb_set_par = mb862xxfb_set_par, + .fb_setcolreg = mb862xxfb_setcolreg, + .fb_blank = mb862xxfb_blank, + .fb_pan_display = mb862xxfb_pan, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +/* initialize fb_info data */ +static int mb862xxfb_init_fbinfo(struct fb_info *fbi) +{ + struct mb862xxfb_par *par = fbi->par; + struct mb862xx_gc_mode *mode = par->gc_mode; + unsigned long reg; + + fbi->fbops = 
&mb862xxfb_ops; + fbi->pseudo_palette = par->pseudo_palette; + fbi->screen_base = par->fb_base; + fbi->screen_size = par->mapped_vram; + + strcpy(fbi->fix.id, DRV_NAME); + fbi->fix.smem_start = (unsigned long)par->fb_base_phys; + fbi->fix.smem_len = par->mapped_vram; + fbi->fix.mmio_start = (unsigned long)par->mmio_base_phys; + fbi->fix.mmio_len = par->mmio_len; + fbi->fix.accel = FB_ACCEL_NONE; + fbi->fix.type = FB_TYPE_PACKED_PIXELS; + fbi->fix.type_aux = 0; + fbi->fix.xpanstep = 1; + fbi->fix.ypanstep = 1; + fbi->fix.ywrapstep = 0; + + reg = inreg(disp, GC_DCM1); + if (reg & GC_DCM01_DEN && reg & GC_DCM01_L0E) { + /* get the disp mode from active display cfg */ + unsigned long sc = ((reg & GC_DCM01_SC) >> 8) + 1; + unsigned long hsp, vsp, ht, vt; + + dev_dbg(par->dev, "using bootloader's disp. mode\n"); + fbi->var.pixclock = (sc * 1000000) / par->refclk; + fbi->var.xres = (inreg(disp, GC_HDB_HDP) & 0x0fff) + 1; + reg = inreg(disp, GC_VDP_VSP); + fbi->var.yres = ((reg >> 16) & 0x0fff) + 1; + vsp = (reg & 0x0fff) + 1; + fbi->var.xres_virtual = fbi->var.xres; + fbi->var.yres_virtual = fbi->var.yres; + reg = inreg(disp, GC_L0EM); + if (reg & GC_L0EM_L0EC_24) { + fbi->var.bits_per_pixel = 32; + } else { + reg = inreg(disp, GC_L0M); + if (reg & GC_L0M_L0C_16) + fbi->var.bits_per_pixel = 16; + else + fbi->var.bits_per_pixel = 8; + } + reg = inreg(disp, GC_VSW_HSW_HSP); + fbi->var.hsync_len = ((reg & 0xff0000) >> 16) + 1; + fbi->var.vsync_len = ((reg & 0x3f000000) >> 24) + 1; + hsp = (reg & 0xffff) + 1; + ht = ((inreg(disp, GC_HTP) & 0xfff0000) >> 16) + 1; + fbi->var.right_margin = hsp - fbi->var.xres; + fbi->var.left_margin = ht - hsp - fbi->var.hsync_len; + vt = ((inreg(disp, GC_VTR) & 0xfff0000) >> 16) + 1; + fbi->var.lower_margin = vsp - fbi->var.yres; + fbi->var.upper_margin = vt - vsp - fbi->var.vsync_len; + } else if (mode) { + dev_dbg(par->dev, "using supplied mode\n"); + fb_videomode_to_var(&fbi->var, (struct fb_videomode *)mode); + fbi->var.bits_per_pixel = 
mode->def_bpp ? mode->def_bpp : 8; + } else { + int ret; + + ret = fb_find_mode(&fbi->var, fbi, "640x480-16@60", + NULL, 0, NULL, 16); + if (ret == 0 || ret == 4) { + dev_err(par->dev, + "failed to get initial mode\n"); + return -EINVAL; + } + } + + fbi->var.xoffset = 0; + fbi->var.yoffset = 0; + fbi->var.grayscale = 0; + fbi->var.nonstd = 0; + fbi->var.height = -1; + fbi->var.width = -1; + fbi->var.accel_flags = 0; + fbi->var.vmode = FB_VMODE_NONINTERLACED; + fbi->var.activate = FB_ACTIVATE_NOW; + fbi->flags = FBINFO_DEFAULT | +#ifdef __BIG_ENDIAN + FBINFO_FOREIGN_ENDIAN | +#endif + FBINFO_HWACCEL_XPAN | + FBINFO_HWACCEL_YPAN; + + /* check and possibly fix bpp */ + if ((fbi->fbops->fb_check_var)(&fbi->var, fbi)) + dev_err(par->dev, "check_var() failed on initial setup?\n"); + + fbi->fix.visual = fbi->var.bits_per_pixel == 8 ? + FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; + fbi->fix.line_length = (fbi->var.xres_virtual * + fbi->var.bits_per_pixel) / 8; + return 0; +} + +/* + * show some display controller and cursor registers + */ +static ssize_t mb862xxfb_show_dispregs(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fb_info *fbi = dev_get_drvdata(dev); + struct mb862xxfb_par *par = fbi->par; + char *ptr = buf; + unsigned int reg; + + for (reg = GC_DCM0; reg <= GC_L0DY_L0DX; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + for (reg = GC_CPM_CUTC; reg <= GC_CUY1_CUX1; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + for (reg = GC_DCM1; reg <= GC_L0WH_L0WW; reg += 4) + ptr += sprintf(ptr, "%08x = %08x\n", + reg, inreg(disp, reg)); + + return ptr - buf; +} + +static DEVICE_ATTR(dispregs, 0444, mb862xxfb_show_dispregs, NULL); + +irqreturn_t mb862xx_intr(int irq, void *dev_id) +{ + struct mb862xxfb_par *par = (struct mb862xxfb_par *) dev_id; + unsigned long reg_ist, mask; + + if (!par) + return IRQ_NONE; + + if (par->type == BT_CARMINE) { + /* Get Interrupt Status */ + reg_ist = 
inreg(ctrl, GC_CTRL_STATUS); + mask = inreg(ctrl, GC_CTRL_INT_MASK); + if (reg_ist == 0) + return IRQ_HANDLED; + + reg_ist &= mask; + if (reg_ist == 0) + return IRQ_HANDLED; + + /* Clear interrupt status */ + outreg(ctrl, 0x0, reg_ist); + } else { + /* Get status */ + reg_ist = inreg(host, GC_IST); + mask = inreg(host, GC_IMASK); + + reg_ist &= mask; + if (reg_ist == 0) + return IRQ_HANDLED; + + /* Clear status */ + outreg(host, GC_IST, ~reg_ist); + } + return IRQ_HANDLED; +} + +#if defined(CONFIG_FB_MB862XX_LIME) +/* + * GDC (Lime, Coral(B/Q), Mint, ...) on host bus + */ +static int mb862xx_gdc_init(struct mb862xxfb_par *par) +{ + unsigned long ccf, mmr; + unsigned long ver, rev; + + if (!par) + return -ENODEV; + +#if defined(CONFIG_FB_PRE_INIT_FB) + par->pre_init = 1; +#endif + par->host = par->mmio_base; + par->i2c = par->mmio_base + MB862XX_I2C_BASE; + par->disp = par->mmio_base + MB862XX_DISP_BASE; + par->cap = par->mmio_base + MB862XX_CAP_BASE; + par->draw = par->mmio_base + MB862XX_DRAW_BASE; + par->geo = par->mmio_base + MB862XX_GEO_BASE; + par->pio = par->mmio_base + MB862XX_PIO_BASE; + + par->refclk = GC_DISP_REFCLK_400; + + ver = inreg(host, GC_CID); + rev = inreg(pio, GC_REVISION); + if ((ver == 0x303) && (rev & 0xffffff00) == 0x20050100) { + dev_info(par->dev, "Fujitsu Lime v1.%d found\n", + (int)rev & 0xff); + par->type = BT_LIME; + ccf = par->gc_mode ? par->gc_mode->ccf : GC_CCF_COT_100; + mmr = par->gc_mode ? par->gc_mode->mmr : 0x414fb7f2; + } else { + dev_info(par->dev, "? 
GDC, CID/Rev.: 0x%lx/0x%lx \n", ver, rev); + return -ENODEV; + } + + if (!par->pre_init) { + outreg(host, GC_CCF, ccf); + udelay(200); + outreg(host, GC_MMR, mmr); + udelay(10); + } + + /* interrupt status */ + outreg(host, GC_IST, 0); + outreg(host, GC_IMASK, GC_INT_EN); + return 0; +} + +static int __devinit of_platform_mb862xx_probe(struct of_device *ofdev, + const struct of_device_id *id) +{ + struct device_node *np = ofdev->node; + struct device *dev = &ofdev->dev; + struct mb862xxfb_par *par; + struct fb_info *info; + struct resource res; + resource_size_t res_size; + unsigned long ret = -ENODEV; + + if (of_address_to_resource(np, 0, &res)) { + dev_err(dev, "Invalid address\n"); + return -ENXIO; + } + + info = framebuffer_alloc(sizeof(struct mb862xxfb_par), dev); + if (info == NULL) { + dev_err(dev, "cannot allocate framebuffer\n"); + return -ENOMEM; + } + + par = info->par; + par->info = info; + par->dev = dev; + + par->irq = irq_of_parse_and_map(np, 0); + if (par->irq == NO_IRQ) { + dev_err(dev, "failed to map irq\n"); + ret = -ENODEV; + goto fbrel; + } + + res_size = 1 + res.end - res.start; + par->res = request_mem_region(res.start, res_size, DRV_NAME); + if (par->res == NULL) { + dev_err(dev, "Cannot claim framebuffer/mmio\n"); + ret = -ENXIO; + goto irqdisp; + } + +#if defined(CONFIG_LWMON5) + par->gc_mode = &lwmon5_gc_mode; +#endif + +#if defined(CONFIG_SOCRATES) + par->gc_mode = &socrates_gc_mode; +#endif + + par->fb_base_phys = res.start; + par->mmio_base_phys = res.start + MB862XX_MMIO_BASE; + par->mmio_len = MB862XX_MMIO_SIZE; + if (par->gc_mode) + par->mapped_vram = par->gc_mode->max_vram; + else + par->mapped_vram = MB862XX_MEM_SIZE; + + par->fb_base = ioremap(par->fb_base_phys, par->mapped_vram); + if (par->fb_base == NULL) { + dev_err(dev, "Cannot map framebuffer\n"); + goto rel_reg; + } + + par->mmio_base = ioremap(par->mmio_base_phys, par->mmio_len); + if (par->mmio_base == NULL) { + dev_err(dev, "Cannot map registers\n"); + goto fb_unmap; + 
} + + dev_dbg(dev, "fb phys 0x%llx 0x%lx\n", + (u64)par->fb_base_phys, (ulong)par->mapped_vram); + dev_dbg(dev, "mmio phys 0x%llx 0x%lx, (irq = %d)\n", + (u64)par->mmio_base_phys, (ulong)par->mmio_len, par->irq); + + if (mb862xx_gdc_init(par)) + goto io_unmap; + + if (request_irq(par->irq, mb862xx_intr, IRQF_DISABLED, + DRV_NAME, (void *)par)) { + dev_err(dev, "Cannot request irq\n"); + goto io_unmap; + } + + mb862xxfb_init_fbinfo(info); + + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0) < 0) { + dev_err(dev, "Could not allocate cmap for fb_info.\n"); + goto free_irq; + } + + if ((info->fbops->fb_set_par)(info)) + dev_err(dev, "set_var() failed on initial setup?\n"); + + if (register_framebuffer(info)) { + dev_err(dev, "failed to register framebuffer\n"); + goto rel_cmap; + } + + dev_set_drvdata(dev, info); + + if (device_create_file(dev, &dev_attr_dispregs)) + dev_err(dev, "Can't create sysfs regdump file\n"); + return 0; + +rel_cmap: + fb_dealloc_cmap(&info->cmap); +free_irq: + outreg(host, GC_IMASK, 0); + free_irq(par->irq, (void *)par); +io_unmap: + iounmap(par->mmio_base); +fb_unmap: + iounmap(par->fb_base); +rel_reg: + release_mem_region(res.start, res_size); +irqdisp: + irq_dispose_mapping(par->irq); +fbrel: + dev_set_drvdata(dev, NULL); + framebuffer_release(info); + return ret; +} + +static int __devexit of_platform_mb862xx_remove(struct of_device *ofdev) +{ + struct fb_info *fbi = dev_get_drvdata(&ofdev->dev); + struct mb862xxfb_par *par = fbi->par; + resource_size_t res_size = 1 + par->res->end - par->res->start; + unsigned long reg; + + dev_dbg(fbi->dev, "%s release\n", fbi->fix.id); + + /* display off */ + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_DEN | GC_DCM01_L0E); + outreg(disp, GC_DCM1, reg); + + /* disable interrupts */ + outreg(host, GC_IMASK, 0); + + free_irq(par->irq, (void *)par); + irq_dispose_mapping(par->irq); + + device_remove_file(&ofdev->dev, &dev_attr_dispregs); + + unregister_framebuffer(fbi); + fb_dealloc_cmap(&fbi->cmap); + + 
iounmap(par->mmio_base); + iounmap(par->fb_base); + + dev_set_drvdata(&ofdev->dev, NULL); + release_mem_region(par->res->start, res_size); + framebuffer_release(fbi); + return 0; +} + +/* + * common types + */ +static struct of_device_id __devinitdata of_platform_mb862xx_tbl[] = { + { .compatible = "fujitsu,MB86276", }, + { .compatible = "fujitsu,lime", }, + { .compatible = "fujitsu,MB86277", }, + { .compatible = "fujitsu,mint", }, + { .compatible = "fujitsu,MB86293", }, + { .compatible = "fujitsu,MB86294", }, + { .compatible = "fujitsu,coral", }, + { /* end */ } +}; + +static struct of_platform_driver of_platform_mb862xxfb_driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + .match_table = of_platform_mb862xx_tbl, + .probe = of_platform_mb862xx_probe, + .remove = __devexit_p(of_platform_mb862xx_remove), +}; +#endif + +#if defined(CONFIG_FB_MB862XX_PCI_GDC) +static int coralp_init(struct mb862xxfb_par *par) +{ + int cn, ver; + + par->host = par->mmio_base; + par->i2c = par->mmio_base + MB862XX_I2C_BASE; + par->disp = par->mmio_base + MB862XX_DISP_BASE; + par->cap = par->mmio_base + MB862XX_CAP_BASE; + par->draw = par->mmio_base + MB862XX_DRAW_BASE; + par->geo = par->mmio_base + MB862XX_GEO_BASE; + par->pio = par->mmio_base + MB862XX_PIO_BASE; + + par->refclk = GC_DISP_REFCLK_400; + + ver = inreg(host, GC_CID); + cn = (ver & GC_CID_CNAME_MSK) >> 8; + ver = ver & GC_CID_VERSION_MSK; + if (cn == 3) { + dev_info(par->dev, "Fujitsu Coral-%s GDC Rev.%d found\n",\ + (ver == 6) ? "P" : (ver == 8) ? "PA" : "?", + par->pdev->revision); + outreg(host, GC_CCF, GC_CCF_CGE_166 | GC_CCF_COT_133); + udelay(200); + outreg(host, GC_MMR, GC_MMR_CORALP_EVB_VAL); + udelay(10); + /* Clear interrupt status */ + outreg(host, GC_IST, 0); + } else { + return -ENODEV; + } + return 0; +} + +static int init_dram_ctrl(struct mb862xxfb_par *par) +{ + unsigned long i = 0; + + /* + * Set io mode first! Spec. says IC may be destroyed + * if not set to SSTL2/LVCMOS before init. 
+ */ + outreg(dram_ctrl, GC_DCTL_IOCONT1_IOCONT0, GC_EVB_DCTL_IOCONT1_IOCONT0); + + /* DRAM init */ + outreg(dram_ctrl, GC_DCTL_MODE_ADD, GC_EVB_DCTL_MODE_ADD); + outreg(dram_ctrl, GC_DCTL_SETTIME1_EMODE, GC_EVB_DCTL_SETTIME1_EMODE); + outreg(dram_ctrl, GC_DCTL_REFRESH_SETTIME2, + GC_EVB_DCTL_REFRESH_SETTIME2); + outreg(dram_ctrl, GC_DCTL_RSV2_RSV1, GC_EVB_DCTL_RSV2_RSV1); + outreg(dram_ctrl, GC_DCTL_DDRIF2_DDRIF1, GC_EVB_DCTL_DDRIF2_DDRIF1); + outreg(dram_ctrl, GC_DCTL_RSV0_STATES, GC_EVB_DCTL_RSV0_STATES); + + /* DLL reset done? */ + while ((inreg(dram_ctrl, GC_DCTL_RSV0_STATES) & GC_DCTL_STATES_MSK)) { + udelay(GC_DCTL_INIT_WAIT_INTERVAL); + if (i++ > GC_DCTL_INIT_WAIT_CNT) { + dev_err(par->dev, "VRAM init failed.\n"); + return -EINVAL; + } + } + outreg(dram_ctrl, GC_DCTL_MODE_ADD, GC_EVB_DCTL_MODE_ADD_AFT_RST); + outreg(dram_ctrl, GC_DCTL_RSV0_STATES, GC_EVB_DCTL_RSV0_STATES_AFT_RST); + return 0; +} + +static int carmine_init(struct mb862xxfb_par *par) +{ + unsigned long reg; + + par->ctrl = par->mmio_base + MB86297_CTRL_BASE; + par->i2c = par->mmio_base + MB86297_I2C_BASE; + par->disp = par->mmio_base + MB86297_DISP0_BASE; + par->disp1 = par->mmio_base + MB86297_DISP1_BASE; + par->cap = par->mmio_base + MB86297_CAP0_BASE; + par->cap1 = par->mmio_base + MB86297_CAP1_BASE; + par->draw = par->mmio_base + MB86297_DRAW_BASE; + par->dram_ctrl = par->mmio_base + MB86297_DRAMCTRL_BASE; + par->wrback = par->mmio_base + MB86297_WRBACK_BASE; + + par->refclk = GC_DISP_REFCLK_533; + + /* warm up */ + reg = GC_CTRL_CLK_EN_DRAM | GC_CTRL_CLK_EN_2D3D | GC_CTRL_CLK_EN_DISP0; + outreg(ctrl, GC_CTRL_CLK_ENABLE, reg); + + /* check for engine module revision */ + if (inreg(draw, GC_2D3D_REV) == GC_RE_REVISION) + dev_info(par->dev, "Fujitsu Carmine GDC Rev.%d found\n", + par->pdev->revision); + else + goto err_init; + + reg &= ~GC_CTRL_CLK_EN_2D3D; + outreg(ctrl, GC_CTRL_CLK_ENABLE, reg); + + /* set up vram */ + if (init_dram_ctrl(par) < 0) + goto err_init; + + outreg(ctrl, 
GC_CTRL_INT_MASK, 0); + return 0; + +err_init: + outreg(ctrl, GC_CTRL_CLK_ENABLE, 0); + return -EINVAL; +} + +static inline int mb862xx_pci_gdc_init(struct mb862xxfb_par *par) +{ + switch (par->type) { + case BT_CORALP: + return coralp_init(par); + case BT_CARMINE: + return carmine_init(par); + default: + return -ENODEV; + } +} + +#define CHIP_ID(id) \ + { PCI_DEVICE(PCI_VENDOR_ID_FUJITSU_LIMITED, id) } + +static struct pci_device_id mb862xx_pci_tbl[] __devinitdata = { + /* MB86295/MB86296 */ + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CORALP), + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CORALPA), + /* MB86297 */ + CHIP_ID(PCI_DEVICE_ID_FUJITSU_CARMINE), + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mb862xx_pci_tbl); + +static int __devinit mb862xx_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct mb862xxfb_par *par; + struct fb_info *info; + struct device *dev = &pdev->dev; + int ret; + + ret = pci_enable_device(pdev); + if (ret < 0) { + dev_err(dev, "Cannot enable PCI device\n"); + goto out; + } + + info = framebuffer_alloc(sizeof(struct mb862xxfb_par), dev); + if (!info) { + dev_err(dev, "framebuffer alloc failed\n"); + ret = -ENOMEM; + goto dis_dev; + } + + par = info->par; + par->info = info; + par->dev = dev; + par->pdev = pdev; + par->irq = pdev->irq; + + ret = pci_request_regions(pdev, DRV_NAME); + if (ret < 0) { + dev_err(dev, "Cannot reserve region(s) for PCI device\n"); + goto rel_fb; + } + + switch (pdev->device) { + case PCI_DEVICE_ID_FUJITSU_CORALP: + case PCI_DEVICE_ID_FUJITSU_CORALPA: + par->fb_base_phys = pci_resource_start(par->pdev, 0); + par->mapped_vram = CORALP_MEM_SIZE; + par->mmio_base_phys = par->fb_base_phys + MB862XX_MMIO_BASE; + par->mmio_len = MB862XX_MMIO_SIZE; + par->type = BT_CORALP; + break; + case PCI_DEVICE_ID_FUJITSU_CARMINE: + par->fb_base_phys = pci_resource_start(par->pdev, 2); + par->mmio_base_phys = pci_resource_start(par->pdev, 3); + par->mmio_len = pci_resource_len(par->pdev, 3); + par->mapped_vram = CARMINE_MEM_SIZE; + 
par->type = BT_CARMINE; + break; + default: + /* should never occur */ + goto rel_reg; + } + + par->fb_base = ioremap(par->fb_base_phys, par->mapped_vram); + if (par->fb_base == NULL) { + dev_err(dev, "Cannot map framebuffer\n"); + goto rel_reg; + } + + par->mmio_base = ioremap(par->mmio_base_phys, par->mmio_len); + if (par->mmio_base == NULL) { + dev_err(dev, "Cannot map registers\n"); + ret = -EIO; + goto fb_unmap; + } + + dev_dbg(dev, "fb phys 0x%llx 0x%lx\n", + (u64)par->fb_base_phys, (ulong)par->mapped_vram); + dev_dbg(dev, "mmio phys 0x%llx 0x%lx\n", + (u64)par->mmio_base_phys, (ulong)par->mmio_len); + + if (mb862xx_pci_gdc_init(par)) + goto io_unmap; + + if (request_irq(par->irq, mb862xx_intr, IRQF_DISABLED | IRQF_SHARED, + DRV_NAME, (void *)par)) { + dev_err(dev, "Cannot request irq\n"); + goto io_unmap; + } + + mb862xxfb_init_fbinfo(info); + + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0) < 0) { + dev_err(dev, "Could not allocate cmap for fb_info.\n"); + ret = -ENOMEM; + goto free_irq; + } + + if ((info->fbops->fb_set_par)(info)) + dev_err(dev, "set_var() failed on initial setup?\n"); + + ret = register_framebuffer(info); + if (ret < 0) { + dev_err(dev, "failed to register framebuffer\n"); + goto rel_cmap; + } + + pci_set_drvdata(pdev, info); + + if (device_create_file(dev, &dev_attr_dispregs)) + dev_err(dev, "Can't create sysfs regdump file\n"); + + if (par->type == BT_CARMINE) + outreg(ctrl, GC_CTRL_INT_MASK, GC_CARMINE_INT_EN); + else + outreg(host, GC_IMASK, GC_INT_EN); + + return 0; + +rel_cmap: + fb_dealloc_cmap(&info->cmap); +free_irq: + free_irq(par->irq, (void *)par); +io_unmap: + iounmap(par->mmio_base); +fb_unmap: + iounmap(par->fb_base); +rel_reg: + pci_release_regions(pdev); +rel_fb: + framebuffer_release(info); +dis_dev: + pci_disable_device(pdev); +out: + return ret; +} + +static void __devexit mb862xx_pci_remove(struct pci_dev *pdev) +{ + struct fb_info *fbi = pci_get_drvdata(pdev); + struct mb862xxfb_par *par = fbi->par; + unsigned long 
reg; + + dev_dbg(fbi->dev, "%s release\n", fbi->fix.id); + + /* display off */ + reg = inreg(disp, GC_DCM1); + reg &= ~(GC_DCM01_DEN | GC_DCM01_L0E); + outreg(disp, GC_DCM1, reg); + + if (par->type == BT_CARMINE) { + outreg(ctrl, GC_CTRL_INT_MASK, 0); + outreg(ctrl, GC_CTRL_CLK_ENABLE, 0); + } else { + outreg(host, GC_IMASK, 0); + } + + device_remove_file(&pdev->dev, &dev_attr_dispregs); + + pci_set_drvdata(pdev, NULL); + unregister_framebuffer(fbi); + fb_dealloc_cmap(&fbi->cmap); + + free_irq(par->irq, (void *)par); + iounmap(par->mmio_base); + iounmap(par->fb_base); + + pci_release_regions(pdev); + framebuffer_release(fbi); + pci_disable_device(pdev); +} + +static struct pci_driver mb862xxfb_pci_driver = { + .name = DRV_NAME, + .id_table = mb862xx_pci_tbl, + .probe = mb862xx_pci_probe, + .remove = __devexit_p(mb862xx_pci_remove), +}; +#endif + +static int __devinit mb862xxfb_init(void) +{ + int ret = -ENODEV; + +#if defined(CONFIG_FB_MB862XX_LIME) + ret = of_register_platform_driver(&of_platform_mb862xxfb_driver); +#endif +#if defined(CONFIG_FB_MB862XX_PCI_GDC) + ret = pci_register_driver(&mb862xxfb_pci_driver); +#endif + return ret; +} + +static void __exit mb862xxfb_exit(void) +{ +#if defined(CONFIG_FB_MB862XX_LIME) + of_unregister_platform_driver(&of_platform_mb862xxfb_driver); +#endif +#if defined(CONFIG_FB_MB862XX_PCI_GDC) + pci_unregister_driver(&mb862xxfb_pci_driver); +#endif +} + +module_init(mb862xxfb_init); +module_exit(mb862xxfb_exit); + +MODULE_DESCRIPTION("Fujitsu MB862xx Framebuffer driver"); +MODULE_AUTHOR("Anatolij Gustschin "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/mb862xx/mb862xxfb.h b/drivers/video/mb862xx/mb862xxfb.h new file mode 100644 index 00000000000..c4c8f4dd221 --- /dev/null +++ b/drivers/video/mb862xx/mb862xxfb.h @@ -0,0 +1,83 @@ +#ifndef __MB862XX_H__ +#define __MB862XX_H__ + +#define PCI_VENDOR_ID_FUJITSU_LIMITED 0x10cf +#define PCI_DEVICE_ID_FUJITSU_CORALP 0x2019 +#define PCI_DEVICE_ID_FUJITSU_CORALPA 0x201e +#define 
PCI_DEVICE_ID_FUJITSU_CARMINE 0x202b + +#define GC_MMR_CORALP_EVB_VAL 0x11d7fa13 + +enum gdctype { + BT_NONE, + BT_LIME, + BT_MINT, + BT_CORAL, + BT_CORALP, + BT_CARMINE, +}; + +struct mb862xx_gc_mode { + struct fb_videomode def_mode; /* mode of connected display */ + unsigned int def_bpp; /* default depth */ + unsigned long max_vram; /* connected SDRAM size */ + unsigned long ccf; /* gdc clk */ + unsigned long mmr; /* memory mode for SDRAM */ +}; + +/* private data */ +struct mb862xxfb_par { + struct fb_info *info; /* fb info head */ + struct device *dev; + struct pci_dev *pdev; + struct resource *res; /* framebuffer/mmio resource */ + + resource_size_t fb_base_phys; /* fb base, 36-bit PPC440EPx */ + resource_size_t mmio_base_phys; /* io base addr */ + void __iomem *fb_base; /* remapped framebuffer */ + void __iomem *mmio_base; /* remapped registers */ + size_t mapped_vram; /* length of remapped vram */ + size_t mmio_len; /* length of register region */ + + void __iomem *host; /* relocatable reg. bases */ + void __iomem *i2c; + void __iomem *disp; + void __iomem *disp1; + void __iomem *cap; + void __iomem *cap1; + void __iomem *draw; + void __iomem *geo; + void __iomem *pio; + void __iomem *ctrl; + void __iomem *dram_ctrl; + void __iomem *wrback; + + unsigned int irq; + unsigned int type; /* GDC type */ + unsigned int refclk; /* disp. 
reference clock */ + struct mb862xx_gc_mode *gc_mode; /* GDC mode init data */ + int pre_init; /* don't init display if 1 */ + + u32 pseudo_palette[16]; +}; + +#if defined(CONFIG_FB_MB862XX_LIME) && defined(CONFIG_FB_MB862XX_PCI_GDC) +#error "Select Lime GDC or CoralP/Carmine support, but not both together" +#endif +#if defined(CONFIG_FB_MB862XX_LIME) +#define gdc_read __raw_readl +#define gdc_write __raw_writel +#else +#define gdc_read readl +#define gdc_write writel +#endif + +#define inreg(type, off) \ + gdc_read((par->type + (off))) + +#define outreg(type, off, val) \ + gdc_write((val), (par->type + (off))) + +#define pack(a, b) (((a) << 16) | (b)) + +#endif -- cgit v1.2.3-18-g5258 From 0aedadf91a70a11c4a3e7c7d99b21e5528af8d5d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 6 Nov 2008 12:53:30 -0800 Subject: mm: move migrate_prep out from under mmap_sem Move the migrate_prep outside the mmap_sem for the following system calls 1. sys_move_pages 2. sys_migrate_pages 3. sys_mbind() It really does not matter when we flush the lru. The system is free to add pages onto the lru even during migration which will make the page migration either skip the page (mbind, migrate_pages) or return a busy state (move_pages). Fixes this lockdep warning (and potential deadlock): Some VM place has mmap_sem -> kevent_wq via lru_add_drain_all() net/core/dev.c::dev_ioctl() has rtnl_lock -> mmap_sem (*) the ioctl has copy_from_user() and it can do page fault. 
linkwatch_event has kevent_wq -> rtnl_lock Signed-off-by: Christoph Lameter Cc: KOSAKI Motohiro Reported-by: Heiko Carstens Cc: Nick Piggin Cc: Hugh Dickins Cc: Rik van Riel Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 18 +++++++++++------- mm/migrate.c | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36f42573a33..e9493b1c111 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, int err; struct vm_area_struct *first, *vma, *prev; - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - - err = migrate_prep(); - if (err) - return ERR_PTR(err); - } first = find_vma(mm, start); if (!first) @@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) { int busy = 0; - int err = 0; + int err; nodemask_t tmp; + err = migrate_prep(); + if (err) + return err; + down_read(&mm->mmap_sem); err = migrate_vmas(mm, from_nodes, to_nodes, flags); @@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len, start, start + len, mode, mode_flags, nmask ? 
nodes_addr(*nmask)[0] : -1); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + + err = migrate_prep(); + if (err) + return err; + } down_write(&mm->mmap_sem); vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); diff --git a/mm/migrate.c b/mm/migrate.c index 6602941bfab..385db89f0c3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -841,12 +841,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm, struct page_to_node *pp; LIST_HEAD(pagelist); + migrate_prep(); down_read(&mm->mmap_sem); /* * Build a list of pages to migrate */ - migrate_prep(); for (pp = pm; pp->node != MAX_NUMNODES; pp++) { struct vm_area_struct *vma; struct page *page; -- cgit v1.2.3-18-g5258 From b41ad14c30acf023d09ac064096a4cf41248ce46 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 6 Nov 2008 12:53:31 -0800 Subject: vmemmap: warn about page_structs with remote distance It's insufficient to simply compare node ids when warning about offnode page_structs since it's possible to still have local affinity. Acked-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse-vmemmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a91b5f8fcaf..a13ea6401ae 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long pfn = pte_pfn(*pte); int actual_node = early_pfn_to_nid(pfn); - if (actual_node != node) + if (node_distance(actual_node, node) > LOCAL_DISTANCE) printk(KERN_WARNING "[%lx-%lx] potential offnode " "page_structs\n", start, end - 1); } -- cgit v1.2.3-18-g5258 From 24eb089950ce44603b30a3145a2c8520e2b55bb1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 12:53:32 -0800 Subject: cgroups: fix invalid cgrp->dentry before cgroup has been completely removed This fixes an oops when reading /proc/sched_debug. 
A cgroup won't be removed completely until finishing cgroup_diput(), so we shouldn't invalidate cgrp->dentry in cgroup_rmdir(). Otherwise, when a group is being removed while cgroup_path() gets called, we may trigger NULL dereference BUG. The bug can be reproduced: # cat test.sh #!/bin/sh mount -t cgroup -o cpu xxx /mnt for (( ; ; )) { mkdir /mnt/sub rmdir /mnt/sub } # ./test.sh & # cat /proc/sched_debug BUG: unable to handle kernel NULL pointer dereference at 00000038 IP: [] cgroup_path+0x39/0x90 ... Call Trace: [] ? print_cfs_rq+0x6e/0x75d [] ? sched_debug_show+0x72d/0xc1e ... Signed-off-by: Li Zefan Acked-by: Paul Menage Cc: Peter Zijlstra Cc: Ingo Molnar Cc: [2.6.26.x, 2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 35eebd5510c..358e77564e6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) list_del(&cgrp->sibling); spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - cgrp->dentry = NULL; spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); -- cgit v1.2.3-18-g5258 From 1b6bcdbe7eaacde19b5d633b33c8d056e4818de0 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Thu, 6 Nov 2008 12:53:33 -0800 Subject: Documentation/email-clients.txt: add some info about gmail Signed-off-by: Tim Hockin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/email-clients.txt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index 2ebb94d6ed8..a618efab7b1 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -213,4 +213,29 @@ TkRat (GUI) Works. Use "Insert file..." or external editor. 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Gmail (Web GUI) + +If you just have to use Gmail to send patches, it CAN be made to work. It +requires a bit of external help, though. + +The first problem is that Gmail converts tabs to spaces. This will +totally break your patches. To prevent this, you have to use a different +editor. There is a firefox extension called "ViewSourceWith" +(https://addons.mozilla.org/en-US/firefox/addon/394) which allows you to +edit any text box in the editor of your choice. Configure it to launch +your favorite editor. When you want to send a patch, use this technique. +Once you have crafted your message + patch, save and exit the editor, +which should reload the Gmail edit box. GMAIL WILL PRESERVE THE TABS. +Hoorah. Apparently you can cut-n-paste literal tabs, but Gmail will +convert those to spaces upon sending! + +The second problem is that Gmail converts tabs to spaces on replies. If +you reply to a patch, don't expect to be able to apply it as a patch. + +The last problem is that Gmail will base64-encode any message that has a +non-ASCII character. That includes things like European names. Be aware. + +Gmail is not convenient for lkml patches, but CAN be made to work. + ### -- cgit v1.2.3-18-g5258 From fd96feb2583688ad13d8467ded442f9c8d73cc4b Mon Sep 17 00:00:00 2001 From: dann frazier Date: Thu, 6 Nov 2008 12:53:34 -0800 Subject: cciss: add P700m to list of supported controllers P700m support was added in: 9cff3b383dad193b0762c27278a16237e10b53dc Update cciss.txt to match.
Signed-off-by: dann frazier Acked-by: Mike Miller Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cciss.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index 48d80d95f0f..89698e8df7d 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -21,6 +21,7 @@ This driver is known to work with the following cards: * SA E200 * SA E200i * SA E500 + * SA P700m * SA P212 * SA P410 * SA P410i -- cgit v1.2.3-18-g5258 From fbdd12676c83df77480f00ebd32fc98fbe3bf836 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 6 Nov 2008 12:53:34 -0800 Subject: mm/oom_kill.c: fix badness() kerneldoc Parameter @mem has been removed since v2.6.26, now delete its comment. Signed-off-by: Qinghuang Feng Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2846a58e5de..a0a01902f55 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex); * badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate * @uptime: current uptime in seconds - * @mem: target memory controller * * The formula used is relatively simple and documented inline in the * function. The main rationale is that we want to select a good task -- cgit v1.2.3-18-g5258 From c87591b719737b4e91eb1a9fa8fd55a4ff1886d6 Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Thu, 6 Nov 2008 12:53:35 -0800 Subject: ext3: wait on all pending commits in ext3_sync_fs In ext3_sync_fs, we only wait for a commit to finish if we started it, but there may be one already in progress which will not be synced.
In the case of a data=ordered umount with pending long symlinks which are delayed due to a long list of other I/O on the backing block device, this causes the buffer associated with the long symlinks to not be moved to the inode dirty list in the second phase of fsync_super. Then, before they can be dirtied again, kjournald exits, seeing the UMOUNT flag and the dirty pages are never written to the backing block device, causing long symlink corruption and exposing new or previously freed block data to userspace. This can be reproduced with a script created by Eric Sandeen : #!/bin/bash umount /mnt/test2 mount /dev/sdb4 /mnt/test2 rm -f /mnt/test2/* dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 touch /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename ln -s /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename /mnt/test2/link umount /mnt/test2 mount /dev/sdb4 /mnt/test2 ls /mnt/test2/ umount /mnt/test2 To ensure all commits are synced, we flush all journal commits now when sync_fs'ing ext3. 
Signed-off-by: Arthur Jones Cc: Eric Sandeen Cc: Theodore Ts'o Cc: Cc: [2.6.everything] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/super.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index e5717a4fae6..5dec6d1356c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2390,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb) static int ext3_sync_fs(struct super_block *sb, int wait) { - tid_t target; - sb->s_dirt = 0; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { - if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); - } + if (wait) + ext3_force_commit(sb); + else + journal_start_commit(EXT3_SB(sb)->s_journal, NULL); + return 0; } -- cgit v1.2.3-18-g5258 From a70dcb969f64e2fa98c24f47854f20bf02ff0092 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 6 Nov 2008 12:53:36 -0800 Subject: memory hotplug: fix page_zone() calculation in test_pages_isolated() My last bugfix here (adding zone->lock) introduced a new problem: Using page_zone(pfn_to_page(pfn)) to get the zone after the for() loop is wrong. pfn will then be >= end_pfn, which may be in a different zone or not present at all. This may lead to an addressing exception in page_zone() or spin_lock_irqsave(). Now I use __first_valid_page() again after the loop to find a valid page for page_zone(). 
Signed-off-by: Gerald Schaefer Acked-by: Nathan Fontenot Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_isolation.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b70a7fec1ff..5e0ffd96745 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) break; } - if (pfn < end_pfn) + page = __first_valid_page(start_pfn, end_pfn - start_pfn); + if ((pfn < end_pfn) || !page) return -EBUSY; /* Check all pages are free or Marked as ISOLATED */ - zone = page_zone(pfn_to_page(pfn)); + zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); spin_unlock_irqrestore(&zone->lock, flags); -- cgit v1.2.3-18-g5258 From a684e7d33096892093456dd56a582cfc3bfad648 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Nov 2008 12:53:37 -0800 Subject: fbdev: fix fb_compat_ioctl() deadlocks commit 3e680aae4e53ab54cdbb0c29257dae0cbb158e1c ("fb: convert lock/unlock_kernel() into local fb mutex") introduced several deadlocks in the fb_compat_ioctl() path, as mutex_lock() doesn't allow recursion, unlike lock_kernel(). This broke frame buffer applications on 64-bit systems with a 32-bit userland. commit 120a37470c2831fea49fdebaceb5a7039f700ce6 ("framebuffer compat_ioctl deadlock") fixed one of the deadlocks. This patch fixes the remaining deadlocks: - Revert commit 120a37470c2831fea49fdebaceb5a7039f700ce6, - Extract the core logic of fb_ioctl() into a new function do_fb_ioctl(), - Change all callsites of fb_ioctl() where info->lock is already held to call do_fb_ioctl() instead, - Add sparse annotations to all routines that take info->lock. 
Signed-off-by: Geert Uytterhoeven Cc: Mikulas Patocka Cc: Krzysztof Helt Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 63 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 6048b55f287..1d5ae39cb27 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1002,13 +1002,9 @@ fb_blank(struct fb_info *info, int blank) return ret; } -static long -fb_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, + unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info; struct fb_ops *fb; struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; @@ -1018,14 +1014,10 @@ fb_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)arg; long ret = 0; - info = registered_fb[fbidx]; - mutex_lock(&info->lock); fb = info->fbops; - - if (!fb) { - mutex_unlock(&info->lock); + if (!fb) return -ENODEV; - } + switch (cmd) { case FBIOGET_VSCREENINFO: ret = copy_to_user(argp, &info->var, @@ -1126,6 +1118,21 @@ fb_ioctl(struct file *file, unsigned int cmd, else ret = fb->fb_ioctl(info, cmd, arg); } + return ret; +} + +static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +__acquires(&info->lock) +__releases(&info->lock) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int fbidx = iminor(inode); + struct fb_info *info; + long ret; + + info = registered_fb[fbidx]; + mutex_lock(&info->lock); + ret = do_fb_ioctl(info, cmd, arg); mutex_unlock(&info->lock); return ret; } @@ -1157,8 +1164,8 @@ struct fb_cmap32 { compat_caddr_t transp; }; -static int fb_getput_cmap(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int fb_getput_cmap(struct fb_info *info, unsigned int cmd, + unsigned long arg) 
{ struct fb_cmap_user __user *cmap; struct fb_cmap32 __user *cmap32; @@ -1181,7 +1188,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file, put_user(compat_ptr(data), &cmap->transp)) return -EFAULT; - err = fb_ioctl(file, cmd, (unsigned long) cmap); + err = do_fb_ioctl(info, cmd, (unsigned long) cmap); if (!err) { if (copy_in_user(&cmap32->start, @@ -1223,8 +1230,8 @@ static int do_fscreeninfo_to_user(struct fb_fix_screeninfo *fix, return err; } -static int fb_get_fscreeninfo(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, + unsigned long arg) { mm_segment_t old_fs; struct fb_fix_screeninfo fix; @@ -1235,7 +1242,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file, old_fs = get_fs(); set_fs(KERNEL_DS); - err = fb_ioctl(file, cmd, (unsigned long) &fix); + err = do_fb_ioctl(info, cmd, (unsigned long) &fix); set_fs(old_fs); if (!err) @@ -1244,8 +1251,10 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file, return err; } -static long -fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long fb_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +__acquires(&info->lock) +__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); @@ -1262,16 +1271,16 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FBIOPUT_CON2FBMAP: arg = (unsigned long) compat_ptr(arg); case FBIOBLANK: - mutex_unlock(&info->lock); - return fb_ioctl(file, cmd, arg); + ret = do_fb_ioctl(info, cmd, arg); + break; case FBIOGET_FSCREENINFO: - ret = fb_get_fscreeninfo(inode, file, cmd, arg); + ret = fb_get_fscreeninfo(info, cmd, arg); break; case FBIOGETCMAP: case FBIOPUTCMAP: - ret = fb_getput_cmap(inode, file, cmd, arg); + ret = fb_getput_cmap(info, cmd, arg); break; default: @@ -1286,6 +1295,8 @@ fb_compat_ioctl(struct file 
*file, unsigned int cmd, unsigned long arg) static int fb_mmap(struct file *file, struct vm_area_struct * vma) +__acquires(&info->lock) +__releases(&info->lock) { int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; @@ -1339,6 +1350,8 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) static int fb_open(struct inode *inode, struct file *file) +__acquires(&info->lock) +__releases(&info->lock) { int fbidx = iminor(inode); struct fb_info *info; @@ -1374,6 +1387,8 @@ out: static int fb_release(struct inode *inode, struct file *file) +__acquires(&info->lock) +__releases(&info->lock) { struct fb_info * const info = file->private_data; -- cgit v1.2.3-18-g5258 From b225d44e27521290faca2e0f9b1a4a8c74dc510a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 12:53:39 -0800 Subject: Documentation/kernel-parameters.txt: update 'isolcpus' kernel option cpuset can be used to move a process onto or off an isolated CPU. Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b..8a8cb0ccc5f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -995,13 +995,15 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,..., or - - (must be a positive range in ascending order) + - + (must be a positive range in ascending order) or a mixture ,...,- + This option can be used to specify one or more CPUs to isolate from the general SMP balancing and scheduling - algorithms. The only way to move a process onto or off - an "isolated" CPU is via the CPU affinity syscalls. + algorithms. You can move a process onto or off an + "isolated" CPU via the CPU affinity syscalls or cpuset. begins at 0 and the maximum value is "number of CPUs in system - 1". 
-- cgit v1.2.3-18-g5258 From 06a7f058761cd232cab42d5c7da82f7255b51d5b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 6 Nov 2008 12:53:40 -0800 Subject: atmel_serial: keep clock off when it's not needed The atmel_serial driver is mismanaging its clock by leaving it on at all times ... the whole point of clock management is to leave it off unless it's actively needed, which conserves power!! Although the kernel doesn't actually hang without my fix, it does discard quite a lot of early console output. The result still looks correct: usart users= 1 on 35000000 Hz, for atmel_usart.0 usart users= 0 off 35000000 Hz, for atmel_usart.2 when using ttyS0 as serial console. [haavard.skinnemoen@atmel.com: Make sure clock is enabled early for console] Signed-off-by: David Brownell Signed-off-by: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/atmel_serial.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c index 61fb8b6d19a..d5efd6c7790 100644 --- a/drivers/serial/atmel_serial.c +++ b/drivers/serial/atmel_serial.c @@ -1258,6 +1258,8 @@ static void __devinit atmel_init_port(struct atmel_uart_port *atmel_port, atmel_port->clk = clk_get(&pdev->dev, "usart"); clk_enable(atmel_port->clk); port->uartclk = clk_get_rate(atmel_port->clk); + clk_disable(atmel_port->clk); + /* only enable clock when USART is in use */ } atmel_port->use_dma_rx = data->use_dma_rx; @@ -1379,6 +1381,8 @@ static int __init atmel_console_setup(struct console *co, char *options) return -ENODEV; } + clk_enable(atmel_ports[co->index].clk); + UART_PUT_IDR(port, -1); UART_PUT_CR(port, ATMEL_US_RSTSTA | ATMEL_US_RSTRX); UART_PUT_CR(port, ATMEL_US_TXEN | ATMEL_US_RXEN); @@ -1403,7 +1407,7 @@ static struct console atmel_console = { .data = &atmel_uart, }; -#define ATMEL_CONSOLE_DEVICE &atmel_console +#define ATMEL_CONSOLE_DEVICE (&atmel_console) /* * Early console 
initialization (before VM subsystem initialized). @@ -1534,6 +1538,15 @@ static int __devinit atmel_serial_probe(struct platform_device *pdev) if (ret) goto err_add_port; + if (atmel_is_console_port(&port->uart) + && ATMEL_CONSOLE_DEVICE->flags & CON_ENABLED) { + /* + * The serial core enabled the clock for us, so undo + * the clk_enable() in atmel_console_setup() + */ + clk_disable(port->clk); + } + device_init_wakeup(&pdev->dev, 1); platform_set_drvdata(pdev, port); @@ -1544,7 +1557,6 @@ err_add_port: port->rx_ring.buf = NULL; err_alloc_ring: if (!atmel_is_console_port(&port->uart)) { - clk_disable(port->clk); clk_put(port->clk); port->clk = NULL; } @@ -1568,7 +1580,6 @@ static int __devexit atmel_serial_remove(struct platform_device *pdev) /* "port" is allocated statically, so we shouldn't free it */ - clk_disable(atmel_port->clk); clk_put(atmel_port->clk); return ret; -- cgit v1.2.3-18-g5258 From 80bb26d4062657c52862d1b112beead47ff9b793 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Thu, 6 Nov 2008 12:53:41 -0800 Subject: rtc-cmos: fix boot log message -rtc0: alarms up to one month, y3k, 114 bytes nvram, , hpet irqs irqs +rtc0: alarms up to one month, y3k, 114 bytes nvram, hpet irqs Signed-off-by: Frans Pop Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-cmos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 5549231179a..6cf8e282338 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -794,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) goto cleanup2; } - pr_info("%s: alarms up to one %s%s, %zd bytes nvram, %s irqs\n", + pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n", cmos_rtc.rtc->dev.bus_id, is_valid_irq(rtc_irq) ? 
(cmos_rtc.mon_alrm -- cgit v1.2.3-18-g5258 From c1dfda399ace020126547e7d454ba94edc8c8797 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Thu, 6 Nov 2008 12:53:42 -0800 Subject: SAM9 watchdog: update for moved headers The architecture header files were recently moved from include/asm-arm/mach-at91/ to arch/arm/mach-at91/include/mach/. The SAM9 watchdog driver still includes a header from the old location. Signed-off-by: Andrew Victor Cc: Wim Van Sebroeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/watchdog/at91sam9_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index b4babfc3158..b1da287f90e 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -30,7 +30,7 @@ #include #include -#include +#include #define DRV_NAME "AT91SAM9 Watchdog" -- cgit v1.2.3-18-g5258 From 8986ab59631b1f2e82ac820c8fd76a34462915ca Mon Sep 17 00:00:00 2001 From: Bart Trojanowski Date: Thu, 6 Nov 2008 12:53:44 -0800 Subject: fat: document additional vfat mount options While debugging a sync mount regression on vfat I noticed that there were mount options parsed by the driver that were not documented. [hirofumi@mail.parknet.co.jp: fix some parts] Signed-off-by: Bart Trojanowski Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index bbac4f1d905..dc9dc73d7d3 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -8,6 +8,12 @@ if you want to format from within Linux. VFAT MOUNT OPTIONS ---------------------------------------------------------------------- +uid=### -- Set the owner of all files on this filesystem. + The default is the uid of current process. 
+ +gid=### -- Set the group of all files on this filesystem. + The default is the gid of current process. + umask=### -- The permission mask (for files and directories, see umask(1)). The default is the umask of current process. @@ -36,7 +42,7 @@ codepage=### -- Sets the codepage number for converting to shortname characters on FAT filesystem. By default, FAT_DEFAULT_CODEPAGE setting is used. -iocharset=name -- Character set to use for converting between the +iocharset=<name> -- Character set to use for converting between the encoding is used for user visible filename and 16 bit Unicode characters. Long filenames are stored on disk in Unicode format, but Unix for the most part doesn't @@ -86,6 +92,8 @@ check=s|r|n -- Case sensitivity checking setting. r: relaxed, case insensitive n: normal, default setting, currently case insensitive +nocase -- This was deprecated for vfat. Use shortname=win95 instead. + shortname=lower|win95|winnt|mixed -- Shortname display/create setting. lower: convert to lowercase for display, @@ -99,11 +107,23 @@ shortname=lower|win95|winnt|mixed tz=UTC -- Interpret timestamps as UTC rather than local time. This option disables the conversion of timestamps between local time (as used by Windows on FAT) and UTC - (which Linux uses internally). This is particuluarly + (which Linux uses internally). This is particularly useful when mounting devices (like digital cameras) that are set to UTC in order to avoid the pitfalls of local time. +showexec -- If set, the execute permission bits of the file will be + allowed only if the extension part of the name is .EXE, + .COM, or .BAT. Not set by default. + +debug -- Can be set, but unused by the current implementation. + +sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as + IMMUTABLE flag on Linux. Not set by default. + +flush -- If set, the filesystem will try to flush to disk more + early than normal. Not set by default.
+ : 0,1,yes,no,true,false TODO -- cgit v1.2.3-18-g5258 From 990e194e69009028e029b7d25da68c38241ec4f0 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:45 -0800 Subject: fat: move fs/vfat/* and fs/msdos/* to fs/fat This just moves those files, but change link order from MSDOS, VFAT to VFAT, MSDOS. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Makefile | 2 - fs/fat/Makefile | 6 +- fs/fat/namei_msdos.c | 702 +++++++++++++++++++++++++++++++++ fs/fat/namei_vfat.c | 1055 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/msdos/Makefile | 7 - fs/msdos/namei.c | 702 --------------------------------- fs/vfat/Makefile | 7 - fs/vfat/namei.c | 1055 -------------------------------------------------- 8 files changed, 1762 insertions(+), 1774 deletions(-) create mode 100644 fs/fat/namei_msdos.c create mode 100644 fs/fat/namei_vfat.c delete mode 100644 fs/msdos/Makefile delete mode 100644 fs/msdos/namei.c delete mode 100644 fs/vfat/Makefile delete mode 100644 fs/vfat/namei.c diff --git a/fs/Makefile b/fs/Makefile index 2168c902d5c..d9f8afe6f0c 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ -obj-$(CONFIG_MSDOS_FS) += msdos/ -obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/fat/Makefile b/fs/fat/Makefile index bfb5f06cf2c..e06190322c1 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -3,5 +3,9 @@ # obj-$(CONFIG_FAT_FS) += fat.o +obj-$(CONFIG_VFAT_FS) += vfat.o +obj-$(CONFIG_MSDOS_FS) += msdos.o -fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o +fat-y := cache.o dir.o fatent.o file.o inode.o misc.o +vfat-y := namei_vfat.o +msdos-y := namei_msdos.o diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c new file mode 100644 
index 00000000000..e844b9809d2 --- /dev/null +++ b/fs/fat/namei_msdos.c @@ -0,0 +1,702 @@ +/* + * linux/fs/msdos/namei.c + * + * Written 1992,1993 by Werner Almesberger + * Hidden files 1995 by Albert Cahalan + * Rewritten for constant inumbers 1999 by Al Viro + */ + +#include +#include +#include +#include +#include + +/* Characters that are undesirable in an MS-DOS file name */ +static unsigned char bad_chars[] = "*?<>|\""; +static unsigned char bad_if_strict[] = "+=,; "; + +/***** Formats an MS-DOS file name. Rejects invalid names. */ +static int msdos_format_name(const unsigned char *name, int len, + unsigned char *res, struct fat_mount_options *opts) + /* + * name is the proposed name, len is its length, res is + * the resulting name, opts->name_check is either (r)elaxed, + * (n)ormal or (s)trict, opts->dotsOK allows dots at the + * beginning of name (for hidden files) + */ +{ + unsigned char *walk; + unsigned char c; + int space; + + if (name[0] == '.') { /* dotfile because . and .. already done */ + if (opts->dotsOK) { + /* Get rid of dot - test for it elsewhere */ + name++; + len--; + } else + return -EINVAL; + } + /* + * disallow names that _really_ start with a dot + */ + space = 1; + c = 0; + for (walk = res; len && walk - res < 8; walk++) { + c = *name++; + len--; + if (opts->name_check != 'r' && strchr(bad_chars, c)) + return -EINVAL; + if (opts->name_check == 's' && strchr(bad_if_strict, c)) + return -EINVAL; + if (c >= 'A' && c <= 'Z' && opts->name_check == 's') + return -EINVAL; + if (c < ' ' || c == ':' || c == '\\') + return -EINVAL; + /* + * 0xE5 is legal as a first character, but we must substitute + * 0x05 because 0xE5 marks deleted files. Yes, DOS really + * does this. + * It seems that Microsoft hacked DOS to support non-US + * characters after the 0xE5 character was already in use to + * mark deleted files. 
+ */ + if ((res == walk) && (c == 0xE5)) + c = 0x05; + if (c == '.') + break; + space = (c == ' '); + *walk = (!opts->nocase && c >= 'a' && c <= 'z') ? c - 32 : c; + } + if (space) + return -EINVAL; + if (opts->name_check == 's' && len && c != '.') { + c = *name++; + len--; + if (c != '.') + return -EINVAL; + } + while (c != '.' && len--) + c = *name++; + if (c == '.') { + while (walk - res < 8) + *walk++ = ' '; + while (len > 0 && walk - res < MSDOS_NAME) { + c = *name++; + len--; + if (opts->name_check != 'r' && strchr(bad_chars, c)) + return -EINVAL; + if (opts->name_check == 's' && + strchr(bad_if_strict, c)) + return -EINVAL; + if (c < ' ' || c == ':' || c == '\\') + return -EINVAL; + if (c == '.') { + if (opts->name_check == 's') + return -EINVAL; + break; + } + if (c >= 'A' && c <= 'Z' && opts->name_check == 's') + return -EINVAL; + space = c == ' '; + if (!opts->nocase && c >= 'a' && c <= 'z') + *walk++ = c - 32; + else + *walk++ = c; + } + if (space) + return -EINVAL; + if (opts->name_check == 's' && len) + return -EINVAL; + } + while (walk - res < MSDOS_NAME) + *walk++ = ' '; + + return 0; +} + +/***** Locates a directory entry. Uses unformatted name. */ +static int msdos_find(struct inode *dir, const unsigned char *name, int len, + struct fat_slot_info *sinfo) +{ + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); + unsigned char msdos_name[MSDOS_NAME]; + int err; + + err = msdos_format_name(name, len, msdos_name, &sbi->options); + if (err) + return -ENOENT; + + err = fat_scan(dir, msdos_name, sinfo); + if (!err && sbi->options.dotsOK) { + if (name[0] == '.') { + if (!(sinfo->de->attr & ATTR_HIDDEN)) + err = -ENOENT; + } else { + if (sinfo->de->attr & ATTR_HIDDEN) + err = -ENOENT; + } + if (err) + brelse(sinfo->bh); + } + return err; +} + +/* + * Compute the hash for the msdos name corresponding to the dentry. + * Note: if the name is invalid, we leave the hash code unchanged so + * that the existing dentry can be used. 
The msdos fs routines will + * return ENOENT or EINVAL as appropriate. + */ +static int msdos_hash(struct dentry *dentry, struct qstr *qstr) +{ + struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; + unsigned char msdos_name[MSDOS_NAME]; + int error; + + error = msdos_format_name(qstr->name, qstr->len, msdos_name, options); + if (!error) + qstr->hash = full_name_hash(msdos_name, MSDOS_NAME); + return 0; +} + +/* + * Compare two msdos names. If either of the names are invalid, + * we fall back to doing the standard name comparison. + */ +static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +{ + struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; + unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; + int error; + + error = msdos_format_name(a->name, a->len, a_msdos_name, options); + if (error) + goto old_compare; + error = msdos_format_name(b->name, b->len, b_msdos_name, options); + if (error) + goto old_compare; + error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); +out: + return error; + +old_compare: + error = 1; + if (a->len == b->len) + error = memcmp(a->name, b->name, a->len); + goto out; +} + +static struct dentry_operations msdos_dentry_operations = { + .d_hash = msdos_hash, + .d_compare = msdos_cmp, +}; + +/* + * AV. Wrappers for FAT sb operations. Is it wise? 
+ */ + +/***** Get inode using directory and name */ +static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct fat_slot_info sinfo; + struct inode *inode = NULL; + int res; + + dentry->d_op = &msdos_dentry_operations; + + lock_super(sb); + res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (res == -ENOENT) + goto add; + if (res < 0) + goto out; + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + res = PTR_ERR(inode); + goto out; + } +add: + res = 0; + dentry = d_splice_alias(inode, dentry); + if (dentry) + dentry->d_op = &msdos_dentry_operations; +out: + unlock_super(sb); + if (!res) + return dentry; + return ERR_PTR(res); +} + +/***** Creates a directory entry (name is already formatted). */ +static int msdos_add_entry(struct inode *dir, const unsigned char *name, + int is_dir, int is_hid, int cluster, + struct timespec *ts, struct fat_slot_info *sinfo) +{ + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); + struct msdos_dir_entry de; + __le16 time, date; + int err; + + memcpy(de.name, name, MSDOS_NAME); + de.attr = is_dir ? 
ATTR_DIR : ATTR_ARCH; + if (is_hid) + de.attr |= ATTR_HIDDEN; + de.lcase = 0; + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + de.cdate = de.adate = 0; + de.ctime = 0; + de.ctime_cs = 0; + de.time = time; + de.date = date; + de.start = cpu_to_le16(cluster); + de.starthi = cpu_to_le16(cluster >> 16); + de.size = 0; + + err = fat_add_entries(dir, &de, 1, sinfo); + if (err) + return err; + + dir->i_ctime = dir->i_mtime = *ts; + if (IS_DIRSYNC(dir)) + (void)fat_sync_inode(dir); + else + mark_inode_dirty(dir); + + return 0; +} + +/***** Create a file */ +static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode = NULL; + struct fat_slot_info sinfo; + struct timespec ts; + unsigned char msdos_name[MSDOS_NAME]; + int err, is_hid; + + lock_super(sb); + + err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, + msdos_name, &MSDOS_SB(sb)->options); + if (err) + goto out; + is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); + /* Have to do it due to foo vs. .foo conflicts */ + if (!fat_scan(dir, msdos_name, &sinfo)) { + brelse(sinfo.bh); + err = -EINVAL; + goto out; + } + + ts = CURRENT_TIME_SEC; + err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo); + if (err) + goto out; + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ + + d_instantiate(dentry, inode); +out: + unlock_super(sb); + if (!err) + err = fat_flush_inodes(sb, dir, inode); + return err; +} + +/***** Remove a directory */ +static int msdos_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode = dentry->d_inode; + struct fat_slot_info sinfo; + int err; + + lock_super(sb); + /* + * Check whether the directory is not in use, then check + * whether it is empty. + */ + err = fat_dir_empty(inode); + if (err) + goto out; + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) + goto out; + + err = fat_remove_entries(dir, &sinfo); /* and releases bh */ + if (err) + goto out; + drop_nlink(dir); + + clear_nlink(inode); + inode->i_ctime = CURRENT_TIME_SEC; + fat_detach(inode); +out: + unlock_super(sb); + if (!err) + err = fat_flush_inodes(sb, dir, inode); + + return err; +} + +/***** Make a directory */ +static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct super_block *sb = dir->i_sb; + struct fat_slot_info sinfo; + struct inode *inode; + unsigned char msdos_name[MSDOS_NAME]; + struct timespec ts; + int err, is_hid, cluster; + + lock_super(sb); + + err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, + msdos_name, &MSDOS_SB(sb)->options); + if (err) + goto out; + is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); + /* foo vs .foo situation */ + if (!fat_scan(dir, msdos_name, &sinfo)) { + brelse(sinfo.bh); + err = -EINVAL; + goto out; + } + + ts = CURRENT_TIME_SEC; + cluster = fat_alloc_new_dir(dir, &ts); + if (cluster < 0) { + err = cluster; + goto out; + } + err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo); + if (err) + goto out_free; + inc_nlink(dir); + + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + /* the directory was completed, just return a error */ + goto out; + } + inode->i_nlink 
= 2; + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + d_instantiate(dentry, inode); + + unlock_super(sb); + fat_flush_inodes(sb, dir, inode); + return 0; + +out_free: + fat_free_clusters(dir, cluster); +out: + unlock_super(sb); + return err; +} + +/***** Unlink a file */ +static int msdos_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb= inode->i_sb; + struct fat_slot_info sinfo; + int err; + + lock_super(sb); + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) + goto out; + + err = fat_remove_entries(dir, &sinfo); /* and releases bh */ + if (err) + goto out; + clear_nlink(inode); + inode->i_ctime = CURRENT_TIME_SEC; + fat_detach(inode); +out: + unlock_super(sb); + if (!err) + err = fat_flush_inodes(sb, dir, inode); + + return err; +} + +static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, + struct dentry *old_dentry, + struct inode *new_dir, unsigned char *new_name, + struct dentry *new_dentry, int is_hid) +{ + struct buffer_head *dotdot_bh; + struct msdos_dir_entry *dotdot_de; + struct inode *old_inode, *new_inode; + struct fat_slot_info old_sinfo, sinfo; + struct timespec ts; + loff_t dotdot_i_pos, new_i_pos; + int err, old_attrs, is_dir, update_dotdot, corrupt = 0; + + old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + err = fat_scan(old_dir, old_name, &old_sinfo); + if (err) { + err = -EIO; + goto out; + } + + is_dir = S_ISDIR(old_inode->i_mode); + update_dotdot = (is_dir && old_dir != new_dir); + if (update_dotdot) { + if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, + &dotdot_i_pos) < 0) { + err = -EIO; + goto out; + } + } + + old_attrs = MSDOS_I(old_inode)->i_attrs; + err = fat_scan(new_dir, new_name, &sinfo); + if (!err) { + if (!new_inode) { + /* "foo" -> ".foo" case. 
just change the ATTR_HIDDEN */ + if (sinfo.de != old_sinfo.de) { + err = -EINVAL; + goto out; + } + if (is_hid) + MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; + else + MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; + if (IS_DIRSYNC(old_dir)) { + err = fat_sync_inode(old_inode); + if (err) { + MSDOS_I(old_inode)->i_attrs = old_attrs; + goto out; + } + } else + mark_inode_dirty(old_inode); + + old_dir->i_version++; + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + if (IS_DIRSYNC(old_dir)) + (void)fat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + goto out; + } + } + + ts = CURRENT_TIME_SEC; + if (new_inode) { + if (err) + goto out; + if (is_dir) { + err = fat_dir_empty(new_inode); + if (err) + goto out; + } + new_i_pos = MSDOS_I(new_inode)->i_pos; + fat_detach(new_inode); + } else { + err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0, + &ts, &sinfo); + if (err) + goto out; + new_i_pos = sinfo.i_pos; + } + new_dir->i_version++; + + fat_detach(old_inode); + fat_attach(old_inode, new_i_pos); + if (is_hid) + MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; + else + MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; + if (IS_DIRSYNC(new_dir)) { + err = fat_sync_inode(old_inode); + if (err) + goto error_inode; + } else + mark_inode_dirty(old_inode); + + if (update_dotdot) { + int start = MSDOS_I(new_dir)->i_logstart; + dotdot_de->start = cpu_to_le16(start); + dotdot_de->starthi = cpu_to_le16(start >> 16); + mark_buffer_dirty(dotdot_bh); + if (IS_DIRSYNC(new_dir)) { + err = sync_dirty_buffer(dotdot_bh); + if (err) + goto error_dotdot; + } + drop_nlink(old_dir); + if (!new_inode) + inc_nlink(new_dir); + } + + err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ + old_sinfo.bh = NULL; + if (err) + goto error_dotdot; + old_dir->i_version++; + old_dir->i_ctime = old_dir->i_mtime = ts; + if (IS_DIRSYNC(old_dir)) + (void)fat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (new_inode) { + drop_nlink(new_inode); + if (is_dir) + 
drop_nlink(new_inode); + new_inode->i_ctime = ts; + } +out: + brelse(sinfo.bh); + brelse(dotdot_bh); + brelse(old_sinfo.bh); + return err; + +error_dotdot: + /* data cluster is shared, serious corruption */ + corrupt = 1; + + if (update_dotdot) { + int start = MSDOS_I(old_dir)->i_logstart; + dotdot_de->start = cpu_to_le16(start); + dotdot_de->starthi = cpu_to_le16(start >> 16); + mark_buffer_dirty(dotdot_bh); + corrupt |= sync_dirty_buffer(dotdot_bh); + } +error_inode: + fat_detach(old_inode); + fat_attach(old_inode, old_sinfo.i_pos); + MSDOS_I(old_inode)->i_attrs = old_attrs; + if (new_inode) { + fat_attach(new_inode, new_i_pos); + if (corrupt) + corrupt |= fat_sync_inode(new_inode); + } else { + /* + * If new entry was not sharing the data cluster, it + * shouldn't be serious corruption. + */ + int err2 = fat_remove_entries(new_dir, &sinfo); + if (corrupt) + corrupt |= err2; + sinfo.bh = NULL; + } + if (corrupt < 0) { + fat_fs_panic(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld)", + __func__, sinfo.i_pos); + } + goto out; +} + +/***** Rename, a wrapper for rename_same_dir & rename_diff_dir */ +static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; + int err, is_hid; + + lock_super(sb); + + err = msdos_format_name(old_dentry->d_name.name, + old_dentry->d_name.len, old_msdos_name, + &MSDOS_SB(old_dir->i_sb)->options); + if (err) + goto out; + err = msdos_format_name(new_dentry->d_name.name, + new_dentry->d_name.len, new_msdos_name, + &MSDOS_SB(new_dir->i_sb)->options); + if (err) + goto out; + + is_hid = + (new_dentry->d_name.name[0] == '.') && (new_msdos_name[0] != '.'); + + err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, + new_dir, new_msdos_name, new_dentry, is_hid); +out: + unlock_super(sb); + if (!err) + err = fat_flush_inodes(sb, old_dir, new_dir); + 
return err; +} + +static const struct inode_operations msdos_dir_inode_operations = { + .create = msdos_create, + .lookup = msdos_lookup, + .unlink = msdos_unlink, + .mkdir = msdos_mkdir, + .rmdir = msdos_rmdir, + .rename = msdos_rename, + .setattr = fat_setattr, + .getattr = fat_getattr, +}; + +static int msdos_fill_super(struct super_block *sb, void *data, int silent) +{ + int res; + + res = fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 0); + if (res) + return res; + + sb->s_flags |= MS_NOATIME; + sb->s_root->d_op = &msdos_dentry_operations; + return 0; +} + +static int msdos_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, + mnt); +} + +static struct file_system_type msdos_fs_type = { + .owner = THIS_MODULE, + .name = "msdos", + .get_sb = msdos_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_msdos_fs(void) +{ + return register_filesystem(&msdos_fs_type); +} + +static void __exit exit_msdos_fs(void) +{ + unregister_filesystem(&msdos_fs_type); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Werner Almesberger"); +MODULE_DESCRIPTION("MS-DOS filesystem support"); + +module_init(init_msdos_fs) +module_exit(exit_msdos_fs) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c new file mode 100644 index 00000000000..155c10b4adb --- /dev/null +++ b/fs/fat/namei_vfat.c @@ -0,0 +1,1055 @@ +/* + * linux/fs/vfat/namei.c + * + * Written 1992,1993 by Werner Almesberger + * + * Windows95/Windows NT compatible extended MSDOS filesystem + * by Gordon Chaffee Copyright (C) 1995. Send bug reports for the + * VFAT filesystem to . Specify + * what file operation caused you trouble and if you can duplicate + * the problem, send a script that demonstrates it. 
+ * + * Short name translation 1999, 2001 by Wolfram Pienkoss + * + * Support Multibyte characters and cleanup by + * OGAWA Hirofumi + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + int ret = 1; + + if (!dentry->d_inode && + nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) + /* + * negative dentry is dropped, in order to make sure + * to use the name which a user desires if this is + * create path. + */ + ret = 0; + else { + spin_lock(&dentry->d_lock); + if (dentry->d_time != dentry->d_parent->d_inode->i_version) + ret = 0; + spin_unlock(&dentry->d_lock); + } + return ret; +} + +/* returns the length of a struct qstr, ignoring trailing dots */ +static unsigned int vfat_striptail_len(struct qstr *qstr) +{ + unsigned int len = qstr->len; + + while (len && qstr->name[len - 1] == '.') + len--; + return len; +} + +/* + * Compute the hash for the vfat name corresponding to the dentry. + * Note: if the name is invalid, we leave the hash code unchanged so + * that the existing dentry can be used. The vfat fs routines will + * return ENOENT or EINVAL as appropriate. + */ +static int vfat_hash(struct dentry *dentry, struct qstr *qstr) +{ + qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); + return 0; +} + +/* + * Compute the hash for the vfat name corresponding to the dentry. + * Note: if the name is invalid, we leave the hash code unchanged so + * that the existing dentry can be used. The vfat fs routines will + * return ENOENT or EINVAL as appropriate. 
+ */ +static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) +{ + struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + len = vfat_striptail_len(qstr); + + hash = init_name_hash(); + while (len--) + hash = partial_name_hash(nls_tolower(t, *name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +/* + * Case insensitive compare of two vfat names. + */ +static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) +{ + struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; + unsigned int alen, blen; + + /* A filename cannot end in '.' or we treat it like it has none */ + alen = vfat_striptail_len(a); + blen = vfat_striptail_len(b); + if (alen == blen) { + if (nls_strnicmp(t, a->name, b->name, alen) == 0) + return 0; + } + return 1; +} + +/* + * Case sensitive compare of two vfat names. + */ +static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) +{ + unsigned int alen, blen; + + /* A filename cannot end in '.' 
or we treat it like it has none */ + alen = vfat_striptail_len(a); + blen = vfat_striptail_len(b); + if (alen == blen) { + if (strncmp(a->name, b->name, alen) == 0) + return 0; + } + return 1; +} + +static struct dentry_operations vfat_dentry_ops[4] = { + { + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, + }, + { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, + }, + { + .d_hash = vfat_hash, + .d_compare = vfat_cmp, + }, + { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hash, + .d_compare = vfat_cmp, + } +}; + +/* Characters that are undesirable in an MS-DOS file name */ + +static inline wchar_t vfat_bad_char(wchar_t w) +{ + return (w < 0x0020) + || (w == '*') || (w == '?') || (w == '<') || (w == '>') + || (w == '|') || (w == '"') || (w == ':') || (w == '/') + || (w == '\\'); +} + +static inline wchar_t vfat_replace_char(wchar_t w) +{ + return (w == '[') || (w == ']') || (w == ';') || (w == ',') + || (w == '+') || (w == '='); +} + +static wchar_t vfat_skip_char(wchar_t w) +{ + return (w == '.') || (w == ' '); +} + +static inline int vfat_is_used_badchars(const wchar_t *s, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (vfat_bad_char(s[i])) + return -EINVAL; + + if (s[i - 1] == ' ') /* last character cannot be space */ + return -EINVAL; + + return 0; +} + +static int vfat_find_form(struct inode *dir, unsigned char *name) +{ + struct fat_slot_info sinfo; + int err = fat_scan(dir, name, &sinfo); + if (err) + return -ENOENT; + brelse(sinfo.bh); + return 0; +} + +/* + * 1) Valid characters for the 8.3 format alias are any combination of + * letters, uppercase alphabets, digits, any of the + * following special characters: + * $ % ' ` - @ { } ~ ! # ( ) & _ ^ + * In this case Longfilename is not stored in disk. + * + * WinNT's Extension: + * File name and extension name is contain uppercase/lowercase + * only. And it is expressed by CASE_LOWER_BASE and CASE_LOWER_EXT. 
+ * + * 2) File name is 8.3 format, but it contain the uppercase and + * lowercase char, muliti bytes char, etc. In this case numtail is not + * added, but Longfilename is stored. + * + * 3) When the one except for the above, or the following special + * character are contained: + * . [ ] ; , + = + * numtail is added, and Longfilename must be stored in disk . + */ +struct shortname_info { + unsigned char lower:1, + upper:1, + valid:1; +}; +#define INIT_SHORTNAME_INFO(x) do { \ + (x)->lower = 1; \ + (x)->upper = 1; \ + (x)->valid = 1; \ +} while (0) + +static inline int to_shortname_char(struct nls_table *nls, + unsigned char *buf, int buf_size, + wchar_t *src, struct shortname_info *info) +{ + int len; + + if (vfat_skip_char(*src)) { + info->valid = 0; + return 0; + } + if (vfat_replace_char(*src)) { + info->valid = 0; + buf[0] = '_'; + return 1; + } + + len = nls->uni2char(*src, buf, buf_size); + if (len <= 0) { + info->valid = 0; + buf[0] = '_'; + len = 1; + } else if (len == 1) { + unsigned char prev = buf[0]; + + if (buf[0] >= 0x7F) { + info->lower = 0; + info->upper = 0; + } + + buf[0] = nls_toupper(nls, buf[0]); + if (isalpha(buf[0])) { + if (buf[0] == prev) + info->lower = 0; + else + info->upper = 0; + } + } else { + info->lower = 0; + info->upper = 0; + } + + return len; +} + +/* + * Given a valid longname, create a unique shortname. 
Make sure the + * shortname does not exist + * Returns negative number on error, 0 for a normal + * return, and 1 for valid shortname + */ +static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, + wchar_t *uname, int ulen, + unsigned char *name_res, unsigned char *lcase) +{ + struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; + wchar_t *ip, *ext_start, *end, *name_start; + unsigned char base[9], ext[4], buf[8], *p; + unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; + int chl, chi; + int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; + int is_shortname; + struct shortname_info base_info, ext_info; + + is_shortname = 1; + INIT_SHORTNAME_INFO(&base_info); + INIT_SHORTNAME_INFO(&ext_info); + + /* Now, we need to create a shortname from the long name */ + ext_start = end = &uname[ulen]; + while (--ext_start >= uname) { + if (*ext_start == 0x002E) { /* is `.' */ + if (ext_start == end - 1) { + sz = ulen; + ext_start = NULL; + } + break; + } + } + + if (ext_start == uname - 1) { + sz = ulen; + ext_start = NULL; + } else if (ext_start) { + /* + * Names which start with a dot could be just + * an extension eg. "...test". In this case Win95 + * uses the extension as the name and sets no extension. 
+ */ + name_start = &uname[0]; + while (name_start < ext_start) { + if (!vfat_skip_char(*name_start)) + break; + name_start++; + } + if (name_start != ext_start) { + sz = ext_start - uname; + ext_start++; + } else { + sz = ulen; + ext_start = NULL; + } + } + + numtail_baselen = 6; + numtail2_baselen = 2; + for (baselen = i = 0, p = base, ip = uname; i < sz; i++, ip++) { + chl = to_shortname_char(nls, charbuf, sizeof(charbuf), + ip, &base_info); + if (chl == 0) + continue; + + if (baselen < 2 && (baselen + chl) > 2) + numtail2_baselen = baselen; + if (baselen < 6 && (baselen + chl) > 6) + numtail_baselen = baselen; + for (chi = 0; chi < chl; chi++) { + *p++ = charbuf[chi]; + baselen++; + if (baselen >= 8) + break; + } + if (baselen >= 8) { + if ((chi < chl - 1) || (ip + 1) - uname < sz) + is_shortname = 0; + break; + } + } + if (baselen == 0) { + return -EINVAL; + } + + extlen = 0; + if (ext_start) { + for (p = ext, ip = ext_start; extlen < 3 && ip < end; ip++) { + chl = to_shortname_char(nls, charbuf, sizeof(charbuf), + ip, &ext_info); + if (chl == 0) + continue; + + if ((extlen + chl) > 3) { + is_shortname = 0; + break; + } + for (chi = 0; chi < chl; chi++) { + *p++ = charbuf[chi]; + extlen++; + } + if (extlen >= 3) { + if (ip + 1 != end) + is_shortname = 0; + break; + } + } + } + ext[extlen] = '\0'; + base[baselen] = '\0'; + + /* Yes, it can happen. ".\xe5" would do it. */ + if (base[0] == DELETED_FLAG) + base[0] = 0x05; + + /* OK, at this point we know that base is not longer than 8 symbols, + * ext is not longer than 3, base is nonempty, both don't contain + * any bad symbols (lowercase transformed to uppercase). 
+ */ + + memset(name_res, ' ', MSDOS_NAME); + memcpy(name_res, base, baselen); + memcpy(name_res + 8, ext, extlen); + *lcase = 0; + if (is_shortname && base_info.valid && ext_info.valid) { + if (vfat_find_form(dir, name_res) == 0) + return -EEXIST; + + if (opts->shortname & VFAT_SFN_CREATE_WIN95) { + return (base_info.upper && ext_info.upper); + } else if (opts->shortname & VFAT_SFN_CREATE_WINNT) { + if ((base_info.upper || base_info.lower) && + (ext_info.upper || ext_info.lower)) { + if (!base_info.upper && base_info.lower) + *lcase |= CASE_LOWER_BASE; + if (!ext_info.upper && ext_info.lower) + *lcase |= CASE_LOWER_EXT; + return 1; + } + return 0; + } else { + BUG(); + } + } + + if (opts->numtail == 0) + if (vfat_find_form(dir, name_res) < 0) + return 0; + + /* + * Try to find a unique extension. This used to + * iterate through all possibilities sequentially, + * but that gave extremely bad performance. Windows + * only tries a few cases before using random + * values for part of the base. 
+ */ + + if (baselen > 6) { + baselen = numtail_baselen; + name_res[7] = ' '; + } + name_res[baselen] = '~'; + for (i = 1; i < 10; i++) { + name_res[baselen + 1] = i + '0'; + if (vfat_find_form(dir, name_res) < 0) + return 0; + } + + i = jiffies & 0xffff; + sz = (jiffies >> 16) & 0x7; + if (baselen > 2) { + baselen = numtail2_baselen; + name_res[7] = ' '; + } + name_res[baselen + 4] = '~'; + name_res[baselen + 5] = '1' + sz; + while (1) { + sprintf(buf, "%04X", i); + memcpy(&name_res[baselen], buf, 4); + if (vfat_find_form(dir, name_res) < 0) + break; + i -= 11; + } + return 0; +} + +/* Translate a string, including coded sequences into Unicode */ +static int +xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, + int *longlen, int *outlen, int escape, int utf8, + struct nls_table *nls) +{ + const unsigned char *ip; + unsigned char nc; + unsigned char *op; + unsigned int ec; + int i, k, fill; + int charlen; + + if (utf8) { + int name_len = strlen(name); + + *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); + + /* + * We stripped '.'s before and set len appropriately, + * but utf8_mbstowcs doesn't care about len + */ + *outlen -= (name_len - len); + + if (*outlen > 255) + return -ENAMETOOLONG; + + op = &outname[*outlen * sizeof(wchar_t)]; + } else { + if (nls) { + for (i = 0, ip = name, op = outname, *outlen = 0; + i < len && *outlen <= 255; + *outlen += 1) + { + if (escape && (*ip == ':')) { + if (i > len - 5) + return -EINVAL; + ec = 0; + for (k = 1; k < 5; k++) { + nc = ip[k]; + ec <<= 4; + if (nc >= '0' && nc <= '9') { + ec |= nc - '0'; + continue; + } + if (nc >= 'a' && nc <= 'f') { + ec |= nc - ('a' - 10); + continue; + } + if (nc >= 'A' && nc <= 'F') { + ec |= nc - ('A' - 10); + continue; + } + return -EINVAL; + } + *op++ = ec & 0xFF; + *op++ = ec >> 8; + ip += 5; + i += 5; + } else { + if ((charlen = nls->char2uni(ip, len - i, (wchar_t *)op)) < 0) + return -EINVAL; + ip += charlen; + i += charlen; + op += 2; + } + } + if (i < 
len) + return -ENAMETOOLONG; + } else { + for (i = 0, ip = name, op = outname, *outlen = 0; + i < len && *outlen <= 255; + i++, *outlen += 1) + { + *op++ = *ip++; + *op++ = 0; + } + if (i < len) + return -ENAMETOOLONG; + } + } + + *longlen = *outlen; + if (*outlen % 13) { + *op++ = 0; + *op++ = 0; + *outlen += 1; + if (*outlen % 13) { + fill = 13 - (*outlen % 13); + for (i = 0; i < fill; i++) { + *op++ = 0xff; + *op++ = 0xff; + } + *outlen += fill; + } + } + + return 0; +} + +static int vfat_build_slots(struct inode *dir, const unsigned char *name, + int len, int is_dir, int cluster, + struct timespec *ts, + struct msdos_dir_slot *slots, int *nr_slots) +{ + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); + struct fat_mount_options *opts = &sbi->options; + struct msdos_dir_slot *ps; + struct msdos_dir_entry *de; + unsigned char cksum, lcase; + unsigned char msdos_name[MSDOS_NAME]; + wchar_t *uname; + __le16 time, date; + int err, ulen, usize, i; + loff_t offset; + + *nr_slots = 0; + + uname = __getname(); + if (!uname) + return -ENOMEM; + + err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize, + opts->unicode_xlate, opts->utf8, sbi->nls_io); + if (err) + goto out_free; + + err = vfat_is_used_badchars(uname, ulen); + if (err) + goto out_free; + + err = vfat_create_shortname(dir, sbi->nls_disk, uname, ulen, + msdos_name, &lcase); + if (err < 0) + goto out_free; + else if (err == 1) { + de = (struct msdos_dir_entry *)slots; + err = 0; + goto shortname; + } + + /* build the entry of long file name */ + cksum = fat_checksum(msdos_name); + + *nr_slots = usize / 13; + for (ps = slots, i = *nr_slots; i > 0; i--, ps++) { + ps->id = i; + ps->attr = ATTR_EXT; + ps->reserved = 0; + ps->alias_checksum = cksum; + ps->start = 0; + offset = (i - 1) * 13; + fatwchar_to16(ps->name0_4, uname + offset, 5); + fatwchar_to16(ps->name5_10, uname + offset + 5, 6); + fatwchar_to16(ps->name11_12, uname + offset + 11, 2); + } + slots[0].id |= 0x40; + de = (struct 
msdos_dir_entry *)ps; + +shortname: + /* build the entry of 8.3 alias name */ + (*nr_slots)++; + memcpy(de->name, msdos_name, MSDOS_NAME); + de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; + de->lcase = lcase; + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + de->time = de->ctime = time; + de->date = de->cdate = de->adate = date; + de->ctime_cs = 0; + de->start = cpu_to_le16(cluster); + de->starthi = cpu_to_le16(cluster >> 16); + de->size = 0; +out_free: + __putname(uname); + return err; +} + +static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir, + int cluster, struct timespec *ts, + struct fat_slot_info *sinfo) +{ + struct msdos_dir_slot *slots; + unsigned int len; + int err, nr_slots; + + len = vfat_striptail_len(qname); + if (len == 0) + return -ENOENT; + + slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS); + if (slots == NULL) + return -ENOMEM; + + err = vfat_build_slots(dir, qname->name, len, is_dir, cluster, ts, + slots, &nr_slots); + if (err) + goto cleanup; + + err = fat_add_entries(dir, slots, nr_slots, sinfo); + if (err) + goto cleanup; + + /* update timestamp */ + dir->i_ctime = dir->i_mtime = dir->i_atime = *ts; + if (IS_DIRSYNC(dir)) + (void)fat_sync_inode(dir); + else + mark_inode_dirty(dir); +cleanup: + kfree(slots); + return err; +} + +static int vfat_find(struct inode *dir, struct qstr *qname, + struct fat_slot_info *sinfo) +{ + unsigned int len = vfat_striptail_len(qname); + if (len == 0) + return -ENOENT; + return fat_search_long(dir, qname->name, len, sinfo); +} + +static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct fat_slot_info sinfo; + struct inode *inode = NULL; + struct dentry *alias; + int err, table; + + lock_super(sb); + table = (MSDOS_SB(sb)->options.name_check == 's') ? 
2 : 0; + dentry->d_op = &vfat_dentry_ops[table]; + + err = vfat_find(dir, &dentry->d_name, &sinfo); + if (err) { + table++; + goto error; + } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + unlock_super(sb); + return ERR_CAST(inode); + } + alias = d_find_alias(inode); + if (alias) { + if (d_invalidate(alias) == 0) + dput(alias); + else { + iput(inode); + unlock_super(sb); + return alias; + } + + } +error: + unlock_super(sb); + dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_time = dentry->d_parent->d_inode->i_version; + dentry = d_splice_alias(inode, dentry); + if (dentry) { + dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_time = dentry->d_parent->d_inode->i_version; + } + return dentry; +} + +static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct fat_slot_info sinfo; + struct timespec ts; + int err; + + lock_super(sb); + + ts = CURRENT_TIME_SEC; + err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); + if (err) + goto out; + dir->i_version++; + + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + inode->i_version++; + inode->i_mtime = inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ + + dentry->d_time = dentry->d_parent->d_inode->i_version; + d_instantiate(dentry, inode); +out: + unlock_super(sb); + return err; +} + +static int vfat_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; + struct fat_slot_info sinfo; + int err; + + lock_super(sb); + + err = fat_dir_empty(inode); + if (err) + goto out; + err = vfat_find(dir, &dentry->d_name, &sinfo); + if (err) + goto out; + + err = fat_remove_entries(dir, &sinfo); /* and releases bh */ + if (err) + goto out; + drop_nlink(dir); + + clear_nlink(inode); + inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; + fat_detach(inode); +out: + unlock_super(sb); + + return err; +} + +static int vfat_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = dir->i_sb; + struct fat_slot_info sinfo; + int err; + + lock_super(sb); + + err = vfat_find(dir, &dentry->d_name, &sinfo); + if (err) + goto out; + + err = fat_remove_entries(dir, &sinfo); /* and releases bh */ + if (err) + goto out; + clear_nlink(inode); + inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; + fat_detach(inode); +out: + unlock_super(sb); + + return err; +} + +static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + struct fat_slot_info sinfo; + struct timespec ts; + int err, cluster; + + lock_super(sb); + + ts = CURRENT_TIME_SEC; + cluster = fat_alloc_new_dir(dir, &ts); + if (cluster < 0) { + err = cluster; + goto out; + } + err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo); + if (err) + goto out_free; + dir->i_version++; + inc_nlink(dir); + + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); + brelse(sinfo.bh); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + /* the directory was completed, just return a error */ + goto out; + } + inode->i_version++; + inode->i_nlink = 2; + inode->i_mtime = 
inode->i_atime = inode->i_ctime = ts; + /* timestamp is already written, so mark_inode_dirty() is unneeded. */ + + dentry->d_time = dentry->d_parent->d_inode->i_version; + d_instantiate(dentry, inode); + + unlock_super(sb); + return 0; + +out_free: + fat_free_clusters(dir, cluster); +out: + unlock_super(sb); + return err; +} + +static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct buffer_head *dotdot_bh; + struct msdos_dir_entry *dotdot_de; + struct inode *old_inode, *new_inode; + struct fat_slot_info old_sinfo, sinfo; + struct timespec ts; + loff_t dotdot_i_pos, new_i_pos; + int err, is_dir, update_dotdot, corrupt = 0; + struct super_block *sb = old_dir->i_sb; + + old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + lock_super(sb); + err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); + if (err) + goto out; + + is_dir = S_ISDIR(old_inode->i_mode); + update_dotdot = (is_dir && old_dir != new_dir); + if (update_dotdot) { + if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, + &dotdot_i_pos) < 0) { + err = -EIO; + goto out; + } + } + + ts = CURRENT_TIME_SEC; + if (new_inode) { + if (is_dir) { + err = fat_dir_empty(new_inode); + if (err) + goto out; + } + new_i_pos = MSDOS_I(new_inode)->i_pos; + fat_detach(new_inode); + } else { + err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0, + &ts, &sinfo); + if (err) + goto out; + new_i_pos = sinfo.i_pos; + } + new_dir->i_version++; + + fat_detach(old_inode); + fat_attach(old_inode, new_i_pos); + if (IS_DIRSYNC(new_dir)) { + err = fat_sync_inode(old_inode); + if (err) + goto error_inode; + } else + mark_inode_dirty(old_inode); + + if (update_dotdot) { + int start = MSDOS_I(new_dir)->i_logstart; + dotdot_de->start = cpu_to_le16(start); + dotdot_de->starthi = cpu_to_le16(start >> 16); + mark_buffer_dirty(dotdot_bh); + if (IS_DIRSYNC(new_dir)) { + err = 
sync_dirty_buffer(dotdot_bh); + if (err) + goto error_dotdot; + } + drop_nlink(old_dir); + if (!new_inode) + inc_nlink(new_dir); + } + + err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ + old_sinfo.bh = NULL; + if (err) + goto error_dotdot; + old_dir->i_version++; + old_dir->i_ctime = old_dir->i_mtime = ts; + if (IS_DIRSYNC(old_dir)) + (void)fat_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (new_inode) { + drop_nlink(new_inode); + if (is_dir) + drop_nlink(new_inode); + new_inode->i_ctime = ts; + } +out: + brelse(sinfo.bh); + brelse(dotdot_bh); + brelse(old_sinfo.bh); + unlock_super(sb); + + return err; + +error_dotdot: + /* data cluster is shared, serious corruption */ + corrupt = 1; + + if (update_dotdot) { + int start = MSDOS_I(old_dir)->i_logstart; + dotdot_de->start = cpu_to_le16(start); + dotdot_de->starthi = cpu_to_le16(start >> 16); + mark_buffer_dirty(dotdot_bh); + corrupt |= sync_dirty_buffer(dotdot_bh); + } +error_inode: + fat_detach(old_inode); + fat_attach(old_inode, old_sinfo.i_pos); + if (new_inode) { + fat_attach(new_inode, new_i_pos); + if (corrupt) + corrupt |= fat_sync_inode(new_inode); + } else { + /* + * If new entry was not sharing the data cluster, it + * shouldn't be serious corruption. 
+ */ + int err2 = fat_remove_entries(new_dir, &sinfo); + if (corrupt) + corrupt |= err2; + sinfo.bh = NULL; + } + if (corrupt < 0) { + fat_fs_panic(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld)", + __func__, sinfo.i_pos); + } + goto out; +} + +static const struct inode_operations vfat_dir_inode_operations = { + .create = vfat_create, + .lookup = vfat_lookup, + .unlink = vfat_unlink, + .mkdir = vfat_mkdir, + .rmdir = vfat_rmdir, + .rename = vfat_rename, + .setattr = fat_setattr, + .getattr = fat_getattr, +}; + +static int vfat_fill_super(struct super_block *sb, void *data, int silent) +{ + int res; + + res = fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1); + if (res) + return res; + + if (MSDOS_SB(sb)->options.name_check != 's') + sb->s_root->d_op = &vfat_dentry_ops[0]; + else + sb->s_root->d_op = &vfat_dentry_ops[2]; + + return 0; +} + +static int vfat_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, + mnt); +} + +static struct file_system_type vfat_fs_type = { + .owner = THIS_MODULE, + .name = "vfat", + .get_sb = vfat_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_vfat_fs(void) +{ + return register_filesystem(&vfat_fs_type); +} + +static void __exit exit_vfat_fs(void) +{ + unregister_filesystem(&vfat_fs_type); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("VFAT filesystem support"); +MODULE_AUTHOR("Gordon Chaffee"); + +module_init(init_vfat_fs) +module_exit(exit_vfat_fs) diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile deleted file mode 100644 index ea67646fcb9..00000000000 --- a/fs/msdos/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux msdos filesystem routines. 
-# - -obj-$(CONFIG_MSDOS_FS) += msdos.o - -msdos-y := namei.o diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c deleted file mode 100644 index e844b9809d2..00000000000 --- a/fs/msdos/namei.c +++ /dev/null @@ -1,702 +0,0 @@ -/* - * linux/fs/msdos/namei.c - * - * Written 1992,1993 by Werner Almesberger - * Hidden files 1995 by Albert Cahalan - * Rewritten for constant inumbers 1999 by Al Viro - */ - -#include -#include -#include -#include -#include - -/* Characters that are undesirable in an MS-DOS file name */ -static unsigned char bad_chars[] = "*?<>|\""; -static unsigned char bad_if_strict[] = "+=,; "; - -/***** Formats an MS-DOS file name. Rejects invalid names. */ -static int msdos_format_name(const unsigned char *name, int len, - unsigned char *res, struct fat_mount_options *opts) - /* - * name is the proposed name, len is its length, res is - * the resulting name, opts->name_check is either (r)elaxed, - * (n)ormal or (s)trict, opts->dotsOK allows dots at the - * beginning of name (for hidden files) - */ -{ - unsigned char *walk; - unsigned char c; - int space; - - if (name[0] == '.') { /* dotfile because . and .. already done */ - if (opts->dotsOK) { - /* Get rid of dot - test for it elsewhere */ - name++; - len--; - } else - return -EINVAL; - } - /* - * disallow names that _really_ start with a dot - */ - space = 1; - c = 0; - for (walk = res; len && walk - res < 8; walk++) { - c = *name++; - len--; - if (opts->name_check != 'r' && strchr(bad_chars, c)) - return -EINVAL; - if (opts->name_check == 's' && strchr(bad_if_strict, c)) - return -EINVAL; - if (c >= 'A' && c <= 'Z' && opts->name_check == 's') - return -EINVAL; - if (c < ' ' || c == ':' || c == '\\') - return -EINVAL; - /* - * 0xE5 is legal as a first character, but we must substitute - * 0x05 because 0xE5 marks deleted files. Yes, DOS really - * does this. - * It seems that Microsoft hacked DOS to support non-US - * characters after the 0xE5 character was already in use to - * mark deleted files. 
- */ - if ((res == walk) && (c == 0xE5)) - c = 0x05; - if (c == '.') - break; - space = (c == ' '); - *walk = (!opts->nocase && c >= 'a' && c <= 'z') ? c - 32 : c; - } - if (space) - return -EINVAL; - if (opts->name_check == 's' && len && c != '.') { - c = *name++; - len--; - if (c != '.') - return -EINVAL; - } - while (c != '.' && len--) - c = *name++; - if (c == '.') { - while (walk - res < 8) - *walk++ = ' '; - while (len > 0 && walk - res < MSDOS_NAME) { - c = *name++; - len--; - if (opts->name_check != 'r' && strchr(bad_chars, c)) - return -EINVAL; - if (opts->name_check == 's' && - strchr(bad_if_strict, c)) - return -EINVAL; - if (c < ' ' || c == ':' || c == '\\') - return -EINVAL; - if (c == '.') { - if (opts->name_check == 's') - return -EINVAL; - break; - } - if (c >= 'A' && c <= 'Z' && opts->name_check == 's') - return -EINVAL; - space = c == ' '; - if (!opts->nocase && c >= 'a' && c <= 'z') - *walk++ = c - 32; - else - *walk++ = c; - } - if (space) - return -EINVAL; - if (opts->name_check == 's' && len) - return -EINVAL; - } - while (walk - res < MSDOS_NAME) - *walk++ = ' '; - - return 0; -} - -/***** Locates a directory entry. Uses unformatted name. */ -static int msdos_find(struct inode *dir, const unsigned char *name, int len, - struct fat_slot_info *sinfo) -{ - struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); - unsigned char msdos_name[MSDOS_NAME]; - int err; - - err = msdos_format_name(name, len, msdos_name, &sbi->options); - if (err) - return -ENOENT; - - err = fat_scan(dir, msdos_name, sinfo); - if (!err && sbi->options.dotsOK) { - if (name[0] == '.') { - if (!(sinfo->de->attr & ATTR_HIDDEN)) - err = -ENOENT; - } else { - if (sinfo->de->attr & ATTR_HIDDEN) - err = -ENOENT; - } - if (err) - brelse(sinfo->bh); - } - return err; -} - -/* - * Compute the hash for the msdos name corresponding to the dentry. - * Note: if the name is invalid, we leave the hash code unchanged so - * that the existing dentry can be used. 
The msdos fs routines will - * return ENOENT or EINVAL as appropriate. - */ -static int msdos_hash(struct dentry *dentry, struct qstr *qstr) -{ - struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; - unsigned char msdos_name[MSDOS_NAME]; - int error; - - error = msdos_format_name(qstr->name, qstr->len, msdos_name, options); - if (!error) - qstr->hash = full_name_hash(msdos_name, MSDOS_NAME); - return 0; -} - -/* - * Compare two msdos names. If either of the names are invalid, - * we fall back to doing the standard name comparison. - */ -static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) -{ - struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; - unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; - int error; - - error = msdos_format_name(a->name, a->len, a_msdos_name, options); - if (error) - goto old_compare; - error = msdos_format_name(b->name, b->len, b_msdos_name, options); - if (error) - goto old_compare; - error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); -out: - return error; - -old_compare: - error = 1; - if (a->len == b->len) - error = memcmp(a->name, b->name, a->len); - goto out; -} - -static struct dentry_operations msdos_dentry_operations = { - .d_hash = msdos_hash, - .d_compare = msdos_cmp, -}; - -/* - * AV. Wrappers for FAT sb operations. Is it wise? 
- */ - -/***** Get inode using directory and name */ -static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct super_block *sb = dir->i_sb; - struct fat_slot_info sinfo; - struct inode *inode = NULL; - int res; - - dentry->d_op = &msdos_dentry_operations; - - lock_super(sb); - res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (res == -ENOENT) - goto add; - if (res < 0) - goto out; - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - res = PTR_ERR(inode); - goto out; - } -add: - res = 0; - dentry = d_splice_alias(inode, dentry); - if (dentry) - dentry->d_op = &msdos_dentry_operations; -out: - unlock_super(sb); - if (!res) - return dentry; - return ERR_PTR(res); -} - -/***** Creates a directory entry (name is already formatted). */ -static int msdos_add_entry(struct inode *dir, const unsigned char *name, - int is_dir, int is_hid, int cluster, - struct timespec *ts, struct fat_slot_info *sinfo) -{ - struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); - struct msdos_dir_entry de; - __le16 time, date; - int err; - - memcpy(de.name, name, MSDOS_NAME); - de.attr = is_dir ? 
ATTR_DIR : ATTR_ARCH; - if (is_hid) - de.attr |= ATTR_HIDDEN; - de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); - de.cdate = de.adate = 0; - de.ctime = 0; - de.ctime_cs = 0; - de.time = time; - de.date = date; - de.start = cpu_to_le16(cluster); - de.starthi = cpu_to_le16(cluster >> 16); - de.size = 0; - - err = fat_add_entries(dir, &de, 1, sinfo); - if (err) - return err; - - dir->i_ctime = dir->i_mtime = *ts; - if (IS_DIRSYNC(dir)) - (void)fat_sync_inode(dir); - else - mark_inode_dirty(dir); - - return 0; -} - -/***** Create a file */ -static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - struct super_block *sb = dir->i_sb; - struct inode *inode = NULL; - struct fat_slot_info sinfo; - struct timespec ts; - unsigned char msdos_name[MSDOS_NAME]; - int err, is_hid; - - lock_super(sb); - - err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, - msdos_name, &MSDOS_SB(sb)->options); - if (err) - goto out; - is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); - /* Have to do it due to foo vs. .foo conflicts */ - if (!fat_scan(dir, msdos_name, &sinfo)) { - brelse(sinfo.bh); - err = -EINVAL; - goto out; - } - - ts = CURRENT_TIME_SEC; - err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo); - if (err) - goto out; - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - inode->i_mtime = inode->i_atime = inode->i_ctime = ts; - /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ - - d_instantiate(dentry, inode); -out: - unlock_super(sb); - if (!err) - err = fat_flush_inodes(sb, dir, inode); - return err; -} - -/***** Remove a directory */ -static int msdos_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct super_block *sb = dir->i_sb; - struct inode *inode = dentry->d_inode; - struct fat_slot_info sinfo; - int err; - - lock_super(sb); - /* - * Check whether the directory is not in use, then check - * whether it is empty. - */ - err = fat_dir_empty(inode); - if (err) - goto out; - err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (err) - goto out; - - err = fat_remove_entries(dir, &sinfo); /* and releases bh */ - if (err) - goto out; - drop_nlink(dir); - - clear_nlink(inode); - inode->i_ctime = CURRENT_TIME_SEC; - fat_detach(inode); -out: - unlock_super(sb); - if (!err) - err = fat_flush_inodes(sb, dir, inode); - - return err; -} - -/***** Make a directory */ -static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct super_block *sb = dir->i_sb; - struct fat_slot_info sinfo; - struct inode *inode; - unsigned char msdos_name[MSDOS_NAME]; - struct timespec ts; - int err, is_hid, cluster; - - lock_super(sb); - - err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, - msdos_name, &MSDOS_SB(sb)->options); - if (err) - goto out; - is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); - /* foo vs .foo situation */ - if (!fat_scan(dir, msdos_name, &sinfo)) { - brelse(sinfo.bh); - err = -EINVAL; - goto out; - } - - ts = CURRENT_TIME_SEC; - cluster = fat_alloc_new_dir(dir, &ts); - if (cluster < 0) { - err = cluster; - goto out; - } - err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo); - if (err) - goto out_free; - inc_nlink(dir); - - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - /* the directory was completed, just return a error */ - goto out; - } - inode->i_nlink 
= 2; - inode->i_mtime = inode->i_atime = inode->i_ctime = ts; - /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - - d_instantiate(dentry, inode); - - unlock_super(sb); - fat_flush_inodes(sb, dir, inode); - return 0; - -out_free: - fat_free_clusters(dir, cluster); -out: - unlock_super(sb); - return err; -} - -/***** Unlink a file */ -static int msdos_unlink(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct super_block *sb= inode->i_sb; - struct fat_slot_info sinfo; - int err; - - lock_super(sb); - err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (err) - goto out; - - err = fat_remove_entries(dir, &sinfo); /* and releases bh */ - if (err) - goto out; - clear_nlink(inode); - inode->i_ctime = CURRENT_TIME_SEC; - fat_detach(inode); -out: - unlock_super(sb); - if (!err) - err = fat_flush_inodes(sb, dir, inode); - - return err; -} - -static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, - struct dentry *old_dentry, - struct inode *new_dir, unsigned char *new_name, - struct dentry *new_dentry, int is_hid) -{ - struct buffer_head *dotdot_bh; - struct msdos_dir_entry *dotdot_de; - struct inode *old_inode, *new_inode; - struct fat_slot_info old_sinfo, sinfo; - struct timespec ts; - loff_t dotdot_i_pos, new_i_pos; - int err, old_attrs, is_dir, update_dotdot, corrupt = 0; - - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; - old_inode = old_dentry->d_inode; - new_inode = new_dentry->d_inode; - - err = fat_scan(old_dir, old_name, &old_sinfo); - if (err) { - err = -EIO; - goto out; - } - - is_dir = S_ISDIR(old_inode->i_mode); - update_dotdot = (is_dir && old_dir != new_dir); - if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, - &dotdot_i_pos) < 0) { - err = -EIO; - goto out; - } - } - - old_attrs = MSDOS_I(old_inode)->i_attrs; - err = fat_scan(new_dir, new_name, &sinfo); - if (!err) { - if (!new_inode) { - /* "foo" -> ".foo" case. 
just change the ATTR_HIDDEN */ - if (sinfo.de != old_sinfo.de) { - err = -EINVAL; - goto out; - } - if (is_hid) - MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; - else - MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; - if (IS_DIRSYNC(old_dir)) { - err = fat_sync_inode(old_inode); - if (err) { - MSDOS_I(old_inode)->i_attrs = old_attrs; - goto out; - } - } else - mark_inode_dirty(old_inode); - - old_dir->i_version++; - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; - if (IS_DIRSYNC(old_dir)) - (void)fat_sync_inode(old_dir); - else - mark_inode_dirty(old_dir); - goto out; - } - } - - ts = CURRENT_TIME_SEC; - if (new_inode) { - if (err) - goto out; - if (is_dir) { - err = fat_dir_empty(new_inode); - if (err) - goto out; - } - new_i_pos = MSDOS_I(new_inode)->i_pos; - fat_detach(new_inode); - } else { - err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0, - &ts, &sinfo); - if (err) - goto out; - new_i_pos = sinfo.i_pos; - } - new_dir->i_version++; - - fat_detach(old_inode); - fat_attach(old_inode, new_i_pos); - if (is_hid) - MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; - else - MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; - if (IS_DIRSYNC(new_dir)) { - err = fat_sync_inode(old_inode); - if (err) - goto error_inode; - } else - mark_inode_dirty(old_inode); - - if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); - if (IS_DIRSYNC(new_dir)) { - err = sync_dirty_buffer(dotdot_bh); - if (err) - goto error_dotdot; - } - drop_nlink(old_dir); - if (!new_inode) - inc_nlink(new_dir); - } - - err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ - old_sinfo.bh = NULL; - if (err) - goto error_dotdot; - old_dir->i_version++; - old_dir->i_ctime = old_dir->i_mtime = ts; - if (IS_DIRSYNC(old_dir)) - (void)fat_sync_inode(old_dir); - else - mark_inode_dirty(old_dir); - - if (new_inode) { - drop_nlink(new_inode); - if (is_dir) - 
drop_nlink(new_inode); - new_inode->i_ctime = ts; - } -out: - brelse(sinfo.bh); - brelse(dotdot_bh); - brelse(old_sinfo.bh); - return err; - -error_dotdot: - /* data cluster is shared, serious corruption */ - corrupt = 1; - - if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); - corrupt |= sync_dirty_buffer(dotdot_bh); - } -error_inode: - fat_detach(old_inode); - fat_attach(old_inode, old_sinfo.i_pos); - MSDOS_I(old_inode)->i_attrs = old_attrs; - if (new_inode) { - fat_attach(new_inode, new_i_pos); - if (corrupt) - corrupt |= fat_sync_inode(new_inode); - } else { - /* - * If new entry was not sharing the data cluster, it - * shouldn't be serious corruption. - */ - int err2 = fat_remove_entries(new_dir, &sinfo); - if (corrupt) - corrupt |= err2; - sinfo.bh = NULL; - } - if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, - "%s: Filesystem corrupted (i_pos %lld)", - __func__, sinfo.i_pos); - } - goto out; -} - -/***** Rename, a wrapper for rename_same_dir & rename_diff_dir */ -static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct super_block *sb = old_dir->i_sb; - unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; - int err, is_hid; - - lock_super(sb); - - err = msdos_format_name(old_dentry->d_name.name, - old_dentry->d_name.len, old_msdos_name, - &MSDOS_SB(old_dir->i_sb)->options); - if (err) - goto out; - err = msdos_format_name(new_dentry->d_name.name, - new_dentry->d_name.len, new_msdos_name, - &MSDOS_SB(new_dir->i_sb)->options); - if (err) - goto out; - - is_hid = - (new_dentry->d_name.name[0] == '.') && (new_msdos_name[0] != '.'); - - err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, - new_dir, new_msdos_name, new_dentry, is_hid); -out: - unlock_super(sb); - if (!err) - err = fat_flush_inodes(sb, old_dir, new_dir); - 
return err; -} - -static const struct inode_operations msdos_dir_inode_operations = { - .create = msdos_create, - .lookup = msdos_lookup, - .unlink = msdos_unlink, - .mkdir = msdos_mkdir, - .rmdir = msdos_rmdir, - .rename = msdos_rename, - .setattr = fat_setattr, - .getattr = fat_getattr, -}; - -static int msdos_fill_super(struct super_block *sb, void *data, int silent) -{ - int res; - - res = fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 0); - if (res) - return res; - - sb->s_flags |= MS_NOATIME; - sb->s_root->d_op = &msdos_dentry_operations; - return 0; -} - -static int msdos_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt) -{ - return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, - mnt); -} - -static struct file_system_type msdos_fs_type = { - .owner = THIS_MODULE, - .name = "msdos", - .get_sb = msdos_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static int __init init_msdos_fs(void) -{ - return register_filesystem(&msdos_fs_type); -} - -static void __exit exit_msdos_fs(void) -{ - unregister_filesystem(&msdos_fs_type); -} - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Werner Almesberger"); -MODULE_DESCRIPTION("MS-DOS filesystem support"); - -module_init(init_msdos_fs) -module_exit(exit_msdos_fs) diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile deleted file mode 100644 index 40f2798a4f0..00000000000 --- a/fs/vfat/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux vfat-filesystem routines. -# - -obj-$(CONFIG_VFAT_FS) += vfat.o - -vfat-y := namei.o diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c deleted file mode 100644 index 155c10b4adb..00000000000 --- a/fs/vfat/namei.c +++ /dev/null @@ -1,1055 +0,0 @@ -/* - * linux/fs/vfat/namei.c - * - * Written 1992,1993 by Werner Almesberger - * - * Windows95/Windows NT compatible extended MSDOS filesystem - * by Gordon Chaffee Copyright (C) 1995. 
Send bug reports for the - * VFAT filesystem to . Specify - * what file operation caused you trouble and if you can duplicate - * the problem, send a script that demonstrates it. - * - * Short name translation 1999, 2001 by Wolfram Pienkoss - * - * Support Multibyte characters and cleanup by - * OGAWA Hirofumi - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - int ret = 1; - - if (!dentry->d_inode && - nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) - /* - * negative dentry is dropped, in order to make sure - * to use the name which a user desires if this is - * create path. - */ - ret = 0; - else { - spin_lock(&dentry->d_lock); - if (dentry->d_time != dentry->d_parent->d_inode->i_version) - ret = 0; - spin_unlock(&dentry->d_lock); - } - return ret; -} - -/* returns the length of a struct qstr, ignoring trailing dots */ -static unsigned int vfat_striptail_len(struct qstr *qstr) -{ - unsigned int len = qstr->len; - - while (len && qstr->name[len - 1] == '.') - len--; - return len; -} - -/* - * Compute the hash for the vfat name corresponding to the dentry. - * Note: if the name is invalid, we leave the hash code unchanged so - * that the existing dentry can be used. The vfat fs routines will - * return ENOENT or EINVAL as appropriate. - */ -static int vfat_hash(struct dentry *dentry, struct qstr *qstr) -{ - qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); - return 0; -} - -/* - * Compute the hash for the vfat name corresponding to the dentry. - * Note: if the name is invalid, we leave the hash code unchanged so - * that the existing dentry can be used. The vfat fs routines will - * return ENOENT or EINVAL as appropriate. 
- */ -static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) -{ - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; - const unsigned char *name; - unsigned int len; - unsigned long hash; - - name = qstr->name; - len = vfat_striptail_len(qstr); - - hash = init_name_hash(); - while (len--) - hash = partial_name_hash(nls_tolower(t, *name++), hash); - qstr->hash = end_name_hash(hash); - - return 0; -} - -/* - * Case insensitive compare of two vfat names. - */ -static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) -{ - struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; - unsigned int alen, blen; - - /* A filename cannot end in '.' or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); - if (alen == blen) { - if (nls_strnicmp(t, a->name, b->name, alen) == 0) - return 0; - } - return 1; -} - -/* - * Case sensitive compare of two vfat names. - */ -static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) -{ - unsigned int alen, blen; - - /* A filename cannot end in '.' 
or we treat it like it has none */ - alen = vfat_striptail_len(a); - blen = vfat_striptail_len(b); - if (alen == blen) { - if (strncmp(a->name, b->name, alen) == 0) - return 0; - } - return 1; -} - -static struct dentry_operations vfat_dentry_ops[4] = { - { - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - } -}; - -/* Characters that are undesirable in an MS-DOS file name */ - -static inline wchar_t vfat_bad_char(wchar_t w) -{ - return (w < 0x0020) - || (w == '*') || (w == '?') || (w == '<') || (w == '>') - || (w == '|') || (w == '"') || (w == ':') || (w == '/') - || (w == '\\'); -} - -static inline wchar_t vfat_replace_char(wchar_t w) -{ - return (w == '[') || (w == ']') || (w == ';') || (w == ',') - || (w == '+') || (w == '='); -} - -static wchar_t vfat_skip_char(wchar_t w) -{ - return (w == '.') || (w == ' '); -} - -static inline int vfat_is_used_badchars(const wchar_t *s, int len) -{ - int i; - - for (i = 0; i < len; i++) - if (vfat_bad_char(s[i])) - return -EINVAL; - - if (s[i - 1] == ' ') /* last character cannot be space */ - return -EINVAL; - - return 0; -} - -static int vfat_find_form(struct inode *dir, unsigned char *name) -{ - struct fat_slot_info sinfo; - int err = fat_scan(dir, name, &sinfo); - if (err) - return -ENOENT; - brelse(sinfo.bh); - return 0; -} - -/* - * 1) Valid characters for the 8.3 format alias are any combination of - * letters, uppercase alphabets, digits, any of the - * following special characters: - * $ % ' ` - @ { } ~ ! # ( ) & _ ^ - * In this case Longfilename is not stored in disk. - * - * WinNT's Extension: - * File name and extension name is contain uppercase/lowercase - * only. And it is expressed by CASE_LOWER_BASE and CASE_LOWER_EXT. 
- * - * 2) File name is 8.3 format, but it contain the uppercase and - * lowercase char, muliti bytes char, etc. In this case numtail is not - * added, but Longfilename is stored. - * - * 3) When the one except for the above, or the following special - * character are contained: - * . [ ] ; , + = - * numtail is added, and Longfilename must be stored in disk . - */ -struct shortname_info { - unsigned char lower:1, - upper:1, - valid:1; -}; -#define INIT_SHORTNAME_INFO(x) do { \ - (x)->lower = 1; \ - (x)->upper = 1; \ - (x)->valid = 1; \ -} while (0) - -static inline int to_shortname_char(struct nls_table *nls, - unsigned char *buf, int buf_size, - wchar_t *src, struct shortname_info *info) -{ - int len; - - if (vfat_skip_char(*src)) { - info->valid = 0; - return 0; - } - if (vfat_replace_char(*src)) { - info->valid = 0; - buf[0] = '_'; - return 1; - } - - len = nls->uni2char(*src, buf, buf_size); - if (len <= 0) { - info->valid = 0; - buf[0] = '_'; - len = 1; - } else if (len == 1) { - unsigned char prev = buf[0]; - - if (buf[0] >= 0x7F) { - info->lower = 0; - info->upper = 0; - } - - buf[0] = nls_toupper(nls, buf[0]); - if (isalpha(buf[0])) { - if (buf[0] == prev) - info->lower = 0; - else - info->upper = 0; - } - } else { - info->lower = 0; - info->upper = 0; - } - - return len; -} - -/* - * Given a valid longname, create a unique shortname. 
Make sure the - * shortname does not exist - * Returns negative number on error, 0 for a normal - * return, and 1 for valid shortname - */ -static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, - wchar_t *uname, int ulen, - unsigned char *name_res, unsigned char *lcase) -{ - struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; - wchar_t *ip, *ext_start, *end, *name_start; - unsigned char base[9], ext[4], buf[8], *p; - unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; - int chl, chi; - int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; - int is_shortname; - struct shortname_info base_info, ext_info; - - is_shortname = 1; - INIT_SHORTNAME_INFO(&base_info); - INIT_SHORTNAME_INFO(&ext_info); - - /* Now, we need to create a shortname from the long name */ - ext_start = end = &uname[ulen]; - while (--ext_start >= uname) { - if (*ext_start == 0x002E) { /* is `.' */ - if (ext_start == end - 1) { - sz = ulen; - ext_start = NULL; - } - break; - } - } - - if (ext_start == uname - 1) { - sz = ulen; - ext_start = NULL; - } else if (ext_start) { - /* - * Names which start with a dot could be just - * an extension eg. "...test". In this case Win95 - * uses the extension as the name and sets no extension. 
- */ - name_start = &uname[0]; - while (name_start < ext_start) { - if (!vfat_skip_char(*name_start)) - break; - name_start++; - } - if (name_start != ext_start) { - sz = ext_start - uname; - ext_start++; - } else { - sz = ulen; - ext_start = NULL; - } - } - - numtail_baselen = 6; - numtail2_baselen = 2; - for (baselen = i = 0, p = base, ip = uname; i < sz; i++, ip++) { - chl = to_shortname_char(nls, charbuf, sizeof(charbuf), - ip, &base_info); - if (chl == 0) - continue; - - if (baselen < 2 && (baselen + chl) > 2) - numtail2_baselen = baselen; - if (baselen < 6 && (baselen + chl) > 6) - numtail_baselen = baselen; - for (chi = 0; chi < chl; chi++) { - *p++ = charbuf[chi]; - baselen++; - if (baselen >= 8) - break; - } - if (baselen >= 8) { - if ((chi < chl - 1) || (ip + 1) - uname < sz) - is_shortname = 0; - break; - } - } - if (baselen == 0) { - return -EINVAL; - } - - extlen = 0; - if (ext_start) { - for (p = ext, ip = ext_start; extlen < 3 && ip < end; ip++) { - chl = to_shortname_char(nls, charbuf, sizeof(charbuf), - ip, &ext_info); - if (chl == 0) - continue; - - if ((extlen + chl) > 3) { - is_shortname = 0; - break; - } - for (chi = 0; chi < chl; chi++) { - *p++ = charbuf[chi]; - extlen++; - } - if (extlen >= 3) { - if (ip + 1 != end) - is_shortname = 0; - break; - } - } - } - ext[extlen] = '\0'; - base[baselen] = '\0'; - - /* Yes, it can happen. ".\xe5" would do it. */ - if (base[0] == DELETED_FLAG) - base[0] = 0x05; - - /* OK, at this point we know that base is not longer than 8 symbols, - * ext is not longer than 3, base is nonempty, both don't contain - * any bad symbols (lowercase transformed to uppercase). 
- */ - - memset(name_res, ' ', MSDOS_NAME); - memcpy(name_res, base, baselen); - memcpy(name_res + 8, ext, extlen); - *lcase = 0; - if (is_shortname && base_info.valid && ext_info.valid) { - if (vfat_find_form(dir, name_res) == 0) - return -EEXIST; - - if (opts->shortname & VFAT_SFN_CREATE_WIN95) { - return (base_info.upper && ext_info.upper); - } else if (opts->shortname & VFAT_SFN_CREATE_WINNT) { - if ((base_info.upper || base_info.lower) && - (ext_info.upper || ext_info.lower)) { - if (!base_info.upper && base_info.lower) - *lcase |= CASE_LOWER_BASE; - if (!ext_info.upper && ext_info.lower) - *lcase |= CASE_LOWER_EXT; - return 1; - } - return 0; - } else { - BUG(); - } - } - - if (opts->numtail == 0) - if (vfat_find_form(dir, name_res) < 0) - return 0; - - /* - * Try to find a unique extension. This used to - * iterate through all possibilities sequentially, - * but that gave extremely bad performance. Windows - * only tries a few cases before using random - * values for part of the base. 
- */ - - if (baselen > 6) { - baselen = numtail_baselen; - name_res[7] = ' '; - } - name_res[baselen] = '~'; - for (i = 1; i < 10; i++) { - name_res[baselen + 1] = i + '0'; - if (vfat_find_form(dir, name_res) < 0) - return 0; - } - - i = jiffies & 0xffff; - sz = (jiffies >> 16) & 0x7; - if (baselen > 2) { - baselen = numtail2_baselen; - name_res[7] = ' '; - } - name_res[baselen + 4] = '~'; - name_res[baselen + 5] = '1' + sz; - while (1) { - sprintf(buf, "%04X", i); - memcpy(&name_res[baselen], buf, 4); - if (vfat_find_form(dir, name_res) < 0) - break; - i -= 11; - } - return 0; -} - -/* Translate a string, including coded sequences into Unicode */ -static int -xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, - int *longlen, int *outlen, int escape, int utf8, - struct nls_table *nls) -{ - const unsigned char *ip; - unsigned char nc; - unsigned char *op; - unsigned int ec; - int i, k, fill; - int charlen; - - if (utf8) { - int name_len = strlen(name); - - *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); - - /* - * We stripped '.'s before and set len appropriately, - * but utf8_mbstowcs doesn't care about len - */ - *outlen -= (name_len - len); - - if (*outlen > 255) - return -ENAMETOOLONG; - - op = &outname[*outlen * sizeof(wchar_t)]; - } else { - if (nls) { - for (i = 0, ip = name, op = outname, *outlen = 0; - i < len && *outlen <= 255; - *outlen += 1) - { - if (escape && (*ip == ':')) { - if (i > len - 5) - return -EINVAL; - ec = 0; - for (k = 1; k < 5; k++) { - nc = ip[k]; - ec <<= 4; - if (nc >= '0' && nc <= '9') { - ec |= nc - '0'; - continue; - } - if (nc >= 'a' && nc <= 'f') { - ec |= nc - ('a' - 10); - continue; - } - if (nc >= 'A' && nc <= 'F') { - ec |= nc - ('A' - 10); - continue; - } - return -EINVAL; - } - *op++ = ec & 0xFF; - *op++ = ec >> 8; - ip += 5; - i += 5; - } else { - if ((charlen = nls->char2uni(ip, len - i, (wchar_t *)op)) < 0) - return -EINVAL; - ip += charlen; - i += charlen; - op += 2; - } - } - if (i < 
len) - return -ENAMETOOLONG; - } else { - for (i = 0, ip = name, op = outname, *outlen = 0; - i < len && *outlen <= 255; - i++, *outlen += 1) - { - *op++ = *ip++; - *op++ = 0; - } - if (i < len) - return -ENAMETOOLONG; - } - } - - *longlen = *outlen; - if (*outlen % 13) { - *op++ = 0; - *op++ = 0; - *outlen += 1; - if (*outlen % 13) { - fill = 13 - (*outlen % 13); - for (i = 0; i < fill; i++) { - *op++ = 0xff; - *op++ = 0xff; - } - *outlen += fill; - } - } - - return 0; -} - -static int vfat_build_slots(struct inode *dir, const unsigned char *name, - int len, int is_dir, int cluster, - struct timespec *ts, - struct msdos_dir_slot *slots, int *nr_slots) -{ - struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); - struct fat_mount_options *opts = &sbi->options; - struct msdos_dir_slot *ps; - struct msdos_dir_entry *de; - unsigned char cksum, lcase; - unsigned char msdos_name[MSDOS_NAME]; - wchar_t *uname; - __le16 time, date; - int err, ulen, usize, i; - loff_t offset; - - *nr_slots = 0; - - uname = __getname(); - if (!uname) - return -ENOMEM; - - err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize, - opts->unicode_xlate, opts->utf8, sbi->nls_io); - if (err) - goto out_free; - - err = vfat_is_used_badchars(uname, ulen); - if (err) - goto out_free; - - err = vfat_create_shortname(dir, sbi->nls_disk, uname, ulen, - msdos_name, &lcase); - if (err < 0) - goto out_free; - else if (err == 1) { - de = (struct msdos_dir_entry *)slots; - err = 0; - goto shortname; - } - - /* build the entry of long file name */ - cksum = fat_checksum(msdos_name); - - *nr_slots = usize / 13; - for (ps = slots, i = *nr_slots; i > 0; i--, ps++) { - ps->id = i; - ps->attr = ATTR_EXT; - ps->reserved = 0; - ps->alias_checksum = cksum; - ps->start = 0; - offset = (i - 1) * 13; - fatwchar_to16(ps->name0_4, uname + offset, 5); - fatwchar_to16(ps->name5_10, uname + offset + 5, 6); - fatwchar_to16(ps->name11_12, uname + offset + 11, 2); - } - slots[0].id |= 0x40; - de = (struct 
msdos_dir_entry *)ps; - -shortname: - /* build the entry of 8.3 alias name */ - (*nr_slots)++; - memcpy(de->name, msdos_name, MSDOS_NAME); - de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; - de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); - de->time = de->ctime = time; - de->date = de->cdate = de->adate = date; - de->ctime_cs = 0; - de->start = cpu_to_le16(cluster); - de->starthi = cpu_to_le16(cluster >> 16); - de->size = 0; -out_free: - __putname(uname); - return err; -} - -static int vfat_add_entry(struct inode *dir, struct qstr *qname, int is_dir, - int cluster, struct timespec *ts, - struct fat_slot_info *sinfo) -{ - struct msdos_dir_slot *slots; - unsigned int len; - int err, nr_slots; - - len = vfat_striptail_len(qname); - if (len == 0) - return -ENOENT; - - slots = kmalloc(sizeof(*slots) * MSDOS_SLOTS, GFP_NOFS); - if (slots == NULL) - return -ENOMEM; - - err = vfat_build_slots(dir, qname->name, len, is_dir, cluster, ts, - slots, &nr_slots); - if (err) - goto cleanup; - - err = fat_add_entries(dir, slots, nr_slots, sinfo); - if (err) - goto cleanup; - - /* update timestamp */ - dir->i_ctime = dir->i_mtime = dir->i_atime = *ts; - if (IS_DIRSYNC(dir)) - (void)fat_sync_inode(dir); - else - mark_inode_dirty(dir); -cleanup: - kfree(slots); - return err; -} - -static int vfat_find(struct inode *dir, struct qstr *qname, - struct fat_slot_info *sinfo) -{ - unsigned int len = vfat_striptail_len(qname); - if (len == 0) - return -ENOENT; - return fat_search_long(dir, qname->name, len, sinfo); -} - -static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct super_block *sb = dir->i_sb; - struct fat_slot_info sinfo; - struct inode *inode = NULL; - struct dentry *alias; - int err, table; - - lock_super(sb); - table = (MSDOS_SB(sb)->options.name_check == 's') ? 
2 : 0; - dentry->d_op = &vfat_dentry_ops[table]; - - err = vfat_find(dir, &dentry->d_name, &sinfo); - if (err) { - table++; - goto error; - } - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - unlock_super(sb); - return ERR_CAST(inode); - } - alias = d_find_alias(inode); - if (alias) { - if (d_invalidate(alias) == 0) - dput(alias); - else { - iput(inode); - unlock_super(sb); - return alias; - } - - } -error: - unlock_super(sb); - dentry->d_op = &vfat_dentry_ops[table]; - dentry->d_time = dentry->d_parent->d_inode->i_version; - dentry = d_splice_alias(inode, dentry); - if (dentry) { - dentry->d_op = &vfat_dentry_ops[table]; - dentry->d_time = dentry->d_parent->d_inode->i_version; - } - return dentry; -} - -static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - struct super_block *sb = dir->i_sb; - struct inode *inode; - struct fat_slot_info sinfo; - struct timespec ts; - int err; - - lock_super(sb); - - ts = CURRENT_TIME_SEC; - err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); - if (err) - goto out; - dir->i_version++; - - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - inode->i_version++; - inode->i_mtime = inode->i_atime = inode->i_ctime = ts; - /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ - - dentry->d_time = dentry->d_parent->d_inode->i_version; - d_instantiate(dentry, inode); -out: - unlock_super(sb); - return err; -} - -static int vfat_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct super_block *sb = dir->i_sb; - struct fat_slot_info sinfo; - int err; - - lock_super(sb); - - err = fat_dir_empty(inode); - if (err) - goto out; - err = vfat_find(dir, &dentry->d_name, &sinfo); - if (err) - goto out; - - err = fat_remove_entries(dir, &sinfo); /* and releases bh */ - if (err) - goto out; - drop_nlink(dir); - - clear_nlink(inode); - inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; - fat_detach(inode); -out: - unlock_super(sb); - - return err; -} - -static int vfat_unlink(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct super_block *sb = dir->i_sb; - struct fat_slot_info sinfo; - int err; - - lock_super(sb); - - err = vfat_find(dir, &dentry->d_name, &sinfo); - if (err) - goto out; - - err = fat_remove_entries(dir, &sinfo); /* and releases bh */ - if (err) - goto out; - clear_nlink(inode); - inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; - fat_detach(inode); -out: - unlock_super(sb); - - return err; -} - -static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct super_block *sb = dir->i_sb; - struct inode *inode; - struct fat_slot_info sinfo; - struct timespec ts; - int err, cluster; - - lock_super(sb); - - ts = CURRENT_TIME_SEC; - cluster = fat_alloc_new_dir(dir, &ts); - if (cluster < 0) { - err = cluster; - goto out; - } - err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo); - if (err) - goto out_free; - dir->i_version++; - inc_nlink(dir); - - inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); - brelse(sinfo.bh); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - /* the directory was completed, just return a error */ - goto out; - } - inode->i_version++; - inode->i_nlink = 2; - inode->i_mtime = 
inode->i_atime = inode->i_ctime = ts; - /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - - dentry->d_time = dentry->d_parent->d_inode->i_version; - d_instantiate(dentry, inode); - - unlock_super(sb); - return 0; - -out_free: - fat_free_clusters(dir, cluster); -out: - unlock_super(sb); - return err; -} - -static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -{ - struct buffer_head *dotdot_bh; - struct msdos_dir_entry *dotdot_de; - struct inode *old_inode, *new_inode; - struct fat_slot_info old_sinfo, sinfo; - struct timespec ts; - loff_t dotdot_i_pos, new_i_pos; - int err, is_dir, update_dotdot, corrupt = 0; - struct super_block *sb = old_dir->i_sb; - - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; - old_inode = old_dentry->d_inode; - new_inode = new_dentry->d_inode; - lock_super(sb); - err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo); - if (err) - goto out; - - is_dir = S_ISDIR(old_inode->i_mode); - update_dotdot = (is_dir && old_dir != new_dir); - if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, - &dotdot_i_pos) < 0) { - err = -EIO; - goto out; - } - } - - ts = CURRENT_TIME_SEC; - if (new_inode) { - if (is_dir) { - err = fat_dir_empty(new_inode); - if (err) - goto out; - } - new_i_pos = MSDOS_I(new_inode)->i_pos; - fat_detach(new_inode); - } else { - err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0, - &ts, &sinfo); - if (err) - goto out; - new_i_pos = sinfo.i_pos; - } - new_dir->i_version++; - - fat_detach(old_inode); - fat_attach(old_inode, new_i_pos); - if (IS_DIRSYNC(new_dir)) { - err = fat_sync_inode(old_inode); - if (err) - goto error_inode; - } else - mark_inode_dirty(old_inode); - - if (update_dotdot) { - int start = MSDOS_I(new_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); - if (IS_DIRSYNC(new_dir)) { - err = 
sync_dirty_buffer(dotdot_bh); - if (err) - goto error_dotdot; - } - drop_nlink(old_dir); - if (!new_inode) - inc_nlink(new_dir); - } - - err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ - old_sinfo.bh = NULL; - if (err) - goto error_dotdot; - old_dir->i_version++; - old_dir->i_ctime = old_dir->i_mtime = ts; - if (IS_DIRSYNC(old_dir)) - (void)fat_sync_inode(old_dir); - else - mark_inode_dirty(old_dir); - - if (new_inode) { - drop_nlink(new_inode); - if (is_dir) - drop_nlink(new_inode); - new_inode->i_ctime = ts; - } -out: - brelse(sinfo.bh); - brelse(dotdot_bh); - brelse(old_sinfo.bh); - unlock_super(sb); - - return err; - -error_dotdot: - /* data cluster is shared, serious corruption */ - corrupt = 1; - - if (update_dotdot) { - int start = MSDOS_I(old_dir)->i_logstart; - dotdot_de->start = cpu_to_le16(start); - dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); - corrupt |= sync_dirty_buffer(dotdot_bh); - } -error_inode: - fat_detach(old_inode); - fat_attach(old_inode, old_sinfo.i_pos); - if (new_inode) { - fat_attach(new_inode, new_i_pos); - if (corrupt) - corrupt |= fat_sync_inode(new_inode); - } else { - /* - * If new entry was not sharing the data cluster, it - * shouldn't be serious corruption. 
- */ - int err2 = fat_remove_entries(new_dir, &sinfo); - if (corrupt) - corrupt |= err2; - sinfo.bh = NULL; - } - if (corrupt < 0) { - fat_fs_panic(new_dir->i_sb, - "%s: Filesystem corrupted (i_pos %lld)", - __func__, sinfo.i_pos); - } - goto out; -} - -static const struct inode_operations vfat_dir_inode_operations = { - .create = vfat_create, - .lookup = vfat_lookup, - .unlink = vfat_unlink, - .mkdir = vfat_mkdir, - .rmdir = vfat_rmdir, - .rename = vfat_rename, - .setattr = fat_setattr, - .getattr = fat_getattr, -}; - -static int vfat_fill_super(struct super_block *sb, void *data, int silent) -{ - int res; - - res = fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1); - if (res) - return res; - - if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_dentry_ops[0]; - else - sb->s_root->d_op = &vfat_dentry_ops[2]; - - return 0; -} - -static int vfat_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt) -{ - return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, - mnt); -} - -static struct file_system_type vfat_fs_type = { - .owner = THIS_MODULE, - .name = "vfat", - .get_sb = vfat_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -static int __init init_vfat_fs(void) -{ - return register_filesystem(&vfat_fs_type); -} - -static void __exit exit_vfat_fs(void) -{ - unregister_filesystem(&vfat_fs_type); -} - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("VFAT filesystem support"); -MODULE_AUTHOR("Gordon Chaffee"); - -module_init(init_vfat_fs) -module_exit(exit_vfat_fs) -- cgit v1.2.3-18-g5258 From 9e975dae2970d22557662761c8505ce9fd165684 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:46 -0800 Subject: fat: split include/msdos_fs.h This splits __KERNEL__ stuff in include/msdos_fs.h into fs/fat/fat.h. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 2 +- fs/fat/dir.c | 2 +- fs/fat/fat.h | 274 ++++++++++++++++++++++++++++++++++++++++++++++ fs/fat/fatent.c | 1 + fs/fat/file.c | 2 +- fs/fat/inode.c | 2 +- fs/fat/misc.c | 2 +- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 3 +- include/linux/msdos_fs.h | 276 +---------------------------------------------- 10 files changed, 284 insertions(+), 282 deletions(-) create mode 100644 fs/fat/fat.h diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41c..589edde9053 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include -#include #include +#include "fat.h" /* this must be > 0. */ #define FAT_MAX_CACHE 8 diff --git a/fs/fat/dir.c b/fs/fat/dir.c index bae1c329252..08b23ad25f1 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include #include #include -#include #include #include #include #include +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 00000000000..51f1c42ca5e --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,274 @@ +#ifndef _FAT_H +#define _FAT_H + +#include +#include +#include +#include +#include +#include + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for 
shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) +#define FAT_HASH_MASK (FAT_HASH_SIZE-1) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? 
*/ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + loff_t mmu_private; + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_attr(struct inode *inode) +{ + return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | + (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | + MSDOS_I(inode)->i_attrs; +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int 
fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, 
loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847e..5b5f49061b7 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include #include #include +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca..b21973f266a 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2b2eec1283b..3921de2013a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +27,7 @@ #include #include #include +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. 
*/ diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d..91ad9be18ff 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include #include -#include #include +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e844b9809d2..c0a4d5cd99b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include #include #include -#include #include +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 155c10b4adb..facf3bf0211 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -16,14 +16,13 @@ */ #include - #include -#include #include #include #include #include #include +#include "fat.h" static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ba63858056c..0982fb47a90 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -167,282 +167,10 @@ struct msdos_dir_slot { }; #ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - -struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; - unsigned short fs_fmask; - unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* 
r = relaxed, n = normal, s = strict */ - unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ -}; - -#define FAT_HASH_BITS 8 -#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) - -/* - * MS-DOS file system in-core superblock data - */ -struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ - unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) */ - unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ - unsigned int free_clus_valid; /* is free_clusters valid? 
*/ - struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ - - int fatent_shift; - struct fatent_operations *fatent_ops; - - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[FAT_HASH_SIZE]; -}; - -#define FAT_CACHE_VALID 0 /* special case for valid cache */ - -/* - * MS-DOS file system inode data in memory - */ -struct msdos_inode_info { - spinlock_t cache_lru_lock; - struct list_head cache_lru; - int nr_caches; - /* for avoiding the race between fat_free() and fat_get_cluster() */ - unsigned int cache_valid_id; - - loff_t mmu_private; - int i_start; /* first cluster or 0 */ - int i_logstart; /* logical first cluster */ - int i_attrs; /* unused attribute bits */ - loff_t i_pos; /* on-disk position of directory entry or 0 */ - struct hlist_node i_fat_hash; /* hash by i_location */ - struct inode vfs_inode; -}; - -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - -static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) -{ - return container_of(inode, struct msdos_inode_info, vfs_inode); -} - -/* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) -{ - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; -} - -static inline unsigned char fat_checksum(const __u8 *name) -{ - unsigned char s = name[0]; - s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; - s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; - s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; - s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; - s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; - return s; -} - -static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) -{ - return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus - + sbi->data_start; -} - -static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - *dst++ = src[0] | (src[1] << 8); - src += 2; - } -#else - memcpy(dst, src, len * 2); -#endif -} - -static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - dst[0] = *src & 0x00FF; - dst[1] = (*src & 0xFF00) >> 8; - dst += 2; - src++; - } -#else - memcpy(dst, src, len * 2); -#endif -} - /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } - -/* fat/cache.c */ -extern void fat_cache_inval_inode(struct inode *inode); -extern int fat_get_cluster(struct inode *inode, int cluster, - int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); - -/* fat/dir.c */ -extern const struct file_operations fat_dir_operations; -extern int fat_search_long(struct inode *inode, const unsigned char *name, - int name_len, struct fat_slot_info *sinfo); -extern int fat_dir_empty(struct inode *dir); -extern int fat_subdirs(struct inode *dir); -extern int fat_scan(struct inode *dir, const unsigned char *name, - struct fat_slot_info *sinfo); -extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct 
msdos_dir_entry **de, loff_t *i_pos); -extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); -extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, - struct fat_slot_info *sinfo); -extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); - -/* fat/fatent.c */ -struct fat_entry { - int entry; - union { - u8 *ent12_p[2]; - __le16 *ent16_p; - __le32 *ent32_p; - } u; - int nr_bhs; - struct buffer_head *bhs[2]; -}; - -static inline void fatent_init(struct fat_entry *fatent) -{ - fatent->nr_bhs = 0; - fatent->entry = 0; - fatent->u.ent32_p = NULL; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -static inline void fatent_set_entry(struct fat_entry *fatent, int entry) -{ - fatent->entry = entry; - fatent->u.ent32_p = NULL; -} - -static inline void fatent_brelse(struct fat_entry *fatent) -{ - int i; - fatent->u.ent32_p = NULL; - for (i = 0; i < fatent->nr_bhs; i++) - brelse(fatent->bhs[i]); - fatent->nr_bhs = 0; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -extern void fat_ent_access_init(struct super_block *sb); -extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, - int entry); -extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, - int new, int wait); -extern int fat_alloc_clusters(struct inode *inode, int *cluster, - int nr_cluster); -extern int fat_free_clusters(struct inode *inode, int cluster); -extern int fat_count_free_clusters(struct super_block *sb); - -/* fat/file.c */ -extern int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern const struct file_operations fat_file_operations; -extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); - -/* fat/inode.c */ -extern void fat_attach(struct inode *inode, 
loff_t i_pos); -extern void fat_detach(struct inode *inode); -extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); -extern struct inode *fat_build_inode(struct super_block *sb, - struct msdos_dir_entry *de, loff_t i_pos); -extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - const struct inode_operations *fs_dir_inode_ops, int isvfat); - -extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); -/* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); -extern void fat_clusters_flush(struct super_block *sb); -extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); -extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); - -int fat_cache_init(void); -void fat_cache_destroy(void); - -#endif /* __KERNEL__ */ - -#endif +#endif /* !__KERNEL__ */ +#endif /* !_LINUX_MSDOS_FS_H */ -- cgit v1.2.3-18-g5258 From 7decd1cb0305b97243f283fa7f4baf5fe613edeb Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:47 -0800 Subject: fat: Fix and cleanup timestamp conversion This cleans up date_dos2unix()/fat_date_unix2dos(). The new code should be much more readable. This also fixes those old functions, which don't handle 2100 correctly: 2100 isn't a leap year, but the old code handles it as one. Also, with this, centiseconds are handled and fixed.
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 6 ++- fs/fat/fat.h | 7 +-- fs/fat/inode.c | 34 ++++-------- fs/fat/misc.c | 148 +++++++++++++++++++++++++++++++++++++-------------- fs/fat/namei_msdos.c | 2 +- fs/fat/namei_vfat.c | 5 +- 6 files changed, 130 insertions(+), 72 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 08b23ad25f1..a601c6d45bc 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1089,6 +1089,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) struct msdos_dir_entry *de; sector_t blknr; __le16 date, time; + u8 time_cs; int err, cluster; err = fat_alloc_clusters(dir, &cluster, 1); @@ -1102,7 +1103,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." 
entries) */ @@ -1112,13 +1113,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].lcase = de[1].lcase = 0; de[0].time = de[1].time = time; de[0].date = de[1].date = date; - de[0].ctime_cs = de[1].ctime_cs = 0; if (sbi->options.isvfat) { /* extra timestamps */ de[0].ctime = de[1].ctime = time; + de[0].ctime_cs = de[1].ctime_cs = time_cs; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; } else { de[0].ctime = de[1].ctime = 0; + de[0].ctime_cs = de[1].ctime_cs = 0; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; } de[0].start = cpu_to_le16(cluster); diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 51f1c42ca5e..a2a570f8171 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -263,9 +263,10 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); +extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs); +extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 3921de2013a..079d9d5e0d3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -381,22 +381,12 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), - 
sbi->options.tz_utc); - inode->i_mtime.tv_nsec = 0; + + fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); if (sbi->options.isvfat) { - int secs = de->ctime_cs / 100; - int csecs = de->ctime_cs % 100; - inode->i_ctime.tv_sec = - date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate), - sbi->options.tz_utc) + secs; - inode->i_ctime.tv_nsec = csecs * 10000000; - inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate), - sbi->options.tz_utc); - inode->i_atime.tv_nsec = 0; + fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime, + de->cdate, de->ctime_cs); + fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -591,16 +581,14 @@ retry: raw_entry->attr = fat_attr(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, - &raw_entry->date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, + &raw_entry->date, NULL); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, - &raw_entry->cdate, sbi->options.tz_utc); - fat_date_unix2dos(inode->i_atime.tv_sec, &atime, - &raw_entry->adate, sbi->options.tz_utc); - raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + - inode->i_ctime.tv_nsec / 10000000; + fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime, + &raw_entry->cdate, &raw_entry->ctime_cs); + fat_time_unix2fat(sbi, &inode->i_atime, &atime, + &raw_entry->adate, NULL); } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 91ad9be18ff..a191e79e66a 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -135,65 +135,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) extern struct timezone sys_tz; +/* + * The epoch of FAT timestamp is 1980. 
+ * : bits : value + * date: 0 - 4: day (1 - 31) + * date: 5 - 8: month (1 - 12) + * date: 9 - 15: year (0 - 127) from 1980 + * time: 0 - 4: sec (0 - 29) 2sec counts + * time: 5 - 10: min (0 - 59) + * time: 11 - 15: hour (0 - 23) + */ +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) +#define UNIX_SECS_1980 315532800L +#if BITS_PER_LONG == 64 +#define UNIX_SECS_2108 4354819200L +#endif +/* days between 1.1.70 and 1.1.80 (2 leap days) */ +#define DAYS_DELTA (365 * 10 + 2) +/* 120 (2100 - 1980) isn't leap year */ +#define YEAR_2100 120 +#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100) + /* Linear day numbers of the respective 1sts in non-leap years. */ -static int day_n[] = { - /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ - 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 +static time_t days_in_year[] = { + /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, }; -/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) +/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */ +void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs) { - int month, year, secs; + u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date); + time_t second, day, leap_day, month, year; - /* - * first subtract and mask after that... Otherwise, if - * date == 0, bad things happen - */ - month = ((date >> 5) - 1) & 15; - year = date >> 9; - secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* - ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && - month < 2 ? 
1 : 0)+3653); - /* days since 1.1.70 plus 80's leap day */ - if (!tz_utc) - secs += sys_tz.tz_minuteswest*60; - return secs; + year = date >> 9; + month = max(1, (date >> 5) & 0xf); + day = max(1, date & 0x1f) - 1; + + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (IS_LEAP_YEAR(year) && month > 2) + leap_day++; + + second = (time & 0x1f) << 1; + second += ((time >> 5) & 0x3f) * SECS_PER_MIN; + second += (time >> 11) * SECS_PER_HOUR; + second += (year * 365 + leap_day + + days_in_year[month] + day + + DAYS_DELTA) * SECS_PER_DAY; + + if (!sbi->options.tz_utc) + second += sys_tz.tz_minuteswest * SECS_PER_MIN; + + if (time_cs) { + ts->tv_sec = second + (time_cs / 100); + ts->tv_nsec = (time_cs % 100) * 10000000; + } else { + ts->tv_sec = second; + ts->tv_nsec = 0; + } } -/* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) +/* Convert linear UNIX date to a FAT time/date pair. */ +void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs) { - int day, year, nl_day, month; + time_t second = ts->tv_sec; + time_t day, leap_day, month, year; - if (!tz_utc) - unix_date -= sys_tz.tz_minuteswest*60; + if (!sbi->options.tz_utc) + second -= sys_tz.tz_minuteswest * SECS_PER_MIN; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? 
*/ - if (unix_date < 315532800) - unix_date = 315532800; - - *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ - (((unix_date/3600) % 24) << 11)); - day = unix_date/86400-3652; - year = day/365; - if ((year+3)/4+365*year > day) + if (second < UNIX_SECS_1980) { + *time = 0; + *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); + if (time_cs) + *time_cs = 0; + return; + } +#if BITS_PER_LONG == 64 + if (second >= UNIX_SECS_2108) { + *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); + *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); + if (time_cs) + *time_cs = 199; + return; + } +#endif + + day = second / SECS_PER_DAY - DAYS_DELTA; + year = day / 365; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (year * 365 + leap_day > day) year--; - day -= (year+3)/4+365*year; - if (day == 59 && !(year & 3)) { - nl_day = day; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + day -= year * 365 + leap_day; + + if (IS_LEAP_YEAR(year) && day == days_in_year[3]) { month = 2; } else { - nl_day = (year & 3) || day <= 59 ? 
day : day-1; - for (month = 0; month < 12; month++) { - if (day_n[month] > nl_day) + if (IS_LEAP_YEAR(year) && day > days_in_year[3]) + day--; + for (month = 1; month < 12; month++) { + if (days_in_year[month + 1] > day) break; } } - *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); -} + day -= days_in_year[month]; -EXPORT_SYMBOL_GPL(fat_date_unix2dos); + *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 + | ((second / SECS_PER_MIN) % 60) << 5 + | (second % SECS_PER_MIN) >> 1); + *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1)); + if (time_cs) + *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; +} +EXPORT_SYMBOL_GPL(fat_time_unix2fat); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c0a4d5cd99b..e92e8158eba 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -247,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, NULL); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index facf3bf0211..1536bc3ca0f 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -568,6 +568,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, unsigned char msdos_name[MSDOS_NAME]; wchar_t *uname; __le16 time, date; + u8 time_cs; int err, ulen, usize, i; loff_t offset; @@ -620,10 +621,10 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? 
ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; - de->ctime_cs = 0; + de->ctime_cs = time_cs; de->start = cpu_to_le16(cluster); de->starthi = cpu_to_le16(cluster >> 16); de->size = 0; -- cgit v1.2.3-18-g5258 From 53472bc8f810d2fb507593ea03703670506a668d Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:47 -0800 Subject: fat: use generic_file_llseek() for directory fat_dir_ioctl() was already fixed (i.e. it is called under ->i_mutex), and __fat_readdir() doesn't take the BKL anymore. So taking the BKL for ->llseek() is pointless, and we have to use generic_file_llseek(). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index a601c6d45bc..931dd28b528 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -832,6 +832,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, #endif /* CONFIG_COMPAT */ const struct file_operations fat_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, -- cgit v1.2.3-18-g5258 From 52e9d9f4b32a3bec91feb76c84e37b7dcffe5040 Mon Sep 17 00:00:00 2001 From: Darren Jenkins Date: Thu, 6 Nov 2008 12:53:48 -0800 Subject: fat: cleanup fat_parse_long() error handling Coverity CID 2332 & 2333 RESOURCE_LEAK In fat_search_long(), if fat_parse_long() returns a negative value, we return without first freeing unicode. This patch frees the resources on this error path. The above was a false positive on the current tree, but this change is cleaner, so apply it as a cleanup.
[hirofumi@mail.parknet.co.jp: fix coding style] Signed-off-by: Darren Jenkins Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 931dd28b528..140fc39e230 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -373,9 +373,10 @@ parse_record: if (de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); - if (status < 0) - return status; - else if (status == PARSE_INVALID) + if (status < 0) { + err = status; + goto end_of_dir; + } else if (status == PARSE_INVALID) continue; else if (status == PARSE_NOT_LONGNAME) goto parse_record; -- cgit v1.2.3-18-g5258 From d3dfa8228f87ab9960ab8b4718013d68e3c25a43 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:49 -0800 Subject: fat: improve fat_hash() fat_hash() uses an algorithm that is known to be bad. Replace it with hash_32(). The following is a summary of the test.
old hash: hash func (1000 times): 33489 cycles total inodes in hash table: 70926 largest bucket contains: 696 smallest bucket contains: 54 new hash: hash func (1000 times): 33129 cycles total inodes in hash table: 70926 largest bucket contains: 315 smallest bucket contains: 236 Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 1 - fs/fat/inode.c | 18 +++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a2a570f8171..2b8e94c3eef 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -43,7 +43,6 @@ struct fat_mount_options { #define FAT_HASH_BITS 8 #define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) /* * MS-DOS file system in-core superblock data diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 079d9d5e0d3..f58cd48d98b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "fat.h" @@ -247,25 +248,21 @@ static void fat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) +static inline unsigned long fat_hash(loff_t i_pos) { - unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; - tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2); - return tmp & FAT_HASH_MASK; + return hash_32(i_pos, FAT_HASH_BITS); } void fat_attach(struct inode *inode, loff_t i_pos) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); spin_lock(&sbi->inode_hash_lock); MSDOS_I(inode)->i_pos = i_pos; - hlist_add_head(&MSDOS_I(inode)->i_fat_hash, - sbi->inode_hashtable + fat_hash(sb, i_pos)); + hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_attach); 
void fat_detach(struct inode *inode) @@ -276,13 +273,12 @@ void fat_detach(struct inode *inode) hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; -- cgit v1.2.3-18-g5258 From 5e35dd4651002207948f10c576fc7d9bad448815 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:49 -0800 Subject: fat: Fix fat_ent_update_ptr() for FAT12 This fixes the missing update for bhs/nr_bhs in case the caller accessed from block boundary to first block of boundary. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fatent.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 5b5f49061b7..13513992da3 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -317,10 +317,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - /* Does this entry need the next block? */ - if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { - if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) - return 0; + if (sbi->fat_bits == 12) { + if ((offset + 1) < sb->s_blocksize) { + /* This entry is on bhs[0]. */ + if (fatent->nr_bhs == 2) { + brelse(bhs[1]); + fatent->nr_bhs = 1; + } + } else { + /* This entry needs the next block. 
*/ + if (fatent->nr_bhs != 2) + return 0; + if (bhs[1]->b_blocknr != (blocknr + 1)) + return 0; + } } ops->ent_set_ptr(fatent, offset); return 1; -- cgit v1.2.3-18-g5258 From a993b542bb4cd3e5a64863b7ef892bbebec2239b Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:50 -0800 Subject: fat: use fat_detach() in fat_clear_inode() Use fat_detach() instead of opencoding it. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f58cd48d98b..8e1b75c63c7 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -429,13 +429,8 @@ static void fat_delete_inode(struct inode *inode) static void fat_clear_inode(struct inode *inode) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); - - spin_lock(&sbi->inode_hash_lock); fat_cache_inval_inode(inode); - hlist_del_init(&MSDOS_I(inode)->i_fat_hash); - spin_unlock(&sbi->inode_hash_lock); + fat_detach(inode); } static void fat_write_super(struct super_block *sb) -- cgit v1.2.3-18-g5258 From 068f5ae05c51d2cee6b31cb3da06775dd83bd348 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:51 -0800 Subject: vfat: Fix vfat_find() error path in vfat_lookup() Current vfat_lookup() creates negative dentry blindly if vfat_find() returned an error. It's wrong. If the error isn't -ENOENT, just return error. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 1536bc3ca0f..419deabfb9b 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -683,7 +683,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; + struct inode *inode; struct dentry *alias; int err, table; @@ -693,14 +693,18 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { - table++; + if (err == -ENOENT) { + table++; + inode = NULL; + goto out; + } goto error; } inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - unlock_super(sb); - return ERR_CAST(inode); + err = PTR_ERR(inode); + goto error; } alias = d_find_alias(inode); if (alias) { @@ -713,7 +717,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, } } -error: +out: unlock_super(sb); dentry->d_op = &vfat_dentry_ops[table]; dentry->d_time = dentry->d_parent->d_inode->i_version; @@ -723,6 +727,10 @@ error: dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; + +error: + unlock_super(sb); + return ERR_PTR(err); } static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, -- cgit v1.2.3-18-g5258 From 1b52467243c7167b3a267ddbcbb14d550f28eb4a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:51 -0800 Subject: fat: Fix/Cleanup dcache handling for vfat - Add comments for handling dcache of vfat. - Separate case-sensitive case and case-insensitive to vfat_revalidate() and vfat_ci_revalidate(). vfat_revalidate() doesn't need to drop case-insensitive negative dentry on creation path. 
- Current code is missing to set ->d_revalidate to the negative dentry created by unlink/etc.. This sets ->d_revalidate always, and returns 1 for positive dentry. Now, we don't need to change ->d_op dynamically anymore, so this just uses sb->s_root->d_op to set ->d_op. - d_find_alias() may return DCACHE_DISCONNECTED dentry. It's not the interesting dentry there. This checks it. - Add missing LOOKUP_PARENT check. We don't need to drop the valid negative dentry for (LOOKUP_CREATE | LOOKUP_PARENT) lookup. - For consistent filename on creation path, this drops negative dentry if we can't see intent. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 124 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 419deabfb9b..d585398f9f6 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -24,27 +24,67 @@ #include #include "fat.h" -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +/* + * If new entry was created in the parent, it could create the 8.3 + * alias (the shortname of logname). So, the parent may have the + * negative-dentry which matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative + * anymore. So, drop it. + */ +static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; - - if (!dentry->d_inode && - nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) - /* - * negative dentry is dropped, in order to make sure - * to use the name which a user desires if this is - * create path. 
- */ + spin_lock(&dentry->d_lock); + if (dentry->d_time != dentry->d_parent->d_inode->i_version) ret = 0; - else { - spin_lock(&dentry->d_lock); - if (dentry->d_time != dentry->d_parent->d_inode->i_version) - ret = 0; - spin_unlock(&dentry->d_lock); - } + spin_unlock(&dentry->d_lock); return ret; } +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + /* This is not negative dentry. Always valid. */ + if (dentry->d_inode) + return 1; + return vfat_revalidate_shortname(dentry); +} + +static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +{ + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. 
+ */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & LOOKUP_CREATE) + return 0; + } + + return vfat_revalidate_shortname(dentry); +} + /* returns the length of a struct qstr, ignoring trailing dots */ static unsigned int vfat_striptail_len(struct qstr *qstr) { @@ -126,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) return 1; } -static struct dentry_operations vfat_dentry_ops[4] = { - { - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - } +static struct dentry_operations vfat_ci_dentry_ops = { + .d_revalidate = vfat_revalidate_ci, + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, +}; + +static struct dentry_operations vfat_dentry_ops = { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hash, + .d_compare = vfat_cmp, }; /* Characters that are undesirable in an MS-DOS file name */ @@ -685,29 +716,35 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, struct fat_slot_info sinfo; struct inode *inode; struct dentry *alias; - int err, table; + int err; lock_super(sb); - table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; - dentry->d_op = &vfat_dentry_ops[table]; err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { if (err == -ENOENT) { - table++; inode = NULL; goto out; } goto error; } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; } + alias = d_find_alias(inode); - if (alias) { + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { + /* + * This inode has non DCACHE_DISCONNECTED dentry. This + * means, the user did ->lookup() by an another name + * (longname vs 8.3 alias of it) in past. 
+ * + * Switch to new one for reason of locality if possible. + */ if (d_invalidate(alias) == 0) dput(alias); else { @@ -715,15 +752,14 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, unlock_super(sb); return alias; } - } out: unlock_super(sb); - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; @@ -1022,9 +1058,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return res; if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_dentry_ops[0]; + sb->s_root->d_op = &vfat_ci_dentry_ops; else - sb->s_root->d_op = &vfat_dentry_ops[2]; + sb->s_root->d_op = &vfat_dentry_ops; return 0; } -- cgit v1.2.3-18-g5258 From 1c13a243a461dd5b089d29e5d57f260c990e462c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:52 -0800 Subject: fat: Kill d_invalidate() in vfat_lookup() d_invalidate() for positive dentry doesn't work in some cases (vfsmount, nfsd, and maybe others). shrink_dcache_parent() by d_invalidate() is pointless for vfat usage at all. So, this kills it, and instead uses d_move(). To preserve the old behavior, this simply returns the alias for a directory (don't change pwd, etc..). The directory lookup shouldn't be important for performance. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_vfat.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index d585398f9f6..bf326d4356a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -745,13 +745,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, * * Switch to new one for reason of locality if possible. */ - if (d_invalidate(alias) == 0) - dput(alias); - else { - iput(inode); - unlock_super(sb); - return alias; - } + BUG_ON(d_unhashed(alias)); + if (!S_ISDIR(inode->i_mode)) + d_move(alias, dentry); + iput(inode); + unlock_super(sb); + return alias; } out: unlock_super(sb); -- cgit v1.2.3-18-g5258 From 45cfbe354785a5bc9a38354754d6f7322f598001 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:53 -0800 Subject: fat: Cleanup msdos_lookup() Use same style with vfat_lookup(). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/namei_msdos.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index e92e8158eba..7ba03a4acbe 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; - int res; - - dentry->d_op = &msdos_dentry_operations; + struct inode *inode; + int err; lock_super(sb); - res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (res == -ENOENT) - goto add; - if (res < 0) - goto out; + + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto error; + } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 
brelse(sinfo.bh); if (IS_ERR(inode)) { - res = PTR_ERR(inode); - goto out; + err = PTR_ERR(inode); + goto error; } -add: - res = 0; +out: + unlock_super(sb); + dentry->d_op = &msdos_dentry_operations; dentry = d_splice_alias(inode, dentry); if (dentry) dentry->d_op = &msdos_dentry_operations; -out: + return dentry; + +error: unlock_super(sb); - if (!res) - return dentry; - return ERR_PTR(res); + return ERR_PTR(err); } /***** Creates a directory entry (name is already formatted). */ -- cgit v1.2.3-18-g5258 From 9c0aa1b87bf541affef519eb4879ce7c5a5941ae Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: fat: Cleanup FAT attribute stuff This adds three helpers: fat_make_attrs() - makes FAT attributes from inode. fat_make_mode() - makes mode_t from FAT attributes. fat_save_attrs() - saves FAT attributes to inode. Then this replaces: MSDOS_MKMODE() by fat_make_mode(), fat_attr() by fat_make_attrs(), ->i_attrs = attr & ATTR_UNUSED by fat_save_attrs(). And for root inode, those is used with ATTR_DIR instead of bogus ATTR_NONE. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 20 +++++++++++++++++++- fs/fat/file.c | 32 ++++++++++++-------------------- fs/fat/inode.c | 19 +++++++++---------- include/linux/msdos_fs.h | 5 ----- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 2b8e94c3eef..3b4753a024e 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,14 +117,32 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* Convert attribute bits and a mask to the UNIX mode. 
*/ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + /* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) +static inline u8 fat_make_attrs(struct inode *inode) { return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | MSDOS_I(inode)->i_attrs; } +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; +} + static inline unsigned char fat_checksum(const __u8 *name) { unsigned char s = name[0]; diff --git a/fs/fat/file.c b/fs/fat/file.c index b21973f266a..f5a7e907a8f 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,13 +27,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr; - - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); - + u32 attr = fat_make_attrs(inode); return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: @@ -62,20 +56,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? 
ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +105,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +264,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -314,6 +304,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { @@ -356,7 +348,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8e1b75c63c7..7aaa21cf019 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -337,8 +337,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & 
ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -355,10 +354,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -374,7 +372,8 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; @@ -569,7 +568,7 @@ retry: raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, @@ -1105,7 +1104,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1122,7 
+1121,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0982fb47a90..e0a9b207920 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -46,11 +46,6 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) -/* valid file mode bits */ -#define MSDOS_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO) -/* Convert attribute bits and a mask to the UNIX mode. */ -#define MSDOS_MKMODE(a, m) (m & (a & ATTR_RO ? S_IRUGO|S_IXUGO : S_IRWXUGO)) - #define MSDOS_NAME 11 /* maximum name length */ #define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ -- cgit v1.2.3-18-g5258 From 9183482f5d4a2de00f66641b974e7f351d41b675 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: fat: Fix ATTR_RO in the case of (~umask & S_WUGO) == 0 If inode->i_mode doesn't have S_IWUGO, current code assumes it means ATTR_RO. However, if (~[ufd]mask & S_IWUGO) == 0, inode->i_mode can't hold S_IWUGO. Therefore the updated directory entry will always have ATTR_RO. This adds fat_mode_can_hold_ro() to check it. And if inode->i_mode can't hold it, this uses ->i_attrs to hold ATTR_RO instead. With this, we don't set ATTR_RO unless users change it via ioctl() if (~[ufd]mask & S_IWUGO) == 0. And on FAT_IOCTL_GET_ATTRIBUTES path, this adds ->i_mutex to it for not returning the partially updated attributes by FAT_IOCTL_SET_ATTRIBUTES to userland. 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 33 +++++++++++++++++++++++++++++---- fs/fat/file.c | 7 ++++++- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 3b4753a024e..313b645b812 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -117,6 +117,25 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) return container_of(inode, struct msdos_inode_info, vfs_inode); } +/* + * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + */ +static inline int fat_mode_can_hold_ro(struct inode *inode) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + mode_t mask; + + if (S_ISDIR(inode->i_mode)) + mask = ~sbi->options.fs_dmask; + else + mask = ~sbi->options.fs_fmask; + + if (!(mask & S_IWUGO)) + return 0; + return 1; +} + /* Convert attribute bits and a mask to the UNIX mode. */ static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, u8 attrs, mode_t mode) @@ -133,14 +152,20 @@ static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, /* Return the FAT attribute byte for this inode */ static inline u8 fat_make_attrs(struct inode *inode) { - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? 
ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; + u8 attrs = MSDOS_I(inode)->i_attrs; + if (S_ISDIR(inode->i_mode)) + attrs |= ATTR_DIR; + if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) + attrs |= ATTR_RO; + return attrs; } static inline void fat_save_attrs(struct inode *inode, u8 attrs) { - MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + if (fat_mode_can_hold_ro(inode)) + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + else + MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO); } static inline unsigned char fat_checksum(const __u8 *name) diff --git a/fs/fat/file.c b/fs/fat/file.c index f5a7e907a8f..81b15c62380 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -27,7 +27,12 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, switch (cmd) { case FAT_IOCTL_GET_ATTRIBUTES: { - u32 attr = fat_make_attrs(inode); + u32 attr; + + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); + return put_user(attr, user_attr); } case FAT_IOCTL_SET_ATTRIBUTES: -- cgit v1.2.3-18-g5258 From dfc209c0064efef5590f608056a48b61a5cac09c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:55 -0800 Subject: fat: Fix ATTR_RO for directory FAT has the ATTR_RO (read-only) attribute. But on Windows, the ATTR_RO of the directory will be just ignored actually, and is used by only applications as flag. E.g. it is set for the customized folder by Explorer. http://msdn2.microsoft.com/en-us/library/aa969337.aspx This adds "rodir" option. If user specified it, ATTR_RO is used as read-only flag even if it's the directory. Otherwise, inode->i_mode is not used to hold ATTR_RO (i.e. fat_mode_can_hold_ro() returns 0). 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/vfat.txt | 8 ++++++++ fs/fat/fat.h | 14 ++++++++++---- fs/fat/file.c | 16 ++++++++++++---- fs/fat/inode.c | 17 +++++++++++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index dc9dc73d7d3..3a5ddc96901 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -124,6 +124,14 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as flush -- If set, the filesystem will try to flush to disk more early than normal. Not set by default. +rodir -- FAT has the ATTR_RO (read-only) attribute. But on Windows, + the ATTR_RO of the directory will be just ignored actually, + and is used by only applications as flag. E.g. it's setted + for the customized folder. + + If you want to use ATTR_RO as read-only flag even for + the directory, set this option. + : 0,1,yes,no,true,false TODO diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 313b645b812..e9dce5d8e7a 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -38,7 +38,8 @@ struct fat_mount_options { flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 0=need case conversion*/ usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ + tz_utc:1, /* Filesystem timestamps are in UTC */ + rodir:1; /* allow ATTR_RO for directory */ }; #define FAT_HASH_BITS 8 @@ -120,15 +121,20 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) /* * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. 
*/ static inline int fat_mode_can_hold_ro(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); mode_t mask; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { + if (!sbi->options.rodir) + return 0; mask = ~sbi->options.fs_dmask; - else + } else mask = ~sbi->options.fs_fmask; if (!(mask & S_IWUGO)) @@ -140,7 +146,7 @@ static inline int fat_mode_can_hold_ro(struct inode *inode) static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, u8 attrs, mode_t mode) { - if (attrs & ATTR_RO) + if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) mode &= ~S_IWUGO; if (attrs & ATTR_DIR) diff --git a/fs/fat/file.c b/fs/fat/file.c index 81b15c62380..f06a4e525ec 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -282,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. + * + * If fat_mode_can_hold_ro(inode) is false, can't change w bits. */ if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) - return -EPERM; + if (fat_mode_can_hold_ro(inode)) { + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) + return -EPERM; + } else { + if ((perm & S_IWUGO) != (S_IWUGO & ~mask)) + return -EPERM; + } *mode_ptr &= S_IFMT | perm; @@ -316,8 +323,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int error = 0; unsigned int ia_valid; + int error; /* * Expand the file. 
Since inode_setattr() updates ->i_size @@ -371,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; } - error = inode_setattr(inode, attr); + if (attr->ia_valid) + error = inode_setattr(inode, attr); out: return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7aaa21cf019..0da04e6d1e3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -797,8 +797,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",uni_xlate"); if (!opts->numtail) seq_puts(m, ",nonumtail"); + if (opts->rodir) + seq_puts(m, ",rodir"); } - if (sbi->options.flush) + if (opts->flush) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); @@ -814,7 +816,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, }; static const match_table_t fat_tokens = { @@ -886,6 +888,7 @@ static const match_table_t vfat_tokens = { {Opt_nonumtail_yes, "nonumtail=yes"}, {Opt_nonumtail_yes, "nonumtail=true"}, {Opt_nonumtail_yes, "nonumtail"}, + {Opt_rodir, "rodir"}, {Opt_err, NULL} }; @@ -905,10 +908,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; - if (is_vfat) + if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; - else + opts->rodir = 0; + } else { opts->shortname = 0; + opts->rodir = 1; + } opts->name_check = 'n'; opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; opts->utf8 = opts->unicode_xlate = 0; @@ -1059,6 +1065,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_nonumtail_yes: /* empty or 1 or yes or true */ opts->numtail = 0; /* 
negated option */ break; + case Opt_rodir: + opts->rodir = 1; + break; /* obsolete mount options */ case Opt_obsolate: -- cgit v1.2.3-18-g5258 From fa93ca18a8b0da4e26bd9491ad144cd14d22f8ec Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:56 -0800 Subject: fat: Fix _fat_bmap() race fat_get_cluster() assumes the requested blocknr isn't truncated during read. _fat_bmap() doesn't follow this rule. This protects it by ->i_mutex. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0da04e6d1e3..be88208b83a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -199,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, fat_get_block); + sector_t blocknr; + + /* fat_get_cluster() assumes the requested blocknr isn't truncated. 
*/ + mutex_lock(&mapping->host->i_mutex); + blocknr = generic_block_bmap(mapping, block, fat_get_block); + mutex_unlock(&mapping->host->i_mutex); + + return blocknr; } static const struct address_space_operations fat_aops = { -- cgit v1.2.3-18-g5258 From 0e75f5da06c05425f4b375eb981c4489fb2d9787 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:56 -0800 Subject: fat: Add printf attribute to fat_fs_panic() Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e9dce5d8e7a..a69f7f9757c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -308,7 +308,8 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent, extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); /* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) __cold; extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, -- cgit v1.2.3-18-g5258 From 2bdf67eb1631f30e2f3f5d49e4007c76e88877a8 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:57 -0800 Subject: fat: mmu_private race fix mmu_private is a 64-bit value, hence it's not atomic to update. So, the access rule for mmu_private is that we must hold ->i_mutex. But the fat_get_block() path doesn't follow the rule on the non-allocation path. This fixes it by using i_size instead on the non-allocation path.
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 23 ++++++++++++++++++----- fs/fat/dir.c | 2 +- fs/fat/fat.h | 6 ++++-- fs/fat/inode.c | 4 ++-- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 589edde9053..b4260229808 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) } int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks) + unsigned long *mapped_blocks, int create) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; int cluster, offset; @@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, } return 0; } - last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) - >> sb->s_blocksize_bits; - if (sector >= last_block) - return 0; + + last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits; + if (sector >= last_block) { + if (!create) + return 0; + + /* + * ->mmu_private can access on only allocation path. 
+ * (caller must hold ->i_mutex) + */ + last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + >> blocksize_bits; + if (sector >= last_block) + return 0; + } cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); offset = sector & (sbi->sec_per_clus - 1); diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 140fc39e230..2ecaa17acdb 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys, &mapped_blocks); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0); if (err || !phys) return -1; /* beyond EOF or error */ diff --git a/fs/fat/fat.h b/fs/fat/fat.h index a69f7f9757c..4efc5038ed2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -91,7 +91,9 @@ struct msdos_inode_info { /* for avoiding the race between fat_free() and fat_get_cluster() */ unsigned int cache_valid_id; - loff_t mmu_private; + /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ + loff_t mmu_private; /* physically allocated size */ + int i_start; /* first cluster or 0 */ int i_logstart; /* logical first cluster */ int i_attrs; /* unused attribute bits */ @@ -222,7 +224,7 @@ extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); + unsigned long *mapped_blocks, int create); /* fat/dir.c */ extern const struct file_operations fat_dir_operations; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index be88208b83a..9e37ad93c73 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -64,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, sector_t phys; int err, offset; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; if (phys) { @@ -94,7 +94,7 @@ static inline int 
__fat_get_block(struct inode *inode, sector_t iblock, *max_blocks = min(mapped_blocks, *max_blocks); MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; -- cgit v1.2.3-18-g5258 From 9ca59f4c3d28df14a1545a1e2832f34a0a50e3ed Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:57 -0800 Subject: fat: ->i_pos race fix i_pos is 64bits value, hence it's not atomic to update. Important place is fat_write_inode() only, other places without lock are just for printk(). This adds lock for "BITS_PER_LONG == 32" kernel. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9e37ad93c73..bdd8fb7be2c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -542,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static int fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -551,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait) loff_t i_pos; int err; + if (inode->i_ino == MSDOS_ROOT_INO) + return 0; + retry: - i_pos = MSDOS_I(inode)->i_pos; - if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) + i_pos = fat_i_pos_read(sbi, inode); + if (!i_pos) return 0; bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); -- cgit v1.2.3-18-g5258 From c3302931db090d87e9015c3a7ce5c97a7dd90f78 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 
12:53:58 -0800 Subject: fat: i_blocks warning fix blkcnt_t type depends on CONFIG_LSF. Use unsigned long long always for printk(). But lazy to type it, so add "llu" and use it. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 2 +- fs/fat/fat.h | 3 +++ fs/fat/fatent.c | 5 ++--- fs/fat/misc.c | 5 +++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 2ecaa17acdb..67e05835709 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -86,7 +86,7 @@ next: *bh = sb_bread(sb, phys); if (*bh == NULL) { printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", - (unsigned long long)phys); + (llu)phys); /* skip this block */ *pos = (iblock + 1) << sb->s_blocksize_bits; goto next; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4efc5038ed2..ea440d65819 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -323,4 +323,7 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); void fat_cache_destroy(void); +/* helper for printk */ +typedef unsigned long long llu; + #endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 13513992da3..da6eea47872 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -93,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); return -EIO; } @@ -107,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index a191e79e66a..ac39ebcc149 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -124,8 +124,9 @@ int 
fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %lu)", - new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); + fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); -- cgit v1.2.3-18-g5258 From 7597bc94d6f3bdccb086ac7f2ad91292fdaee2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Nov 2008 17:38:47 +0000 Subject: Fix accidental implicit cast in HR-timer conversion Fix the hrtimer_add_expires_ns() function. It should take a 'u64 ns' argument, but rather takes an 'unsigned long ns' argument - which might only be 32-bits. On FRV, this results in the kernel locking up because hrtimer_forward() passes the result of a 64-bit multiplication to this function, for which the compiler discards the top 32-bits - something that didn't happen when ktime_add_ns() was called directly. Signed-off-by: David Howells Acked-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2b3645b1acf..07e510a3b00 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -239,7 +239,7 @@ static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } -static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); -- cgit v1.2.3-18-g5258 From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a711..295b7c756ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103c..33e9986beb8 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a4..ab242cc1acc 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + 
current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3-18-g5258 From fcef7836a31c6432b41a38867d413ed3d6aa8261 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 6 Nov 2008 12:05:21 -0800 Subject: alsa: fix snd_BUG_on() and friends sound/pci/pcxhr/pcxhr_core.c: In function 'pcxhr_set_pipe_cmd_params': sound/pci/pcxhr/pcxhr_core.c:700: warning: statement with no effect sound/pci/pcxhr/pcxhr_core.c:706: warning: statement with no effect sound/pci/pcxhr/pcxhr_core.c:710: warning: statement with no effect Due to try to fix this, and be more conventional about the empty stubs. Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai --- include/sound/core.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/sound/core.h b/include/sound/core.h index e5eec5f7350..7e558947268 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -388,9 +388,13 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) #else /* !CONFIG_SND_DEBUG */ -#define snd_printd(fmt, args...) /* nothing */ -#define snd_BUG() /* nothing */ -#define snd_BUG_ON(cond) ({/*(void)(cond);*/ 0;}) /* always false */ +#define snd_printd(fmt, args...) 
do { } while (0) +#define snd_BUG() do { } while (0) +static inline int __snd_bug_on(void) +{ + return 0; +} +#define snd_BUG_ON(cond) __snd_bug_on() /* always false */ #endif /* CONFIG_SND_DEBUG */ -- cgit v1.2.3-18-g5258 From f29c9b1ccb52904ee442a933cf3dee628f9f4e62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 6 Nov 2008 09:45:16 +0800 Subject: sched: fix a bug in sched domain degenerate Impact: re-add incorrectly eliminated sched domain layers (1) on i386 with SCHED_SMT and SCHED_MC enabled # mount -t cgroup -o cpuset xxx /mnt # echo 0 > /mnt/cpuset.sched_load_balance # mkdir /mnt/0 # echo 0 > /mnt/0/cpuset.cpus # dmesg CPU0 attaching sched-domain: domain 0: span 0 level CPU groups: 0 (2) on i386 with SCHED_MC enabled but SCHED_SMT disabled # same with (1) # dmesg CPU0 attaching NULL sched-domain. The bug is that some sched domains may be skipped unintentionally when degenerating (optimizing) sched domains. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 82cc839c921..4c7e2bcdfa8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6877,15 +6877,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) struct sched_domain *tmp; /* Remove the sched domains which do not contribute to scheduling. 
*/ - for (tmp = sd; tmp; tmp = tmp->parent) { + for (tmp = sd; tmp; ) { struct sched_domain *parent = tmp->parent; if (!parent) break; + if (sd_parent_degenerate(tmp, parent)) { tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; - } + } else + tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { -- cgit v1.2.3-18-g5258 From ca3273f9646694e0419cfb9d6c12deb1c9aff27c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Nov 2008 14:47:21 +0800 Subject: sched: fix memory leak in a failure path Impact: fix rare memory leak in the sched-domains manual reconfiguration code In the failure path, rd is not attached to a sched domain, so it causes a leak. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched.c b/kernel/sched.c index 4c7e2bcdfa8..57c933ffbee 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7676,6 +7676,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, error: free_sched_groups(cpu_map, tmpmask); SCHED_CPUMASK_FREE((void *)allmasks); + kfree(rd); return -ENOMEM; #endif } -- cgit v1.2.3-18-g5258 From 9b46333406b9cb3397ab538485a4d57c316af0ff Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 28 Oct 2008 19:22:34 +1100 Subject: vmap: cope with vm_unmap_aliases before vmalloc_init() Xen can end up calling vm_unmap_aliases() before vmalloc_init() has been called. In this case its safe to make it a simple no-op. 
Signed-off-by: Jeremy Fitzhardinge Cc: Linux Memory Management List Cc: Nick Piggin Signed-off-by: Ingo Molnar --- mm/vmalloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 66fad3fc02b..ba6b0f5f7fa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -592,6 +592,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr) #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) +static bool vmap_initialized __read_mostly = false; + struct vmap_block_queue { spinlock_t lock; struct list_head free; @@ -828,6 +830,9 @@ void vm_unmap_aliases(void) int cpu; int flush = 0; + if (unlikely(!vmap_initialized)) + return; + for_each_possible_cpu(cpu) { struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); struct vmap_block *vb; @@ -942,6 +947,8 @@ void __init vmalloc_init(void) INIT_LIST_HEAD(&vbq->dirty); vbq->nr_dirty = 0; } + + vmap_initialized = true; } void unmap_kernel_range(unsigned long addr, unsigned long size) -- cgit v1.2.3-18-g5258 From d05fdf316067cd311d5e7add08da26ded8a58080 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 28 Oct 2008 19:23:06 +1100 Subject: xen: make sure stray alias mappings are gone before pinning Xen requires that all mappings of pagetable pages are read-only, so that they can't be updated illegally. As a result, if a page is being turned into a pagetable page, we need to make sure all its mappings are RO. If the page had been used for ioremap or vmalloc, it may still have left over mappings as a result of not having been lazily unmapped. This change makes sure we explicitly mop them all up before pinning the page. Unlike aliases created by kmap, there can be vmalloc aliases even for non-high pages, so we must do the flush unconditionally.
Signed-off-by: Jeremy Fitzhardinge Cc: Linux Memory Management List Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 5 +++-- arch/x86/xen/mmu.c | 9 ++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b61534c7a4c..5e4686d70f6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -863,15 +863,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l if (PagePinned(virt_to_page(mm->pgd))) { SetPagePinned(page); + vm_unmap_aliases(); if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); if (level == PT_PTE && USE_SPLIT_PTLOCKS) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else + } else { /* make sure there are no stray mappings of this page */ kmap_flush_unused(); - vm_unmap_aliases(); + } } } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aba77b2b7d1..89f3b6edc65 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -850,13 +850,16 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. 
*/ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + vm_unmap_aliases(); + xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for kmap_flush_unused */ + if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { + /* re-enable interrupts for flushing */ xen_mc_issue(0); + kmap_flush_unused(); - vm_unmap_aliases(); + xen_mc_batch(); } -- cgit v1.2.3-18-g5258 From cd83e42c6b0413dcbb548c2ead799111ff7e6a13 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 7 Nov 2008 11:12:29 +1100 Subject: cpumask: new API, v2 - add cpumask_of() - add free_bootmem_cpumask_var() Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 11 +++++++++++ lib/cpumask.c | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c8e66619097..31caa1bc620 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -893,6 +893,12 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) +/** + * cpumask_of - the cpumask containing just a given cpu + * @cpu: the cpu (<= nr_cpu_ids) + */ +#define cpumask_of(cpu) (get_cpu_mask(cpu)) + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap @@ -946,6 +952,7 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); +void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; @@ -962,6 +969,10 @@ static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) static inline void free_cpumask_var(cpumask_var_t mask) { } + +static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +{ +} #endif /* CONFIG_CPUMASK_OFFSTACK */ /* The pointer versions of the maps, these will become the primary versions. 
*/ diff --git a/lib/cpumask.c b/lib/cpumask.c index 5ceb4211c83..2ebc3a9a746 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -107,4 +107,9 @@ void free_cpumask_var(cpumask_var_t mask) kfree(mask); } EXPORT_SYMBOL(free_cpumask_var); + +void free_bootmem_cpumask_var(cpumask_var_t mask) +{ + free_bootmem((unsigned long)mask, cpumask_size()); +} #endif -- cgit v1.2.3-18-g5258 From ed9b3e3379731e9f9d2f73f3d7fd9e7d2ce3df4a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 7 Nov 2008 09:06:45 -0500 Subject: ext4: Mark the buffer_heads as dirty and uptodate after prepare_write We need to make sure we mark the buffer_heads as dirty and uptodate so that block_write_full_page write them correctly. This fixes mmap corruptions that can occur in low memory situations. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5a130b56f1c..be21a5ae33c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page, unlock_page(page); return 0; } + /* now mark the buffer_heads as dirty and uptodate */ + block_commit_write(page, 0, PAGE_CACHE_SIZE); } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) -- cgit v1.2.3-18-g5258 From 23712a9c28b9f80a8cf70c8490358d5f562d2465 Mon Sep 17 00:00:00 2001 From: Frederic Bohe Date: Fri, 7 Nov 2008 09:21:01 -0500 Subject: ext4: add checksum calculation when clearing UNINIT flag in ext4_new_inode When initializing an uninitialized block group in ext4_new_inode(), its block group checksum must be re-calculated. This fixes a race when several threads try to allocate a new inode in an UNINIT'd group. There is some question whether we need to be initializing the block bitmap in ext4_new_inode() at all, but for now, if we are going to init the block group, let's eliminate the race. 
Signed-off-by: Frederic Bohe Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index fe34d74cfb1..2a117e286e5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -718,6 +718,8 @@ got: gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); free = ext4_free_blocks_after_init(sb, group, gdp); gdp->bg_free_blocks_count = cpu_to_le16(free); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, + gdp); } spin_unlock(sb_bgl_lock(sbi, group)); -- cgit v1.2.3-18-g5258 From 14800984706bf6936bbec5187f736e928be5c218 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 7 Nov 2008 15:26:50 +0100 Subject: sched: fine-tune SD_MC_INIT Tune SD_MC_INIT the same way as SD_CPU_INIT: unset SD_BALANCE_NEWIDLE, and set SD_WAKE_BALANCE. This improves vmark by 5%: vmark 132102 125968 125497 messages/sec avg 127855.66 .984 vmark 139404 131719 131272 messages/sec avg 134131.66 1.033 Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar # *DOCUMENTATION* --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/topology.h b/include/linux/topology.h index 34a7ee0ebed..a8d840595b7 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -120,10 +120,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ -- cgit v1.2.3-18-g5258 From 52c642f33b14bfa1b00ef2b68296effb34a573f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 7 Nov 2008 16:09:23 +0100 Subject: sched: fine-tune SD_SIBLING_INIT fine-tune the HT sched-domains parameters as well. 
On a HT capable box, this increases lat_ctx performance from 23.87 usecs to 1.49 usecs: # before $ ./lat_ctx -s 0 2 "size=0k ovr=1.89 2 23.87 # after $ ./lat_ctx -s 0 2 "size=0k ovr=1.84 2 1.49 Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/topology.h b/include/linux/topology.h index a8d840595b7..117f1b7405c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ void arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ + | SD_WAKE_BALANCE \ | SD_SHARE_CPUPOWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3-18-g5258 From 8638545c3668231675dcf8f46afa7ed5930a6b02 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 7 Nov 2008 16:03:46 +0000 Subject: trivial: dmi_scan typo As we've lost our trivial maintainer for the moment I'll send this directly. Only touches a comment Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 3e526b6d00c..8daf4793ac3 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -81,9 +81,9 @@ static void dmi_table(u8 *buf, int len, int num, const struct dmi_header *dm = (const struct dmi_header *)data; /* - * We want to know the total length (formated area and strings) - * before decoding to make sure we won't run off the table in - * dmi_decode or dmi_string + * We want to know the total length (formatted area and + * strings) before decoding to make sure we won't run off the + * table in dmi_decode or dmi_string */ data += dm->length; while ((data - buf < len - 1) && (data[0] || data[1])) -- cgit v1.2.3-18-g5258 From 54e7ff9d6249ba88e393d7fbc8008da9279723be Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 7 Nov 2008 16:07:02 +0000 Subject: trivial: MPT 
fusion - remove long dead code This triggers false bug reports as it does a bogus kmalloc with locks held but is never really compiled into the kernel. Closes #8329 Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/message/fusion/mptlan.c | 108 ---------------------------------------- 1 file changed, 108 deletions(-) diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index a1abf95cf75..603ffd008c7 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -77,12 +77,6 @@ MODULE_VERSION(my_VERSION); * Fusion MPT LAN private structures */ -struct NAA_Hosed { - u16 NAA; - u8 ieee[FC_ALEN]; - struct NAA_Hosed *next; -}; - struct BufferControl { struct sk_buff *skb; dma_addr_t dma; @@ -159,11 +153,6 @@ static u8 LanCtx = MPT_MAX_PROTOCOL_DRIVERS; static u32 max_buckets_out = 127; static u32 tx_max_out_p = 127 - 16; -#ifdef QLOGIC_NAA_WORKAROUND -static struct NAA_Hosed *mpt_bad_naa = NULL; -DEFINE_RWLOCK(bad_naa_lock); -#endif - /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * lan_reply - Handle all data sent from the hardware. @@ -780,30 +769,6 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) // ctx, skb, skb->data)); mac = skb_mac_header(skb); -#ifdef QLOGIC_NAA_WORKAROUND -{ - struct NAA_Hosed *nh; - - /* Munge the NAA for Tx packets to QLogic boards, which don't follow - RFC 2625. The longer I look at this, the more my opinion of Qlogic - drops. 
*/ - read_lock_irq(&bad_naa_lock); - for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) { - if ((nh->ieee[0] == mac[0]) && - (nh->ieee[1] == mac[1]) && - (nh->ieee[2] == mac[2]) && - (nh->ieee[3] == mac[3]) && - (nh->ieee[4] == mac[4]) && - (nh->ieee[5] == mac[5])) { - cur_naa = nh->NAA; - dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value " - "= %04x.\n", cur_naa)); - break; - } - } - read_unlock_irq(&bad_naa_lock); -} -#endif pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa << 16) | (mac[0] << 8) | @@ -1572,79 +1537,6 @@ mpt_lan_type_trans(struct sk_buff *skb, struct net_device *dev) fcllc = (struct fcllc *)skb->data; -#ifdef QLOGIC_NAA_WORKAROUND -{ - u16 source_naa = fch->stype, found = 0; - - /* Workaround for QLogic not following RFC 2625 in regards to the NAA - value. */ - - if ((source_naa & 0xF000) == 0) - source_naa = swab16(source_naa); - - if (fcllc->ethertype == htons(ETH_P_ARP)) - dlprintk ((KERN_INFO "mptlan/type_trans: got arp req/rep w/ naa of " - "%04x.\n", source_naa)); - - if ((fcllc->ethertype == htons(ETH_P_ARP)) && - ((source_naa >> 12) != MPT_LAN_NAA_RFC2625)){ - struct NAA_Hosed *nh, *prevnh; - int i; - - dlprintk ((KERN_INFO "mptlan/type_trans: ARP Req/Rep from " - "system with non-RFC 2625 NAA value (%04x).\n", - source_naa)); - - write_lock_irq(&bad_naa_lock); - for (prevnh = nh = mpt_bad_naa; nh != NULL; - prevnh=nh, nh=nh->next) { - if ((nh->ieee[0] == fch->saddr[0]) && - (nh->ieee[1] == fch->saddr[1]) && - (nh->ieee[2] == fch->saddr[2]) && - (nh->ieee[3] == fch->saddr[3]) && - (nh->ieee[4] == fch->saddr[4]) && - (nh->ieee[5] == fch->saddr[5])) { - found = 1; - dlprintk ((KERN_INFO "mptlan/type_trans: ARP Re" - "q/Rep w/ bad NAA from system already" - " in DB.\n")); - break; - } - } - - if ((!found) && (nh == NULL)) { - - nh = kmalloc(sizeof(struct NAA_Hosed), GFP_KERNEL); - dlprintk ((KERN_INFO "mptlan/type_trans: ARP Req/Rep w/" - " bad NAA from system not yet in DB.\n")); - - if (nh != NULL) { - nh->next = NULL; - if 
(!mpt_bad_naa) - mpt_bad_naa = nh; - if (prevnh) - prevnh->next = nh; - - nh->NAA = source_naa; /* Set the S_NAA value. */ - for (i = 0; i < FC_ALEN; i++) - nh->ieee[i] = fch->saddr[i]; - dlprintk ((KERN_INFO "Got ARP from %02x:%02x:%02x:%02x:" - "%02x:%02x with non-compliant S_NAA value.\n", - fch->saddr[0], fch->saddr[1], fch->saddr[2], - fch->saddr[3], fch->saddr[4],fch->saddr[5])); - } else { - printk (KERN_ERR "mptlan/type_trans: Unable to" - " kmalloc a NAA_Hosed struct.\n"); - } - } else if (!found) { - printk (KERN_ERR "mptlan/type_trans: found not" - " set, but nh isn't null. Evil " - "funkiness abounds.\n"); - } - write_unlock_irq(&bad_naa_lock); - } -} -#endif /* Strip the SNAP header from ARP packets since we don't * pass them through to the 802.2/SNAP layers. -- cgit v1.2.3-18-g5258 From 7c64ade53a6f977d73f16243865c42ceae999aea Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 7 Nov 2008 14:02:49 +0100 Subject: oprofile: Fix p6 counter overflow check Fix the counter overflow check for CPUs with counter width > 32 I had a similar change in a different patch that I didn't submit and I didn't notice the problem earlier because it was always tested together. Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 0620d6d45f7..3f1b81a83e2 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -27,8 +27,7 @@ static int num_counters = 2; static int counter_width = 32; #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) +#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 
1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) @@ -124,14 +123,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) static int ppro_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { + rdmsrl(msrs->counters[i].addr, val); + if (CTR_OVERFLOWED(val)) { oprofile_add_sample(regs, i); wrmsrl(msrs->counters[i].addr, -reset_value[i]); } -- cgit v1.2.3-18-g5258 From 17c1f07ed70afa4f3941745fc3cc173e2a0365fe Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Fri, 7 Nov 2008 09:51:55 -0800 Subject: [IA64] Reserve elfcorehdr memory in CONFIG_CRASH_DUMP IA64 kdump kernel failed to initialize /proc/vmcore in 2.6.28-rc2. A bug was introduced in this patch commit: d9a9855d0b06ca6d6cc92596fedcc03f8512e062 always reserve elfcore header memory in crash kernel The problem was that the call to reserve_elfcorehdr() should be placed in CONFIG_CRASH_DUMP rather than in CONFIG_CRASH_KERNEL, which does not exist. 
Signed-off-by: Jay Lan Acked-by: Simon Hormon Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index bf441f49682..865af27c773 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -359,7 +359,7 @@ reserve_memory (void) } #endif -#ifdef CONFIG_CRASH_KERNEL +#ifdef CONFIG_CRASH_DUMP if (reserve_elfcorehdr(&rsvd_region[n].start, &rsvd_region[n].end) == 0) n++; -- cgit v1.2.3-18-g5258 From 0c4b95455f250c7006af00208aefdf0f93f63144 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 7 Nov 2008 21:12:17 -0800 Subject: Staging: only build the tree if we really want to This Kconfig change allows the common 'make allmodconfig' and 'make allyesconfig' build options to skip the staging tree, which is probably what you want to have happen anyway. This makes the linux-next developer's life a lot easier so he doesn't have to worry about changes that break the staging tree, that's for me to worry about... Signed-off-by: Greg Kroah-Hartman --- drivers/staging/Kconfig | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index e1654f59eb7..0a49cd788a7 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -21,7 +21,23 @@ menuconfig STAGING If in doubt, say N here. -if STAGING + +config STAGING_EXCLUDE_BUILD + bool "Exclude Staging drivers from being built" + default y + ---help--- + Are you sure you really want to build the staging drivers? + They taint your kernel, don't live up to the normal Linux + kernel quality standards, are a bit crufty around the edges, + and might go off and kick your dog when you aren't paying + attention. + + Say N here to be able to select and build the Staging drivers. 
+ This option is primarily here to prevent them from being built + when selecting 'make allyesconfg' and 'make allmodconfig' so + don't be all that put off, your dog will be just fine. + +if !STAGING_EXCLUDE_BUILD source "drivers/staging/et131x/Kconfig" @@ -45,4 +61,4 @@ source "drivers/staging/at76_usb/Kconfig" source "drivers/staging/poch/Kconfig" -endif # STAGING +endif # !STAGING_EXCLUDE_BUILD -- cgit v1.2.3-18-g5258 From b8f6ec2e61f650fd1a316a207a00965bcb8805d4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Oct 2008 10:44:55 -0700 Subject: Staging: make usbip depend on CONFIG_NET Thanks to Randy Dunlap for finding this problem. Reported-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/staging/usbip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/usbip/Kconfig b/drivers/staging/usbip/Kconfig index 7426235ccc4..217fb7e62c2 100644 --- a/drivers/staging/usbip/Kconfig +++ b/drivers/staging/usbip/Kconfig @@ -1,6 +1,6 @@ config USB_IP_COMMON tristate "USB IP support (EXPERIMENTAL)" - depends on USB && EXPERIMENTAL + depends on USB && NET && EXPERIMENTAL default N ---help--- This enables pushing USB packets over IP to allow remote -- cgit v1.2.3-18-g5258 From 0d12cdd5f883f508d33b85c1bae98fa28987c8c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 16:19:55 +0100 Subject: sched: improve sched_clock() performance in scheduler-intense workloads native_read_tsc() overhead accounts for 20% of the system overhead: 659567 system_call 41222.9375 686796 schedule 435.7843 718382 __switch_to 665.1685 823875 switch_mm 4526.7857 1883122 native_read_tsc 55385.9412 9761990 total 2.8468 this is large part due to the rdtsc_barrier() that is done before and after reading the TSC. But sched_clock() is not a precise clock in the GTOD sense, using such barriers is completely pointless. So remove the barriers and only use them in vget_cycles(). This improves lat_ctx performance by about 5%. 
Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 2 -- arch/x86/include/asm/tsc.h | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 46be2fa7ac2..c2a812ebde8 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); - rdtsc_barrier(); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - rdtsc_barrier(); return EAX_EDX_VAL(val, low, high); } diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163cc91..9cd83a8e40d 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t vget_cycles(void) { + cycles_t cycles; + /* * We only do VDSOs on TSC capable CPUs, so this shouldnt * access boot_cpu_data (which is not VDSO-safe): @@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - return (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + cycles = (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + + return cycles; } extern void tsc_init(void); -- cgit v1.2.3-18-g5258 From 7cbaef9c83e58bbd4bdd534b09052b6c5ec457d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 17:05:38 +0100 Subject: sched: optimize sched_clock() a bit sched_clock() uses cycles_2_ns() needlessly - which is an irq-disabling variant of __cycles_2_ns(). Most of the time sched_clock() is called with irqs disabled already. The few places that call it with irqs enabled need to be updated. 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2ef80e30192..424093b157d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -55,7 +55,7 @@ u64 native_sched_clock(void) rdtscll(this_offset); /* return the value in ns */ - return cycles_2_ns(this_offset); + return __cycles_2_ns(this_offset); } /* We need to define a real function for sched_clock, to override the -- cgit v1.2.3-18-g5258 From c5d712433ff57a66d8fb79a57a4fc7a7c3467b97 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Nov 2008 13:53:33 +0100 Subject: Fix __pfn_to_page(pfn) for CONFIG_DISCONTIGMEM=y Fix the __pfn_to_page(pfn) macro so that it doesn't evaluate its argument twice in the CONFIG_DISCONTIGMEM=y case, because 'pfn' may be a result of a function call having side effects. For example, the hibernation code applies pfn_to_page(pfn) to the result of a function returning the pfn corresponding to the next set bit in a bitmap and the current bit position is modified on each call. This leads to "interesting" failures for CONFIG_DISCONTIGMEM=y due to the current behavior of __pfn_to_page(pfn). Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Linus Torvalds --- include/asm-generic/memory_model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index ae060c62aff..18546d8eb78 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -34,7 +34,7 @@ #define __pfn_to_page(pfn) \ ({ unsigned long __pfn = (pfn); \ - unsigned long __nid = arch_pfn_to_nid(pfn); \ + unsigned long __nid = arch_pfn_to_nid(__pfn); \ NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ }) -- cgit v1.2.3-18-g5258 From 493890e75d98810a3470b4aae23be628ee5e9667 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 26 Oct 2008 12:37:25 +0100 Subject: mmc: increase SD write timeout for crappy cards It seems that some cards are slightly out of spec and occasionally will not be able to complete a write in the allotted 250 ms [1]. Increase the timeout slightly to allow even these cards to function properly. [1] http://lkml.org/lkml/2008/9/23/390 Signed-off-by: Pierre Ossman --- drivers/mmc/core/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 044d84eeed7..f7284b905eb 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -280,7 +280,11 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card) (card->host->ios.clock / 1000); if (data->flags & MMC_DATA_WRITE) - limit_us = 250000; + /* + * The limit is really 250 ms, but that is + * insufficient for some crappy cards.
+ */ + limit_us = 300000; else limit_us = 100000; -- cgit v1.2.3-18-g5258 From d1b268630875a7713b5d468a0c03403c5b721c8e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sat, 8 Nov 2008 21:37:46 +0100 Subject: mmc: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Greg Kroah-Hartman Signed-Off-By: Kay Sievers Signed-off-by: Pierre Ossman --- drivers/mmc/core/bus.c | 3 +-- drivers/mmc/core/host.c | 5 ++--- drivers/mmc/core/sdio_bus.c | 3 +-- drivers/mmc/host/mmc_spi.c | 2 +- drivers/mmc/host/sdhci.c | 2 +- drivers/mmc/host/tifm_sd.c | 16 ++++++++-------- include/linux/mmc/card.h | 2 +- include/linux/mmc/host.h | 2 +- include/linux/mmc/sdio_func.h | 2 +- 9 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 0d9b2d6f9eb..f210a8ee686 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -216,8 +216,7 @@ int mmc_add_card(struct mmc_card *card) int ret; const char *type; - snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", mmc_hostname(card->host), card->rca); + dev_set_name(&card->dev, "%s:%04x", mmc_hostname(card->host), card->rca); switch (card->type) { case MMC_TYPE_MMC: diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 6da80fd4d97..5e945e64ead 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -73,8 +73,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) if (err) goto free; - snprintf(host->class_dev.bus_id, BUS_ID_SIZE, - "mmc%d", host->index); + dev_set_name(&host->class_dev, "mmc%d", host->index); host->parent = dev; host->class_dev.parent = dev; @@ -121,7 +120,7 @@ int mmc_add_host(struct mmc_host *host) WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && !host->ops->enable_sdio_irq); - led_trigger_register_simple(host->class_dev.bus_id, &host->led); + led_trigger_register_simple(dev_name(&host->class_dev), &host->led); err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/sdio_bus.c 
b/drivers/mmc/core/sdio_bus.c index 233d0f9b3c4..46284b52739 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -239,8 +239,7 @@ int sdio_add_func(struct sdio_func *func) { int ret; - snprintf(func->dev.bus_id, sizeof(func->dev.bus_id), - "%s:%d", mmc_card_id(func->card), func->num); + dev_set_name(&func->dev, "%s:%d", mmc_card_id(func->card), func->num); ret = device_add(&func->dev); if (ret == 0) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 07faf5412a1..ad00e163231 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1348,7 +1348,7 @@ static int mmc_spi_probe(struct spi_device *spi) goto fail_add_host; dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", - mmc->class_dev.bus_id, + dev_name(&mmc->class_dev), host->dma_dev ? "" : ", no DMA", (host->pdata && host->pdata->get_ro) ? "" : ", no WP", diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 30f64b1f235..4d010a984be 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1733,7 +1733,7 @@ int sdhci_add_host(struct sdhci_host *host) mmc_add_host(mmc); printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n", - mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id, + mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)), (host->flags & SDHCI_USE_ADMA)?"A":"", (host->flags & SDHCI_USE_DMA)?"DMA":"PIO"); diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index 13844843e8d..82554ddec6b 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -632,7 +632,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) if (host->req) { printk(KERN_ERR "%s : unfinished request detected\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ETIMEDOUT; goto err_out; } @@ -672,7 +672,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) ? 
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE)) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); mrq->cmd->error = -ENOMEM; goto err_out; } @@ -684,7 +684,7 @@ static void tifm_sd_request(struct mmc_host *mmc, struct mmc_request *mrq) : PCI_DMA_FROMDEVICE); if (host->sg_len < 1) { printk(KERN_ERR "%s : scatterlist map failed\n", - sock->dev.bus_id); + dev_name(&sock->dev)); tifm_unmap_sg(sock, &host->bounce_buf, 1, r_data->flags & MMC_DATA_WRITE ? PCI_DMA_TODEVICE @@ -748,7 +748,7 @@ static void tifm_sd_end_cmd(unsigned long data) if (!mrq) { printk(KERN_ERR " %s : no request to complete?\n", - sock->dev.bus_id); + dev_name(&sock->dev)); spin_unlock_irqrestore(&sock->lock, flags); return; } @@ -789,7 +789,7 @@ static void tifm_sd_abort(unsigned long data) printk(KERN_ERR "%s : card failed to respond for a long period of time " "(%x, %x)\n", - host->dev->dev.bus_id, host->req->cmd->opcode, host->cmd_flags); + dev_name(&host->dev->dev), host->req->cmd->opcode, host->cmd_flags); tifm_eject(host->dev); } @@ -906,7 +906,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : controller failed to reset\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -933,7 +933,7 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) if (rc) { printk(KERN_ERR "%s : card not ready - probe failed on initialization\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return -ENODEV; } @@ -954,7 +954,7 @@ static int tifm_sd_probe(struct tifm_dev *sock) if (!(TIFM_SOCK_STATE_OCCUPIED & readl(sock->addr + SOCK_PRESENT_STATE))) { printk(KERN_WARNING "%s : card gone, unexpectedly\n", - sock->dev.bus_id); + dev_name(&sock->dev)); return rc; } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index ee6e822d599..403aa505f27 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -130,7 +130,7 @@ struct mmc_card { #define mmc_card_set_blockaddr(c) ((c)->state |= 
MMC_STATE_BLOCKADDR) #define mmc_card_name(c) ((c)->cid.prod_name) -#define mmc_card_id(c) ((c)->dev.bus_id) +#define mmc_card_id(c) (dev_name(&(c)->dev)) #define mmc_list_to_card(l) container_of(l, struct mmc_card, node) #define mmc_get_drvdata(c) dev_get_drvdata(&(c)->dev) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bde891f6459..f842f234e44 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -176,7 +176,7 @@ static inline void *mmc_priv(struct mmc_host *host) #define mmc_dev(x) ((x)->parent) #define mmc_classdev(x) (&(x)->class_dev) -#define mmc_hostname(x) ((x)->class_dev.bus_id) +#define mmc_hostname(x) (dev_name(&(x)->class_dev)) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 07bee4a0d45..451bdfc8583 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -63,7 +63,7 @@ struct sdio_func { #define sdio_func_set_present(f) ((f)->state |= SDIO_STATE_PRESENT) -#define sdio_func_id(f) ((f)->dev.bus_id) +#define sdio_func_id(f) (dev_name(&(f)->dev)) #define sdio_get_drvdata(f) dev_get_drvdata(&(f)->dev) #define sdio_set_drvdata(f,d) dev_set_drvdata(&(f)->dev, d) -- cgit v1.2.3-18-g5258 From 65b92e5cbc8acd14ea83190b4d016f765dce6075 Mon Sep 17 00:00:00 2001 From: Michel Marti Date: Sat, 8 Nov 2008 11:33:32 +0100 Subject: ALSA: hda - Add another HP model (6730s) for AD1884A Added model=laptop for another HP machine (103c:3614) with AD1884A codec. 
Signed-off-by: Michel Marti Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index d3fd432cb3e..400df85a445 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -3861,6 +3861,7 @@ static const char *ad1884a_models[AD1884A_MODELS] = { static struct snd_pci_quirk ad1884a_cfg_tbl[] = { SND_PCI_QUIRK(0x103c, 0x3030, "HP", AD1884A_MOBILE), SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE), + SND_PCI_QUIRK(0x103c, 0x3614, "HP 6730s", AD1884A_LAPTOP), SND_PCI_QUIRK(0x17aa, 0x20ac, "Thinkpad X300", AD1884A_THINKPAD), {} }; -- cgit v1.2.3-18-g5258 From 00c9ddd1d4cc73aa0077f379279d716cb0ab0ba5 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Sun, 9 Nov 2008 12:50:52 +0100 Subject: ALSA: HDSP: check for io box before uploading firmware currently the hdsp driver tries to upload the firmware, even if the io box is not connected. this patch adds a check for the io box before trying to upload the firmware. thus instead of messages complaining about the fifo status and firmware loading failure, the driver gives a message that no multiface or digiface is connected. 
[A minor coding-style fix by tiwai] Signed-off-by: Tim Blechmann Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdsp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index d723543bead..d680114b3e4 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -5045,6 +5045,10 @@ static int __devinit snd_hdsp_create(struct snd_card *card, /* we wait 2 seconds to let freshly inserted cardbus cards do their hardware init */ ssleep(2); + err = hdsp_check_for_iobox(hdsp); + if (err < 0) + return err; + if ((hdsp_read (hdsp, HDSP_statusRegister) & HDSP_DllError) != 0) { #ifdef HDSP_FW_LOADER if ((err = hdsp_request_fw_loader(hdsp)) < 0) @@ -5057,7 +5061,7 @@ static int __devinit snd_hdsp_create(struct snd_card *card, /* init is complete, we return */ return 0; #endif - /* no iobox connected, we defer initialization */ + /* we defer initialization */ snd_printk(KERN_INFO "Hammerfall-DSP: card initialization pending : waiting for firmware\n"); if ((err = snd_hdsp_create_hwdep(card, hdsp)) < 0) return err; -- cgit v1.2.3-18-g5258 From 3ae7e2e22900b90bbe1d19454405950893c110be Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Sat, 8 Nov 2008 14:42:18 +0100 Subject: ALSA: hdsp: check for iobox and upload firmware during ioctl currently, the error message when trying to run hdspmixer or hdspconf if the breakout box is not connected is somehow misleading, since it asks the user to upload the firmware. this patch adds a test, whether the breakout box is connected and tries to upload the firmware in the case, that it is not present, e.g. because of power failures of the breakout box. 
[Minor coding-style fixes by tiwai] Signed-off-by: Tim Blechmann Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdsp.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index d680114b3e4..736246f98ac 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -4548,11 +4548,20 @@ static int snd_hdsp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigne { struct hdsp *hdsp = (struct hdsp *)hw->private_data; void __user *argp = (void __user *)arg; + int err; switch (cmd) { case SNDRV_HDSP_IOCTL_GET_PEAK_RMS: { struct hdsp_peak_rms __user *peak_rms = (struct hdsp_peak_rms __user *)arg; + err = hdsp_check_for_iobox(hdsp); + if (err < 0) + return err; + + err = hdsp_check_for_firmware(hdsp, 1); + if (err < 0) + return err; + if (!(hdsp->state & HDSP_FirmwareLoaded)) { snd_printk(KERN_ERR "Hammerfall-DSP: firmware needs to be uploaded to the card.\n"); return -EINVAL; @@ -4572,10 +4581,14 @@ static int snd_hdsp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigne unsigned long flags; int i; - if (!(hdsp->state & HDSP_FirmwareLoaded)) { - snd_printk(KERN_ERR "Hammerfall-DSP: Firmware needs to be uploaded to the card.\n"); - return -EINVAL; - } + err = hdsp_check_for_iobox(hdsp); + if (err < 0) + return err; + + err = hdsp_check_for_firmware(hdsp, 1); + if (err < 0) + return err; + spin_lock_irqsave(&hdsp->lock, flags); info.pref_sync_ref = (unsigned char)hdsp_pref_sync_ref(hdsp); info.wordclock_sync_check = (unsigned char)hdsp_wc_sync_check(hdsp); -- cgit v1.2.3-18-g5258 From bbda14dfba26bd4ca5dc74f672518bc42120d765 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Oct 2008 15:57:05 +0100 Subject: regulator: Use menuconfig in Kconfig Use menuconfig instead of flat configs so that you can disable/enable regulator items with one selection. Also, use depends instead of reverse selections to make life easier, too. 
Signed-off-by: Takashi Iwai Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 4dada6ee111..39360e2a454 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1,6 +1,4 @@ -menu "Voltage and Current regulators" - -config REGULATOR +menuconfig REGULATOR bool "Voltage and Current Regulator Support" default n help @@ -23,21 +21,20 @@ config REGULATOR If unsure, say no. +if REGULATOR + config REGULATOR_DEBUG bool "Regulator debug support" - depends on REGULATOR help Say yes here to enable debugging support. config REGULATOR_FIXED_VOLTAGE tristate default n - select REGULATOR config REGULATOR_VIRTUAL_CONSUMER tristate "Virtual regulator consumer support" default n - select REGULATOR help This driver provides a virtual consumer for the voltage and current regulator API which provides sysfs controls for @@ -49,7 +46,6 @@ config REGULATOR_VIRTUAL_CONSUMER config REGULATOR_BQ24022 tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC" default n - select REGULATOR help This driver controls a TI bq24022 Charger attached via GPIOs. The provided current regulator can enable/disable @@ -59,7 +55,6 @@ config REGULATOR_BQ24022 config REGULATOR_WM8350 tristate "Wolfson Microelectroncis WM8350 AudioPlus PMIC" depends on MFD_WM8350 - select REGULATOR help This driver provides support for the voltage and current regulators of the WM8350 AudioPlus PMIC. @@ -67,7 +62,6 @@ config REGULATOR_WM8350 config REGULATOR_WM8400 tristate "Wolfson Microelectroncis WM8400 AudioPlus PMIC" depends on MFD_WM8400 - select REGULATOR help This driver provides support for the voltage regulators of the WM8400 AudioPlus PMIC. 
@@ -75,9 +69,8 @@ config REGULATOR_WM8400 config REGULATOR_DA903X tristate "Support regulators on Dialog Semiconductor DA9030/DA9034 PMIC" depends on PMIC_DA903X - select REGULATOR help Say y here to support the BUCKs and LDOs regulators found on Dialog Semiconductor DA9030/DA9034 PMIC. -endmenu +endif -- cgit v1.2.3-18-g5258 From 058e3739f6b0753696db1952378de9e8d2a11735 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 9 Nov 2008 00:27:53 -0500 Subject: clarify usage expectations for cnt32_to_63() Currently, all existing users of cnt32_to_63() are fine since the CPU architectures where it is used don't do read access reordering, and user mode preemption is disabled already. It is nevertheless a good idea to better elaborate usage requirements wrt preemption, and use an explicit memory barrier on SMP to avoid different CPUs accessing the counter value in the wrong order. On UP a simple compiler barrier is sufficient. Signed-off-by: Nicolas Pitre Acked-by: Mathieu Desnoyers Signed-off-by: Linus Torvalds --- include/linux/cnt32_to_63.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h index 8c0f9505b48..7605fdd1eb6 100644 --- a/include/linux/cnt32_to_63.h +++ b/include/linux/cnt32_to_63.h @@ -16,6 +16,7 @@ #include #include #include +#include /* this is used only to give gcc a clue about good code generation */ union cnt32_to_63 { @@ -53,11 +54,19 @@ union cnt32_to_63 { * needed increment. And any race in updating the value in memory is harmless * as the same value would simply be stored more than once. * - * The only restriction for the algorithm to work properly is that this - * code must be executed at least once per each half period of the 32-bit - * counter to properly update the state bit in memory. This is usually not a - * problem in practice, but if it is then a kernel timer could be scheduled - * to manage for this code to be executed often enough. 
+ * The restrictions for the algorithm to work properly are: + * + * 1) this code must be called at least once per each half period of the + * 32-bit counter; + * + * 2) this code must not be preempted for a duration longer than the + * 32-bit counter half period minus the longest period between two + * calls to this code. + * + * Those requirements ensure proper update to the state bit in memory. + * This is usually not a problem in practice, but if it is then a kernel + * timer should be scheduled to manage for this code to be executed often + * enough. * * Note that the top bit (bit 63) in the returned value should be considered * as garbage. It is not cleared here because callers are likely to use a @@ -68,9 +77,10 @@ union cnt32_to_63 { */ #define cnt32_to_63(cnt_lo) \ ({ \ - static volatile u32 __m_cnt_hi; \ + static u32 __m_cnt_hi; \ union cnt32_to_63 __x; \ __x.hi = __m_cnt_hi; \ + smp_rmb(); \ __x.lo = (cnt_lo); \ if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \ __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \ -- cgit v1.2.3-18-g5258 From 6209344f5a3795d34b7f2c0061f49802283b6bdd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 9 Nov 2008 15:23:57 +0100 Subject: net: unix: fix inflight counting bug in garbage collector Previously I assumed that the receive queues of candidates don't change during the GC. This is only half true, nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it. This can result in inc_inflight_move_tail() erroneously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run. Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves.
Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued. Reported-by: Andrea Bittau Signed-off-by: Miklos Szeredi CC: stable@kernel.org Signed-off-by: Linus Torvalds --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 31 ++++++++++++++++++++++++------- net/unix/garbage.c | 49 +++++++++++++++++++++++++++++++++++++------------ 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7dd29b7e461..c29ff1da8a1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -54,6 +54,7 @@ struct unix_sock { atomic_long_t inflight; spinlock_t lock; unsigned int gc_candidate : 1; + unsigned int gc_maybe_cycle : 1; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4d3c6071b9a..eb90f77bb0e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb) sock_wfree(skb); } -static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + + /* + * Need to duplicate file references for the sake of garbage + * collection. Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. 
+ */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + for (i=scm->fp->count-1; i>=0; i--) unix_inflight(scm->fp->fp[i]); - UNIXCB(skb).fp = scm->fp; skb->destructor = unix_destruct_fds; - scm->fp = NULL; + return 0; } /* @@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) + goto out_free; + } unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = min_t(int, size, skb_tailroom(skb)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) { + kfree_skb(skb); + goto out_err; + } + } if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { kfree_skb(skb); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2a27b84f740..6d4a9a8de5e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), */ struct sock *sk = unix_get_socket(*fp++); if (sk) { - hit = true; - func(unix_sk(sk)); + struct unix_sock *u = unix_sk(sk); + + /* + * Ignore non-candidates, they could + * have been added to the queues after + * starting the garbage collection + */ + if (u->gc_candidate) { + hit = true; + func(u); + } } } if (hit && hitlist != NULL) { @@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); /* - * If this is still a candidate, move it to the end of the - * list, so that it's checked even if it was already passed - * over + * If this still might be part of a cycle, move 
it to the end + * of the list, so that it's checked even if it was already + * passed over */ - if (u->gc_candidate) + if (u->gc_maybe_cycle) list_move_tail(&u->link, &gc_candidates); } @@ -267,6 +276,7 @@ void unix_gc(void) struct unix_sock *next; struct sk_buff_head hitlist; struct list_head cursor; + LIST_HEAD(not_cycle_list); spin_lock(&unix_gc_lock); @@ -282,10 +292,14 @@ void unix_gc(void) * * Holding unix_gc_lock will protect these candidates from * being detached, and hence from gaining an external - * reference. This also means, that since there are no - * possible receivers, the receive queues of these sockets are - * static during the GC, even though the dequeue is done - * before the detach without atomicity guarantees. + * reference. Since there are no possible receivers, all + * buffers currently on the candidates' queues stay there + * during the garbage collection. + * + * We also know that no new candidate can be added onto the + * receive queues. Other, non candidate sockets _can_ be + * added to queue, so we must make sure only to touch + * candidates. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; @@ -299,6 +313,7 @@ void unix_gc(void) if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); u->gc_candidate = 1; + u->gc_maybe_cycle = 1; } } @@ -325,13 +340,23 @@ void unix_gc(void) list_move(&cursor, &u->link); if (atomic_long_read(&u->inflight) > 0) { - list_move_tail(&u->link, &gc_inflight_list); - u->gc_candidate = 0; + list_move_tail(&u->link, &not_cycle_list); + u->gc_maybe_cycle = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); } } list_del(&cursor); + /* + * not_cycle_list contains those sockets which do not make up a + * cycle. Restore these to the inflight list.
+ */ + while (!list_empty(&not_cycle_list)) { + u = list_entry(not_cycle_list.next, struct unix_sock, link); + u->gc_candidate = 0; + list_move_tail(&u->link, &gc_inflight_list); + } + /* * Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs -- cgit v1.2.3-18-g5258 From 984f2f377fdfd098f5ae58d09ee04d5e29e6112b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 8 Nov 2008 20:24:19 +1100 Subject: cpumask: introduce new API, without changing anything, v3 Impact: cleanup Clean up based on feedback from Andrew Morton and others: - change to inline functions instead of macros - add __init to bootmem method - add a missing debug check Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 58 ++++++++++++++++++++++++++++++++++++++++++++----- lib/cpumask.c | 3 ++- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 31caa1bc620..21e1dd43e52 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -564,12 +564,36 @@ static inline unsigned int cpumask_check(unsigned int cpu) } #if NR_CPUS == 1 -/* Uniprocesor. */ -#define cpumask_first(src) ({ (void)(src); 0; }) -#define cpumask_next(n, src) ({ (void)(src); 1; }) -#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) -#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) -#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) +/* Uniprocessor. Assume all masks are "1". */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return 0; +} + +/* Valid inputs for n are -1 and 0.
*/ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + return n+1; +} + +static inline unsigned int cpumask_next_and(int n, + const struct cpumask *srcp, + const struct cpumask *andp) +{ + return n+1; +} + +/* cpu must be a valid cpu, ie 0, so there's no other choice. */ +static inline unsigned int cpumask_any_but(const struct cpumask *mask, + unsigned int cpu) +{ + return 1; +} #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) @@ -620,10 +644,32 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); +/** + * for_each_cpu - iterate over every cpu in a mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ #define for_each_cpu(cpu, mask) \ for ((cpu) = -1; \ (cpu) = cpumask_next((cpu), (mask)), \ (cpu) < nr_cpu_ids;) + +/** + * for_each_cpu_and - iterate over every cpu in both masks + * @cpu: the (optionally unsigned) integer iterator + * @mask: the first cpumask pointer + * @and: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_and(&tmp, &mask, &and); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. 
+ */ #define for_each_cpu_and(cpu, mask, and) \ for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask), (and)), \ diff --git a/lib/cpumask.c b/lib/cpumask.c index 2ebc3a9a746..8d03f22c6ce 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -67,6 +67,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; + cpumask_check(cpu); for_each_cpu(i, mask) if (i != cpu) break; @@ -108,7 +109,7 @@ void free_cpumask_var(cpumask_var_t mask) } EXPORT_SYMBOL(free_cpumask_var); -void free_bootmem_cpumask_var(cpumask_var_t mask) +void __init free_bootmem_cpumask_var(cpumask_var_t mask) { free_bootmem((unsigned long)mask, cpumask_size()); } -- cgit v1.2.3-18-g5258 From b726e923ea4d216027e466aa602d914e4b4a63af Mon Sep 17 00:00:00 2001 From: Doug Nazar Date: Wed, 5 Nov 2008 06:16:28 -0500 Subject: Fix nfsd truncation of readdir results Commit 8d7c4203 "nfsd: fix failure to set eof in readdir in some situations" introduced a bug: on a directory in an exported ext3 filesystem with dir_index unset, a READDIR will only return about 250 entries, even if the directory was larger. Bisected it back to this commit; reverting it fixes the problem. It turns out that in this case ext3 reads a block at a time, then returns from readdir, which means we can end up with buf.full==0 but with more entries in the directory still to be read. Before 8d7c4203 (but after c002a6c797 "Optimise NFS readdir hack slightly"), this would cause us to return the READDIR result immediately, but with the eof bit unset. That could cause a performance regression (because the client would need more roundtrips to the server to read the whole directory), but no loss in correctness, since the cleared eof bit caused the client to send another readdir. After 8d7c4203, the setting of the eof bit made this a correctness problem. So, move nfserr_eof into the loop and remove the buf.full check so that we loop until buf.used==0. 
The following seems to do the right thing and reduces the network traffic since we don't return a READDIR result until the buffer is full. Tested on an empty directory & large directory; eof is properly sent and there are no more short buffers. Signed-off-by: Doug Nazar Cc: David Woodhouse Cc: Al Viro Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 848a03e83a4..4433c8f0016 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1875,11 +1875,11 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, return -ENOMEM; offset = *offsetp; - cdp->err = nfserr_eof; /* will be cleared on successful read */ while (1) { unsigned int reclen; + cdp->err = nfserr_eof; /* will be cleared on successful read */ buf.used = 0; buf.full = 0; @@ -1912,9 +1912,6 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, de = (struct buffered_dirent *)((char *)de + reclen); } offset = vfs_llseek(file, 0, SEEK_CUR); - cdp->err = nfserr_eof; - if (!buf.full) - break; } done: -- cgit v1.2.3-18-g5258 From 43e61711d4e948d3e9c1c13832038659b2cd9287 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Nov 2008 12:47:04 -0800 Subject: Don't ask twice about not including staging drivers The "Exclude staging drivers" question is there so that we don't build staging drivers for allyesconfig or allnoconfig settings, but it's very irritating when you've already said "no" to staging drivers earlier. There is absolutely no point in declining twice - once you've declined the staging drivers, you're done. So make the second question depend on the first question having been answered in the affirmative. 
Signed-off-by: Linus Torvalds --- drivers/staging/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 0a49cd788a7..c95b286a123 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -23,7 +23,7 @@ menuconfig STAGING config STAGING_EXCLUDE_BUILD - bool "Exclude Staging drivers from being built" + bool "Exclude Staging drivers from being built" if STAGING default y ---help--- Are you sure you really want to build the staging drivers? -- cgit v1.2.3-18-g5258 From bf1b36445dc868cbbde194aa1dd87e38fe24cf16 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Sat, 13 Sep 2008 17:08:31 +0100 Subject: kbuild: Fixup deb-pkg target to generate separate firmware deb The below is a simplistic fix for "make deb-pkg"; it splits the firmware out to a linux-firmware-image package and adds an (unversioned) Suggests to the linux package for this firmware. Signed-Off-By: Jonathan McDowell Acked-by: Frans Pop Signed-off-by: Sam Ravnborg --- scripts/package/builddeb | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index ba6bf5d5abf..1264b8e2829 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -15,15 +15,18 @@ set -e version=$KERNELRELEASE revision=`cat .version` tmpdir="$objtree/debian/tmp" +fwdir="$objtree/debian/fwtmp" packagename=linux-$version +fwpackagename=linux-firmware-image if [ "$ARCH" == "um" ] ; then packagename=user-mode-linux-$version fi # Setup the directory structure -rm -rf "$tmpdir" +rm -rf "$tmpdir" "$fwdir" mkdir -p "$tmpdir/DEBIAN" "$tmpdir/lib" "$tmpdir/boot" +mkdir -p "$fwdir/DEBIAN" "$fwdir/lib" if [ "$ARCH" == "um" ] ; then mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/share/doc/$packagename" "$tmpdir/usr/bin" fi @@ -107,6 +110,7 @@ Standards-Version: 3.6.1 Package: $packagename Provides: kernel-image-$version, linux-image-$version 
+Suggests: $fwpackagename Architecture: any Description: Linux kernel, version $version This package contains the Linux kernel, modules and corresponding other @@ -118,8 +122,24 @@ fi chown -R root:root "$tmpdir" chmod -R go-w "$tmpdir" +# Do we have firmware? Move it out of the way and build it into a package. +if [ -e "$tmpdir/lib/firmware" ]; then + mv "$tmpdir/lib/firmware" "$fwdir/lib/" + + cat <<EOF >> debian/control + +Package: $fwpackagename +Architecture: all +Description: Linux kernel firmware, version $version + This package contains firmware from the Linux kernel, version $version +EOF + + dpkg-gencontrol -isp -p$fwpackagename -P"$fwdir" + dpkg --build "$fwdir" .. +fi + # Perform the final magic -dpkg-gencontrol -isp +dpkg-gencontrol -isp -p$packagename dpkg --build "$tmpdir" .. exit 0 -- cgit v1.2.3-18-g5258 From 9a6558371bcd01c2973b7638181db4ccc34eab4f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 Nov 2008 12:45:10 -0800 Subject: regression: disable timer peek-ahead for 2.6.28 It's showing up as regressions; disabling it very likely just papers over an underlying issue, but time is running out for 2.6.28, lets get back to this for 2.6.29 Fixes: #11826 and #11893 Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- drivers/cpuidle/cpuidle.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 5bed73329ef..8504a210855 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -65,12 +65,14 @@ static void cpuidle_idle_call(void) return; } +#if 0 + /* shows regressions, re-enable for 2.6.29 */ /* * run any timers that can be run now, at this point * before calculating the idle duration etc.
*/ hrtimer_peek_ahead_timers(); - +#endif /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); if (need_resched()) -- cgit v1.2.3-18-g5258 From f7160c7573615ec82c691e294cf80d920b5d588d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Nov 2008 16:36:15 -0800 Subject: Linux 2.6.28-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29abe62ccba..7f9ff9bf154 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 28 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Killer Bat of Doom # *DOCUMENTATION* -- cgit v1.2.3-18-g5258 From 0f101fa6bccdc797ddcee40fd4c55e879155a40f Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Sun, 9 Nov 2008 20:32:40 +0200 Subject: ALSA: gusextreme: Fix build errors gusextreme depends on opl3 support. Add the appropriate select to Kconfig. Also remove the unnecessary hwdep select. Relevant build errors: ERROR: "snd_opl3_hwdep_new" [sound/isa/gus/snd-gusextreme.ko] undefined! ERROR: "snd_opl3_create" [sound/isa/gus/snd-gusextreme.ko] undefined! Signed-off-by: Ville Syrjala Signed-off-by: Takashi Iwai --- sound/isa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index 660beb41f76..ce0aa044e27 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -211,7 +211,7 @@ config SND_GUSCLASSIC config SND_GUSEXTREME tristate "Gravis UltraSound Extreme" - select SND_HWDEP + select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM help -- cgit v1.2.3-18-g5258 From 8a8bc22332ee6ea49137508467a76aa7f4367719 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 14:48:21 +0900 Subject: libata: revert convert-to-block-tagging patches This patch reverts the following three commits which convert libata to use block layer tagging.
43a49cbdf31e812c0d8f553d433b09b421f5d52c e013e13bf605b9e6b702adffbe2853cfc60e7806 2fca5ccf97d2c28bcfce44f5b07d85e74e3cd18e Although using block layer tagging is the right direction, due to the tight coupling among tag number, data structure allocation and hardware command slot allocation, libata doesn't work correctly with the current conversion. The biggest problem is guaranteeing that tag 0 is always used for non-NCQ commands. Due to the way blk-tag is implemented and how SCSI starts and finishes requests, such guarantee can't be made. I'm not sure whether this would actually break any low level driver but it doesn't look like a good idea to break such assumption given the frailty of ATA controllers. So, for the time being, keep using the old dumb in-libata qc allocation. Signed-off-by: Tejun Heo Cc: Jens Axobe Cc: Jeff Garzik Signed-off-by: Linus Torvalds --- drivers/ata/libata-core.c | 66 ++++++++++++++++++++++++++++++++++++++++++----- drivers/ata/libata-scsi.c | 23 ++--------------- drivers/ata/libata.h | 19 ++------------ include/linux/libata.h | 1 + 4 files changed, 65 insertions(+), 44 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 622350d9b2e..0cd3ad49713 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1712,6 +1712,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, else tag = 0; + if (test_and_set_bit(tag, &ap->qc_allocated)) + BUG(); qc = __ata_qc_from_tag(ap, tag); qc->tag = tag; @@ -4562,6 +4564,37 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } +/** + * ata_qc_new - Request an available ATA command, for queueing + * @ap: Port associated with device @dev + * @dev: Device from whom we request an available command structure + * + * LOCKING: + * None. 
+ */ + +static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) +{ + struct ata_queued_cmd *qc = NULL; + unsigned int i; + + /* no command while frozen */ + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + + /* the last tag is reserved for internal command. */ + for (i = 0; i < ATA_MAX_QUEUE - 1; i++) + if (!test_and_set_bit(i, &ap->qc_allocated)) { + qc = __ata_qc_from_tag(ap, i); + break; + } + + if (qc) + qc->tag = i; + + return qc; +} + /** * ata_qc_new_init - Request an available ATA command, and initialize it * @dev: Device from whom we request an available command structure @@ -4571,20 +4604,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * None. */ -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) +struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; - if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) - return NULL; - - qc = __ata_qc_from_tag(ap, tag); + qc = ata_qc_new(ap); if (qc) { qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; - qc->tag = tag; ata_qc_reinit(qc); } @@ -4592,6 +4621,31 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return qc; } +/** + * ata_qc_free - free unused ata_queued_cmd + * @qc: Command to complete + * + * Designed to free unused ata_queued_cmd object + * in case something prevents using it. 
+ * + * LOCKING: + * spin_lock_irqsave(host lock) + */ +void ata_qc_free(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + unsigned int tag; + + WARN_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + + qc->flags = 0; + tag = qc->tag; + if (likely(ata_tag_valid(tag))) { + qc->tag = ATA_TAG_POISON; + clear_bit(tag, &ap->qc_allocated); + } +} + void __ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3fa75eac135..47c7afcb36f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -709,11 +709,7 @@ static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, { struct ata_queued_cmd *qc; - if (cmd->request->tag != -1) - qc = ata_qc_new_init(dev, cmd->request->tag); - else - qc = ata_qc_new_init(dev, 0); - + qc = ata_qc_new_init(dev); if (qc) { qc->scsicmd = cmd; qc->scsidone = done; @@ -1108,17 +1104,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id)); depth = min(ATA_MAX_QUEUE - 1, depth); - - /* - * If this device is behind a port multiplier, we have - * to share the tag map between all devices on that PMP. - * Set up the shared tag map here and we get automatic. 
- */ - if (dev->link->ap->pmp_link) - scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1); - - scsi_set_tag_type(sdev, MSG_SIMPLE_TAG); - scsi_activate_tcq(sdev, depth); + scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); } return 0; @@ -1958,11 +1944,6 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) hdr[1] |= (1 << 7); memcpy(rbuf, hdr, sizeof(hdr)); - - /* if ncq, set tags supported */ - if (ata_id_has_ncq(args->id)) - rbuf[7] |= (1 << 1); - memcpy(&rbuf[8], "ATA ", 8); ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index d3831d39bda..fe2839e5877 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -74,7 +74,7 @@ extern struct ata_link *ata_dev_phys_link(struct ata_device *dev); extern void ata_force_cbl(struct ata_port *ap); extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); +extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); @@ -103,6 +103,7 @@ extern int ata_dev_configure(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern void ata_sg_clean(struct ata_queued_cmd *qc); +extern void ata_qc_free(struct ata_queued_cmd *qc); extern void ata_qc_issue(struct ata_queued_cmd *qc); extern void __ata_qc_complete(struct ata_queued_cmd *qc); extern int atapi_check_dma(struct ata_queued_cmd *qc); @@ -118,22 +119,6 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy); extern void ata_lpm_schedule(struct 
ata_port *ap, enum link_pm); -/** - * ata_qc_free - free unused ata_queued_cmd - * @qc: Command to complete - * - * Designed to free unused ata_queued_cmd object - * in case something prevents using it. - * - * LOCKING: - * spin_lock_irqsave(host lock) - */ -static inline void ata_qc_free(struct ata_queued_cmd *qc) -{ - qc->flags = 0; - qc->tag = ATA_TAG_POISON; -} - /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI extern void ata_acpi_associate_sata_port(struct ata_port *ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index c7665a4134c..59b0f1c807b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -698,6 +698,7 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE]; + unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ -- cgit v1.2.3-18-g5258 From 254248313aed7e6ff295ca21a82ca989b1f69c16 Mon Sep 17 00:00:00 2001 From: Travis Place Date: Mon, 10 Nov 2008 17:56:23 +0100 Subject: ALSA: hda - Make the HP EliteBook 8530p use AD1884A model laptop Added a QUIRK to patch_analog.c for the HP Elitebook 8530p (IDs 0x103c:0x30e7) to use AD1884A model 'laptop' by default. Playback and Capture confirmed working. 
Signed-off-by: Travis Place Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 400df85a445..686c77491de 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -3861,6 +3861,7 @@ static const char *ad1884a_models[AD1884A_MODELS] = { static struct snd_pci_quirk ad1884a_cfg_tbl[] = { SND_PCI_QUIRK(0x103c, 0x3030, "HP", AD1884A_MOBILE), SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE), + SND_PCI_QUIRK(0x103c, 0x30e7, "HP EliteBook 8530p", AD1884A_LAPTOP), SND_PCI_QUIRK(0x103c, 0x3614, "HP 6730s", AD1884A_LAPTOP), SND_PCI_QUIRK(0x17aa, 0x20ac, "Thinkpad X300", AD1884A_THINKPAD), {} -- cgit v1.2.3-18-g5258 From bf5e6519b85b3853f2d0bb4f17a4e2eaeffeb574 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 21:46:00 -0500 Subject: ftrace: disable tracing on resize Impact: fix for bug on resize This patch addresses the bug found here: http://bugzilla.kernel.org/show_bug.cgi?id=11996 When ftrace converted to the new unified trace buffer, the resizing of the buffer was not protected as much as it was originally. If tracing is performed while the resize occurs, then the buffer can be corrupted. This patch disables all ftrace buffer modifications before a resize takes place. 
Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f917..abfa8103d04 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; + int ret, cpu; struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) @@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, goto out; } + /* disable all cpu buffers */ + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_inc(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_inc(&max_tr.data[cpu]->disabled); + } + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_dec(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_dec(&max_tr.data[cpu]->disabled); + } + max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); -- cgit v1.2.3-18-g5258 From 4143c5cb36331155a1823af8b3a8c761a59fed71 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 10 Nov 2008 21:46:01 -0500 Subject: ring-buffer: prevent infinite looping on time stamping Impact: removal of unnecessary looping The lockless part of the ring buffer allows for reentry into the code from interrupts. A timestamp is taken, a test is performed and if it detects that an interrupt occurred that did tracing, it tries again. The problem arises if the timestamp code itself causes a trace. The detection will detect this and loop again. The difference between this and an interrupt doing tracing, is that this will fail every time, and cause an infinite loop.
Currently, we test if the loop happens 1000 times, and if so, it will produce a warning and disable the ring buffer. The problem with this approach is that it makes it difficult to perform some types of tracing (tracing the timestamp code itself). Each trace entry has a delta timestamp from the previous entry. If a trace entry is reserved but an interrupt occurs and traces before the previous entry is committed, the delta timestamp for that entry will be zero. This actually makes sense in terms of tracing, because the interrupt entry happened before the preempted entry was committed, so one may consider the two happening at the same time. The order is still preserved in the buffer. With this idea, instead of trying to get a new timestamp if an interrupt made it in between the timestamp and the test, the entry could simply make the delta zero and continue. This will prevent interrupts or tracers in the timer code from causing the above loop. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f338063864..2f76193c348 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, /* Did the write stamp get updated already? */ if (unlikely(ts < cpu_buffer->write_stamp)) - goto again; + delta = 0; if (test_time_stamp(delta)) { -- cgit v1.2.3-18-g5258