diff options
author | Paul Mundt <lethal@linux-sh.org> | 2009-09-25 12:15:15 +0900 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2009-09-25 12:15:15 +0900 |
commit | c373ba999103fa794f041eab5bd490714d2dee88 (patch) | |
tree | 8f2b445b1e0af2491c83527967dbcda76054a486 | |
parent | 6f3529f00a0a9ac06413d18d3926adf099cb59af (diff) | |
parent | 851b147e4411df6a1e7e90e2a609773c277eefd2 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
623 files changed, 29885 insertions, 12842 deletions
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt new file mode 100644 index 00000000000..074f4be6667 --- /dev/null +++ b/Documentation/arm/tcm.txt @@ -0,0 +1,145 @@ +ARM TCM (Tightly-Coupled Memory) handling in Linux +---- +Written by Linus Walleij <linus.walleij@stericsson.com> + +Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory). +This is usually just a few (4-64) KiB of RAM inside the ARM +processor. + +Due to being embedded inside the CPU The TCM has a +Harvard-architecture, so there is an ITCM (instruction TCM) +and a DTCM (data TCM). The DTCM can not contain any +instructions, but the ITCM can actually contain data. +The size of DTCM or ITCM is minimum 4KiB so the typical +minimum configuration is 4KiB ITCM and 4KiB DTCM. + +ARM CPU:s have special registers to read out status, physical +location and size of TCM memories. arch/arm/include/asm/cputype.h +defines a CPUID_TCM register that you can read out from the +system control coprocessor. Documentation from ARM can be found +at http://infocenter.arm.com, search for "TCM Status Register" +to see documents for all CPUs. Reading this register you can +determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the +machine. + +There is further a TCM region register (search for "TCM Region +Registers" at the ARM site) that can report and modify the location +size of TCM memories at runtime. This is used to read out and modify +TCM location and size. Notice that this is not a MMU table: you +actually move the physical location of the TCM around. At the +place you put it, it will mask any underlying RAM from the +CPU so it is usually wise not to overlap any physical RAM with +the TCM. The TCM memory exists totally outside the MMU and will +override any MMU mappings. + +Code executing inside the ITCM does not "see" any MMU mappings +and e.g. register accesses must be made to physical addresses. + +TCM is used for a few things: + +- FIQ and other interrupt handlers that need deterministic + timing and cannot wait for cache misses. + +- Idle loops where all external RAM is set to self-refresh + retention mode, so only on-chip RAM is accessible by + the CPU and then we hang inside ITCM waiting for an + interrupt. + +- Other operations which implies shutting off or reconfiguring + the external RAM controller. + +There is an interface for using TCM on the ARM architecture +in <asm/tcm.h>. Using this interface it is possible to: + +- Define the physical address and size of ITCM and DTCM. + +- Tag functions to be compiled into ITCM. + +- Tag data and constants to be allocated to DTCM and ITCM. + +- Have the remaining TCM RAM added to a special + allocation pool with gen_pool_create() and gen_pool_add() + and provice tcm_alloc() and tcm_free() for this + memory. Such a heap is great for things like saving + device state when shutting off device power domains. + +A machine that has TCM memory shall select HAVE_TCM in +arch/arm/Kconfig for itself, and then the +rest of the functionality will depend on the physical +location and size of ITCM and DTCM to be defined in +mach/memory.h for the machine. Code that needs to use +TCM shall #include <asm/tcm.h> If the TCM is not located +at the place given in memory.h it will be moved using +the TCM Region registers. + +Functions to go into itcm can be tagged like this: +int __tcmfunc foo(int bar); + +Variables to go into dtcm can be tagged like this: +int __tcmdata foo; + +Constants can be tagged like this: +int __tcmconst foo; + +To put assembler into TCM just use +.section ".tcm.text" or .section ".tcm.data" +respectively. + +Example code: + +#include <asm/tcm.h> + +/* Uninitialized data */ +static u32 __tcmdata tcmvar; +/* Initialized data */ +static u32 __tcmdata tcmassigned = 0x2BADBABEU; +/* Constant */ +static const u32 __tcmconst tcmconst = 0xCAFEBABEU; + +static void __tcmlocalfunc tcm_to_tcm(void) +{ + int i; + for (i = 0; i < 100; i++) + tcmvar ++; +} + +static void __tcmfunc hello_tcm(void) +{ + /* Some abstract code that runs in ITCM */ + int i; + for (i = 0; i < 100; i++) { + tcmvar ++; + } + tcm_to_tcm(); +} + +static void __init test_tcm(void) +{ + u32 *tcmem; + int i; + + hello_tcm(); + printk("Hello TCM executed from ITCM RAM\n"); + + printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar); + tcmvar = 0xDEADBEEFU; + printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar); + + printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned); + + printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst); + + /* Allocate some TCM memory from the pool */ + tcmem = tcm_alloc(20); + if (tcmem) { + printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem); + tcmem[0] = 0xDEADBEEFU; + tcmem[1] = 0x2BADBABEU; + tcmem[2] = 0xCAFEBABEU; + tcmem[3] = 0xDEADBEEFU; + tcmem[4] = 0x2BADBABEU; + for (i = 0; i < 5; i++) + printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]); + tcm_free(tcmem, 20); + } +} diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c index 1d2c010bae1..e7823ffb1ca 100644 --- a/Documentation/auxdisplay/cfag12864b-example.c +++ b/Documentation/auxdisplay/cfag12864b-example.c @@ -194,7 +194,6 @@ static void cfag12864b_blit(void) */ #include <stdio.h> -#include <string.h> #define EXAMPLES 6 diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 6eb1a97e88c..455d4e6d346 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0: # echo 0 > tasks +2.3 Mounting hierarchies by name +-------------------------------- + +Passing the name=<x> option when mounting a cgroups hierarchy +associates the given name with the hierarchy. This can be used when +mounting a pre-existing hierarchy, in order to refer to it by name +rather than by its set of active subsystems. Each hierarchy is either +nameless, or has a unique name. + +The name should match [\w.-]+ + +When passing a name=<x> option for a new hierarchy, you need to +specify subsystems manually; the legacy behaviour of mounting all +subsystems when none are explicitly specified is not supported when +you give a subsystem a name. + +The name of the subsystem appears as part of the hierarchy description +in /proc/mounts and /proc/<pid>/cgroups. + + 3. Kernel API ============= @@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *task) + struct task_struct *task, bool threadgroup) (cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem @@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL task is passed, then a successful result indicates that *any* unspecified task can be moved into the cgroup. Note that this isn't called on a fork. If this method returns 0 (success) then this should -remain valid while the caller holds cgroup_mutex. +remain valid while the caller holds cgroup_mutex. If threadgroup is +true, then a successful result indicates that all threads in the given +thread's threadgroup can be moved together. void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) + struct cgroup *old_cgrp, struct task_struct *task, + bool threadgroup) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. +If threadgroup is true, the subsystem should take care of all threads +in the specified thread's threadgroup. Currently does not support any +subsystem that might need the old_cgrp for every thread in the group. void fork(struct cgroup_subsy *ss, struct task_struct *task) diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 23d1262c077..b871f2552b4 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that pages that are selected for reclaiming come from the per cgroup LRU list. +NOTE: Reclaim does not work for the root cgroup, since we cannot set any +limits on the root cgroup. + 2. Locking The memory controller uses the following hierarchy @@ -210,6 +213,7 @@ We can alter the memory limit: NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, mega or gigabytes. NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). +NOTE: We cannot set limits on the root cgroup any more. # cat /cgroups/0/memory.limit_in_bytes 4194304 @@ -375,7 +379,42 @@ cgroups created below it. NOTE2: This feature can be enabled/disabled per subtree. -7. TODO +7. Soft limits + +Soft limits allow for greater sharing of memory. The idea behind soft limits +is to allow control groups to use as much of the memory as needed, provided + +a. There is no memory contention +b. They do not exceed their hard limit + +When the system detects memory contention or low memory control groups +are pushed back to their soft limits. If the soft limit of each control +group is very high, they are pushed back as much as possible to make +sure that one control group does not starve the others of memory. + +Please note that soft limits is a best effort feature, it comes with +no guarantees, but it does its best to make sure that when memory is +heavily contended for, memory is allocated based on the soft limit +hints/setup. Currently soft limit based reclaim is setup such that +it gets invoked from balance_pgdat (kswapd). + +7.1 Interface + +Soft limits can be setup by using the following commands (in this example we +assume a soft limit of 256 megabytes) + +# echo 256M > memory.soft_limit_in_bytes + +If we want to change this to 1G, we can at any time use + +# echo 1G > memory.soft_limit_in_bytes + +NOTE1: Soft limits take effect over a long period of time, since they involve + reclaiming memory for balancing between memory cgroups +NOTE2: It is recommended to set the soft limit always below the hard limit, + otherwise the hard limit will take precedence. + +8. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 9f59fcbf5d8..ba046b8fa92 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,20 +54,23 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_<operation>(<op specific parameters>, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_<operation>(<op specific parameters>, struct async_submit ctl *submit) 3.2 Supported operations: -memcpy - memory copy between a source and a destination buffer -memset - fill a destination buffer with a byte value -xor - xor a series of source buffers and write the result to a - destination buffer -xor_zero_sum - xor a series of source buffers and set a flag if the - result is zero. The implementation attempts to prevent - writes to memory +memcpy - memory copy between a source and a destination buffer +memset - fill a destination buffer with a byte value +xor - xor a series of source buffers and write the result to a + destination buffer +xor_val - xor a series of source buffers and set a flag if the + result is zero. The implementation attempts to prevent + writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation @@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -119,30 +122,42 @@ of an operation. Perform a xor->copy->xor operation where each operation depends on the result from the previous operation: -void complete_xor_copy_xor(void *param) +void callback(void *param) { - printk("complete\n"); + struct completion *cmp = param; + + complete(cmp); } -int run_xor_copy_xor(struct page **xor_srcs, - int xor_src_cnt, - struct page *xor_dest, - size_t xor_len, - struct page *copy_src, - struct page *copy_dest, - size_t copy_len) +void run_xor_copy_xor(struct page **xor_srcs, + int xor_src_cnt, + struct page *xor_dest, + size_t xor_len, + struct page *copy_src, + struct page *copy_dest, + size_t copy_len) { struct dma_async_tx_descriptor *tx; + addr_conv_t addr_conv[xor_src_cnt]; + struct async_submit_ctl submit; + addr_conv_t addr_conv[NDISKS]; + struct completion cmp; + + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, + addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit) - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, - tx, complete_xor_copy_xor, NULL); + submit->depend_tx = tx; + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); + + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, + callback, &cmp, addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); async_tx_issue_pending_all(); + + wait_for_completion(&cmp); } See include/linux/async_tx.h for more information on the flags. See the diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc..23a181074f9 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt @@ -4,7 +4,7 @@ Shared Subtrees Contents: 1) Overview 2) Features - 3) smount command + 3) Setting mount states 4) Use-case 5) Detailed semantics 6) Quiz @@ -41,14 +41,14 @@ replicas continue to be exactly same. Here is an example: - Lets say /mnt has a mount that is shared. + Let's say /mnt has a mount that is shared. mount --make-shared /mnt - note: mount command does not yet support the --make-shared flag. - I have included a small C program which does the same by executing - 'smount /mnt shared' + Note: mount(8) command now supports the --make-shared flag, + so the sample 'smount' program is no longer needed and has been + removed. - #mount --bind /mnt /tmp + # mount --bind /mnt /tmp The above command replicates the mount at /mnt to the mountpoint /tmp and the contents of both the mounts remain identical. @@ -58,8 +58,8 @@ replicas continue to be exactly same. #ls /tmp a b c - Now lets say we mount a device at /tmp/a - #mount /dev/sd0 /tmp/a + Now let's say we mount a device at /tmp/a + # mount /dev/sd0 /tmp/a #ls /tmp/a t1 t2 t2 @@ -80,21 +80,20 @@ replicas continue to be exactly same. Here is an example: - Lets say /mnt has a mount which is shared. - #mount --make-shared /mnt + Let's say /mnt has a mount which is shared. + # mount --make-shared /mnt - Lets bind mount /mnt to /tmp - #mount --bind /mnt /tmp + Let's bind mount /mnt to /tmp + # mount --bind /mnt /tmp the new mount at /tmp becomes a shared mount and it is a replica of the mount at /mnt. - Now lets make the mount at /tmp; a slave of /mnt - #mount --make-slave /tmp - [or smount /tmp slave] + Now let's make the mount at /tmp; a slave of /mnt + # mount --make-slave /tmp - lets mount /dev/sd0 on /mnt/a - #mount /dev/sd0 /mnt/a + let's mount /dev/sd0 on /mnt/a + # mount /dev/sd0 /mnt/a #ls /mnt/a t1 t2 t3 @@ -104,9 +103,9 @@ replicas continue to be exactly same. Note the mount event has propagated to the mount at /tmp - However lets see what happens if we mount something on the mount at /tmp + However let's see what happens if we mount something on the mount at /tmp - #mount /dev/sd1 /tmp/b + # mount /dev/sd1 /tmp/b #ls /tmp/b s1 s2 s3 @@ -124,12 +123,11 @@ replicas continue to be exactly same. 2d) A unbindable mount is a unbindable private mount - lets say we have a mount at /mnt and we make is unbindable + let's say we have a mount at /mnt and we make is unbindable - #mount --make-unbindable /mnt - [ smount /mnt unbindable ] + # mount --make-unbindable /mnt - Lets try to bind mount this mount somewhere else. + Let's try to bind mount this mount somewhere else. # mount --bind /mnt /tmp mount: wrong fs type, bad option, bad superblock on /mnt, or too many mounted file systems @@ -137,149 +135,15 @@ replicas continue to be exactly same. Binding a unbindable mount is a invalid operation. -3) smount command +3) Setting mount states - Currently the mount command is not aware of shared subtree features. - Work is in progress to add the support in mount ( util-linux package ). - Till then use the following program. + The mount command (util-linux package) can be used to set mount + states: - ------------------------------------------------------------------------ - // - //this code was developed my Miklos Szeredi <miklos@szeredi.hu> - //and modified by Ram Pai <linuxram@us.ibm.com> - // sample usage: - // smount /tmp shared - // - #include <stdio.h> - #include <stdlib.h> - #include <unistd.h> - #include <string.h> - #include <sys/mount.h> - #include <sys/fsuid.h> - - #ifndef MS_REC - #define MS_REC 0x4000 /* 16384: Recursive loopback */ - #endif - - #ifndef MS_SHARED - #define MS_SHARED 1<<20 /* Shared */ - #endif - - #ifndef MS_PRIVATE - #define MS_PRIVATE 1<<18 /* Private */ - #endif - - #ifndef MS_SLAVE - #define MS_SLAVE 1<<19 /* Slave */ - #endif - - #ifndef MS_UNBINDABLE - #define MS_UNBINDABLE 1<<17 /* Unbindable */ - #endif - - int main(int argc, char *argv[]) - { - int type; - if(argc != 3) { - fprintf(stderr, "usage: %s dir " - "<rshared|rslave|rprivate|runbindable|shared|slave" - "|private|unbindable>\n" , argv[0]); - return 1; - } - - fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]); - - if (strcmp(argv[2],"rshared")==0) - type=(MS_SHARED|MS_REC); - else if (strcmp(argv[2],"rslave")==0) - type=(MS_SLAVE|MS_REC); - else if (strcmp(argv[2],"rprivate")==0) - type=(MS_PRIVATE|MS_REC); - else if (strcmp(argv[2],"runbindable")==0) - type=(MS_UNBINDABLE|MS_REC); - else if (strcmp(argv[2],"shared")==0) - type=MS_SHARED; - else if (strcmp(argv[2],"slave")==0) - type=MS_SLAVE; - else if (strcmp(argv[2],"private")==0) - type=MS_PRIVATE; - else if (strcmp(argv[2],"unbindable")==0) - type=MS_UNBINDABLE; - else { - fprintf(stderr, "invalid operation: %s\n", argv[2]); - return 1; - } - setfsuid(getuid()); - - if(mount("", argv[1], "dontcare", type, "") == -1) { - perror("mount"); - return 1; - } - return 0; - } - ----------------------------------------------------------------------- - - Copy the above code snippet into smount.c - gcc -o smount smount.c - - - (i) To mark all the mounts under /mnt as shared execute the following - command: - - smount /mnt rshared - the corresponding syntax planned for mount command is - mount --make-rshared /mnt - - just to mark a mount /mnt as shared, execute the following - command: - smount /mnt shared - the corresponding syntax planned for mount command is - mount --make-shared /mnt - - (ii) To mark all the shared mounts under /mnt as slave execute the - following - - command: - smount /mnt rslave - the corresponding syntax planned for mount command is - mount --make-rslave /mnt - - just to mark a mount /mnt as slave, execute the following - command: - smount /mnt slave - the corresponding syntax planned for mount command is - mount --make-slave /mnt - - (iii) To mark all the mounts under /mnt as private execute the - following command: - - smount /mnt rprivate - the corresponding syntax planned for mount command is - mount --make-rprivate /mnt - - just to mark a mount /mnt as private, execute the following - command: - smount /mnt private - the corresponding syntax planned for mount command is - mount --make-private /mnt - - NOTE: by default all the mounts are created as private. But if - you want to change some shared/slave/unbindable mount as - private at a later point in time, this command can help. - - (iv) To mark all the mounts under /mnt as unbindable execute the - following - - command: - smount /mnt runbindable - the corresponding syntax planned for mount command is - mount --make-runbindable /mnt - - just to mark a mount /mnt as unbindable, execute the following - command: - smount /mnt unbindable - the corresponding syntax planned for mount command is - mount --make-unbindable /mnt + mount --make-shared mountpoint + mount --make-slave mountpoint + mount --make-private mountpoint + mount --make-unbindable mountpoint 4) Use cases @@ -350,7 +214,7 @@ replicas continue to be exactly same. mount --rbind / /view/v3 mount --rbind / /view/v4 - and if /usr has a versioning filesystem mounted, than that + and if /usr has a versioning filesystem mounted, then that mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and /view/v4/usr too @@ -390,7 +254,7 @@ replicas continue to be exactly same. For example: mount --make-shared /mnt - mount --bin /mnt /tmp + mount --bind /mnt /tmp The mount at /mnt and that at /tmp are both shared and belong to the same peer group. Anything mounted or unmounted under @@ -558,7 +422,7 @@ replicas continue to be exactly same. then the subtree under the unbindable mount is pruned in the new location. - eg: lets say we have the following mount tree. + eg: let's say we have the following mount tree. A / \ @@ -566,7 +430,7 @@ replicas continue to be exactly same. / \ / \ D E F G - Lets say all the mount except the mount C in the tree are + Let's say all the mount except the mount C in the tree are of a type other than unbindable. If this tree is rbound to say Z @@ -683,13 +547,13 @@ replicas continue to be exactly same. 'b' on mounts that receive propagation from mount 'B' and does not have sub-mounts within them are unmounted. - Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to + Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to each other. - lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount + let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount 'B1', 'B2' and 'B3' respectively. - lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on + let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on mount 'B1', 'B2' and 'B3' respectively. if 'C1' is unmounted, all the mounts that are most-recently-mounted on @@ -710,7 +574,7 @@ replicas continue to be exactly same. A cloned namespace contains all the mounts as that of the parent namespace. - Lets say 'A' and 'B' are the corresponding mounts in the parent and the + Let's say 'A' and 'B' are the corresponding mounts in the parent and the child namespace. If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to @@ -759,11 +623,11 @@ replicas continue to be exactly same. mount --make-slave /mnt At this point we have the first mount at /tmp and - its root dentry is 1. Lets call this mount 'A' + its root dentry is 1. Let's call this mount 'A' And then we have a second mount at /tmp1 with root - dentry 2. Lets call this mount 'B' + dentry 2. Let's call this mount 'B' Next we have a third mount at /mnt with root dentry - mnt. Lets call this mount 'C' + mnt. Let's call this mount 'C' 'B' is the slave of 'A' and 'C' is a slave of 'B' A -> B -> C @@ -794,7 +658,7 @@ replicas continue to be exactly same. Q3 Why is unbindable mount needed? - Lets say we want to replicate the mount tree at multiple + Let's say we want to replicate the mount tree at multiple locations within the same subtree. if one rbind mounts a tree within the same subtree 'n' times @@ -803,7 +667,7 @@ replicas continue to be exactly same. mounts. Here is a example. step 1: - lets say the root tree has just two directories with + let's say the root tree has just two directories with one vfsmount. root / \ @@ -875,7 +739,7 @@ replicas continue to be exactly same. Unclonable mounts come in handy here. step 1: - lets say the root tree has just two directories with + let's say the root tree has just two directories with one vfsmount. root / \ diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f49eecf2e57..623f094c9d8 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -536,6 +536,7 @@ struct address_space_operations { /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); + int (*error_remove_page) (struct mapping *mapping, struct page *page); }; writepage: called by the VM to write a dirty page to backing store. @@ -694,6 +695,12 @@ struct address_space_operations { prevent redirtying the page, it is kept locked during the whole operation. + error_remove_page: normally set to generic_error_remove_page if truncation + is ok for this address space. Used for memory failure handling. + Setting this implies you deal with pages going away under you, + unless you have them locked or reference counts increased. + + The File Object =============== diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index aafca0a8f66..947374977ca 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -135,6 +135,7 @@ Code Seq# Include File Comments <http://mikonos.dia.unisa.it/tcfs> 'l' 40-7F linux/udf_fs_i.h in development: <http://sourceforge.net/projects/linux-udf/> +'m' 00-09 linux/mmtimer.h 'm' all linux/mtio.h conflict! 'm' all linux/soundcard.h conflict! 'm' all linux/synclink.h conflict! diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 1458448436c..62682500878 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots of error messages about running out of file handles, you might want to increase this limit. -The three values in file-nr denote the number of allocated -file handles, the number of unused file handles and the maximum -number of file handles. When the allocated file handles come -close to the maximum, but the number of unused file handles is -significantly greater than 0, you've encountered a peak in your -usage of file handles and you don't need to increase the maximum. - +Historically, the three values in file-nr denoted the number of +allocated file handles, the number of allocated but unused file +handles, and the maximum number of file handles. Linux 2.6 always +reports 0 as the number of free file handles -- this is not an +error, it just means that the number of allocated file handles +exactly matches the number of used file handles. + +Attempts to allocate more file descriptors than file-max are +reported with printk, look for "VFS: file-max limit <number> +reached". ============================================================== nr_open: diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index b3d8b492274..a028b92001e 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -22,6 +22,7 @@ show up in /proc/sys/kernel: - callhome [ S390 only ] - auto_msgmni - core_pattern +- core_pipe_limit - core_uses_pid - ctrl-alt-del - dentry-state @@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name. ============================================================== +core_pipe_limit: + +This sysctl is only applicable when core_pattern is configured to pipe core +files to user space helper a (when the first character of core_pattern is a '|', +see above). When collecting cores via a pipe to an application, it is +occasionally usefull for the collecting application to gather data about the +crashing process from its /proc/pid directory. In order to do this safely, the +kernel must wait for the collecting process to exit, so as not to remove the +crashing processes proc files prematurely. This in turn creates the possibility +that a misbehaving userspace collecting process can block the reaping of a +crashed process simply by never exiting. This sysctl defends against that. It +defines how many concurrent crashing processes may be piped to user space +applications in parallel. If this value is exceeded, then those crashing +processes above that value are noted via the kernel log and their cores are +skipped. 0 is a special value, indicating that unlimited processes may be +captured in parallel, but that no waiting will take place (i.e. the collecting +process is not guaranteed access to /proc/<crahing pid>/). This value defaults +to 0. + +============================================================== + core_uses_pid: The default coredump filename is "core". By setting diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index e6fb1ec2744..a6e360d2055 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm: - legacy_va_layout - lowmem_reserve_ratio - max_map_count +- memory_failure_early_kill +- memory_failure_recovery - min_free_kbytes - min_slab_ratio - min_unmapped_ratio @@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - zone_reclaim_mode - ============================================================== block_dump @@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation. The default value is 65536. +============================================================= + +memory_failure_early_kill: + +Control how to kill processes when uncorrected memory error (typically +a 2bit error in a memory module) is detected in the background by hardware +that cannot be handled by the kernel. In some cases (like the page +still having a valid copy on disk) the kernel will handle the failure +transparently without affecting any applications. But if there is +no other uptodate copy of the data it will kill to prevent any data +corruptions from propagating. + +1: Kill all processes that have the corrupted and not reloadable page mapped +as soon as the corruption is detected. Note this is not supported +for a few types of pages, like kernel internally allocated data or +the swap cache, but works for the majority of user pages. + +0: Only unmap the corrupted page from all processes and only kill a process +who tries to access it. + +The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can +handle this if they want to. + +This is only active on architectures/platforms with advanced machine +check handling and depends on the hardware capabilities. + +Applications can override this setting individually with the PR_MCE_KILL prctl + +============================================================== + +memory_failure_recovery + +Enable memory failure recovery (when supported by the platform) + +1: Attempt recovery. + +0: Always panic on a memory failure. + ============================================================== min_free_kbytes: diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore index 33e8a023df0..09b164a5700 100644 --- a/Documentation/vm/.gitignore +++ b/Documentation/vm/.gitignore @@ -1 +1,2 @@ +page-types slabinfo diff --git a/Documentation/vm/locking b/Documentation/vm/locking index f366fa95617..25fadb44876 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the mm start up ... this is a loose form of stability on mm_users. For example, it is used in copy_mm to protect against a racing tlb_gather_mmu single address space optimization, so that the zap_page_range (from -vmtruncate) does not lose sending ipi's to cloned threads that might +truncate) does not lose sending ipi's to cloned threads that might be spawned underneath it and go to user mode to drag in pte's into tlbs. swap_lock diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3eda8ea0085..fa1a30d9e9d 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> */ +#define _LARGEFILE64_SOURCE #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -13,12 +14,33 @@ #include <string.h> #include <getopt.h> #include <limits.h> +#include <assert.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> /* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES sizeof(uint64_t) +#define PM_STATUS_BITS 3 +#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) +#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) +#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) +#define PM_PSHIFT_BITS 6 +#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) +#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) +#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) + +#define PM_PRESENT PM_STATUS(4LL) +#define PM_SWAP PM_STATUS(2LL) + + +/* * kernel page flags */ @@ -126,6 +148,14 @@ static int nr_addr_ranges; static unsigned long opt_offset[MAX_ADDR_RANGES]; static unsigned long opt_size[MAX_ADDR_RANGES]; +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; +static unsigned long voffset; + +static int pagemap_fd; + #define MAX_BIT_FILTERS 64 static int nr_bit_filters; static uint64_t opt_mask[MAX_BIT_FILTERS]; @@ -135,7 +165,6 @@ static int page_size; #define PAGES_BATCH (64 << 10) /* 64k pages */ static int kpageflags_fd; -static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; #define HASH_SHIFT 13 #define HASH_SIZE (1 << HASH_SHIFT) @@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE]; type __min2 = (y); \ __min1 < __min2 ? __min1 : __min2; }) +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + static unsigned long pages2mb(unsigned long pages) { return (pages * page_size) >> 20; @@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags) static void show_page_range(unsigned long offset, uint64_t flags) { static uint64_t flags0; + static unsigned long voff; static unsigned long index; static unsigned long count; - if (flags == flags0 && offset == index + count) { + if (flags == flags0 && offset == index + count && + (!opt_pid || voffset == voff + count)) { count++; return; } - if (count) - printf("%lu\t%lu\t%s\n", + if (count) { + if (opt_pid) + printf("%lx\t", voff); + printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); + } flags0 = flags; index = offset; + voff = voffset; count = 1; } static void show_page(unsigned long offset, uint64_t flags) { - printf("%lu\t%s\n", offset, page_flag_name(flags)); + if (opt_pid) + printf("%lx\t", voffset); + printf("%lx\t%s\n", offset, page_flag_name(flags)); } static void show_summary(void) @@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count) lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); while (count) { + uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; + batch = min_t(unsigned long, count, PAGES_BATCH); n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); if (n == 0) @@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count) } } + +#define PAGEMAP_BATCH 4096 +static unsigned long task_pfn(unsigned long pgoff) +{ + static uint64_t buf[PAGEMAP_BATCH]; + static unsigned long start; + static long count; + uint64_t pfn; + + if (pgoff < start || pgoff >= start + count) { + if (lseek64(pagemap_fd, + (uint64_t)pgoff * PM_ENTRY_BYTES, + SEEK_SET) < 0) { + perror("pagemap seek"); + exit(EXIT_FAILURE); + } + count = read(pagemap_fd, buf, sizeof(buf)); + if (count == 0) + return 0; + if (count < 0) { + perror("pagemap read"); + exit(EXIT_FAILURE); + } + if (count % PM_ENTRY_BYTES) { + fatal("pagemap read not aligned.\n"); + exit(EXIT_FAILURE); + } + count /= PM_ENTRY_BYTES; + start = pgoff; + } + + pfn = buf[pgoff - start]; + if (pfn & PM_PRESENT) + pfn = PM_PFRAME(pfn); + else + pfn = 0; + + return pfn; +} + +static void walk_task(unsigned long index, unsigned long count) +{ + int i = 0; + const unsigned long end = index + count; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) + return; + if (pg_start[i] >= end) + return; + + voffset = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(voffset < index); + for (; voffset < index; voffset++) { + unsigned long pfn = task_pfn(voffset); + if (pfn) + walk_pfn(pfn, 1); + } + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + static void walk_addr_ranges(void) { int i; @@ -415,10 +534,13 @@ static void walk_addr_ranges(void) } if (!nr_addr_ranges) - walk_pfn(0, ULONG_MAX); + add_addr_range(0, ULONG_MAX); for (i = 0; i < nr_addr_ranges; i++) - walk_pfn(opt_offset[i], opt_size[i]); + if (!opt_pid) + walk_pfn(opt_offset[i], opt_size[i]); + else + walk_task(opt_offset[i], opt_size[i]); close(kpageflags_fd); } @@ -446,8 +568,8 @@ static void usage(void) " -r|--raw Raw mode, for kernel developers\n" " -a|--addr addr-spec Walk a range of pages\n" " -b|--bits bits-spec Walk pages with specified bits\n" -#if 0 /* planned features */ " -p|--pid pid Walk process address space\n" +#if 0 /* planned features */ " -f|--file filename Walk file address space\n" #endif " -l|--list Show page details in ranges\n" @@ -459,7 +581,7 @@ static void usage(void) " N+M pages range from N to N+M-1\n" " N,M pages range from N to M-1\n" " N, pages range from N to end\n" -" ,M pages range from 0 to M\n" +" ,M pages range from 0 to M-1\n" "bits-spec:\n" " bit1,bit2 (flags & (bit1|bit2)) != 0\n" " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" @@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str) static void parse_pid(const char *str) { + FILE *file; + char buf[5000]; + opt_pid = parse_number(str); -} -static void parse_file(const char *name) -{ + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = open(buf, O_RDONLY); + if (pagemap_fd < 0) { + perror(buf); + exit(EXIT_FAILURE); + } + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); } -static void add_addr_range(unsigned long offset, unsigned long size) +static void parse_file(const char *name) { - if (nr_addr_ranges >= MAX_ADDR_RANGES) - fatal("too much addr ranges\n"); - - opt_offset[nr_addr_ranges] = offset; - opt_size[nr_addr_ranges] = size; - nr_addr_ranges++; } static void parse_addr_range(const char *optarg) @@ -676,8 +834,10 @@ int main(int argc, char *argv[]) } } + if (opt_list && opt_pid) + printf("voffset\t"); if (opt_list == 1) - printf("offset\tcount\tflags\n"); + printf("offset\tlen\tflags\n"); if (opt_list == 2) printf("offset\tflags\n"); diff --git a/MAINTAINERS b/MAINTAINERS index 7c1c0b05b29..c450f3abb8c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/ S: Supported F: drivers/acpi/fan.c -ACPI PCI HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> -L: linux-pci@vger.kernel.org -S: Supported -F: drivers/pci/hotplug/acpi* - ACPI THERMAL DRIVER M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org @@ -689,7 +683,7 @@ S: Maintained ARM/INTEL IXP4XX ARM ARCHITECTURE M: Imre Kaloz <kaloz@openwrt.org> M: Krzysztof Halasa <khc@pm.waw.pl> -L: linux-arm-kernel@lists.infradead.org +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-ixp4xx/ @@ -746,18 +740,22 @@ M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> M: Dirk Opfer <dirk@opfer-online.de> S: Maintained -ARM/PALMTX,PALMT5,PALMLD,PALMTE2 SUPPORT -M: Marek Vasut <marek.vasut@gmail.com> +ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT +P: Marek Vasut +M: marek.vasut@gmail.com +L: linux-arm-kernel@lists.infradead.org W: http://hackndev.com S: Maintained ARM/PALM TREO 680 SUPPORT M: Tomas Cech <sleep_walker@suse.cz> +L: linux-arm-kernel@lists.infradead.org W: http://hackndev.com S: Maintained ARM/PALMZ72 SUPPORT M: Sergey Lapin <slapin@ossfans.org> +L: linux-arm-kernel@lists.infradead.org W: http://hackndev.com S: Maintained @@ -2331,7 +2329,9 @@ S: Orphan F: drivers/hwmon/ HARDWARE RANDOM NUMBER GENERATOR CORE -S: Orphan +M: Matt Mackall <mpm@selenic.com> +M: Herbert Xu <herbert@gondor.apana.org.au> +S: Odd fixes F: Documentation/hw_random.txt F: drivers/char/hw_random/ F: include/linux/hw_random.h @@ -4003,11 +4003,11 @@ F: Documentation/PCI/ F: drivers/pci/ F: include/linux/pci* -PCIE HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> +PCI HOTPLUG +M: Jesse Barnes <jbarnes@virtuousgeek.org> L: linux-pci@vger.kernel.org S: Supported -F: drivers/pci/pcie/ +F: drivers/pci/hotplug PCMCIA SUBSYSTEM P: Linux PCMCIA Team @@ -4670,12 +4670,6 @@ F: drivers/serial/serial_lh7a40x.c F: drivers/usb/gadget/lh7a40* F: drivers/usb/host/ohci-lh7a40* -SHPC HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> -L: linux-pci@vger.kernel.org -S: Supported -F: drivers/pci/hotplug/shpchp* - SIMPLE FIRMWARE INTERFACE (SFI) P: Len Brown M: lenb@kernel.org @@ -4687,7 +4681,6 @@ F: arch/x86/kernel/*sfi* F: drivers/sfi/ F: include/linux/sfi*.h - SIMTEC EB110ATX (Chalice CATS) P: Ben Dooks M: Vincent Sanders <support@simtec.co.uk> diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h index 25da0017ec8..e42823e954a 100644 --- a/arch/alpha/include/asm/fcntl.h +++ b/arch/alpha/include/asm/fcntl.h @@ -26,6 +26,8 @@ #define F_GETOWN 6 /* for sockets. */ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ +#define F_SETOWN_EX 12 +#define F_GETOWN_EX 13 /* for posix fcntl() and lockf() */ #define F_RDLCK 1 diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index e302daecbe5..8e059e58b0a 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m { struct marvel_agp_aperture *aper = agp->aperture.sysdata; return iommu_bind(aper->arena, aper->pg_start + pg_start, - mem->page_count, mem->memory); + mem->page_count, mem->pages); } static int diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 319fcb74611..76686497b1e 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me { struct titan_agp_aperture *aper = agp->aperture.sysdata; return iommu_bind(aper->arena, aper->pg_start + pg_start, - mem->page_count, mem->memory); + mem->page_count, mem->pages); } static int diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c index 19b86328ffd..6f80ca4f976 100644 --- a/arch/alpha/kernel/init_task.c +++ b/arch/alpha/kernel/init_task.c @@ -13,6 +13,5 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); -union thread_union init_thread_union - __attribute__((section(".data.init_thread"))) - = { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 00edd04b585..85457b2d451 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max); extern int iommu_reserve(struct pci_iommu_arena *, long, long); extern int iommu_release(struct pci_iommu_arena *, long, long); -extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *); +extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **); extern int iommu_unbind(struct pci_iommu_arena *, long, long); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index d15aedfe606..8449504f5e0 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count) int iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, - unsigned long *physaddrs) + struct page **pages) { unsigned long flags; unsigned long *ptes; @@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, } for(i = 0, j = pg_start; i < pg_count; i++, j++) - ptes[j] = mk_iommu_pte(physaddrs[i]); + ptes[j] = mk_iommu_pte(page_to_phys(pages[i])); spin_unlock_irqrestore(&arena->lock, flags); diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 6dc03c35caa..2906665b1c1 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/page.h> +#include <asm/thread_info.h> OUTPUT_FORMAT("elf64-alpha") OUTPUT_ARCH(alpha) @@ -31,88 +32,21 @@ SECTIONS } :kernel RODATA - - /* Exception table */ - . = ALIGN(16); - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(16) /* Will be freed after init */ - . = ALIGN(PAGE_SIZE); - /* Init code and data */ - __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : { - INIT_DATA - } - - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - . = ALIGN(8); - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - - . = ALIGN(8); - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - . = ALIGN(8); - SECURITY_INIT - + __init_begin = ALIGN(PAGE_SIZE); + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) PERCPU(PAGE_SIZE) - - . = ALIGN(2 * PAGE_SIZE); + /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page + needed for the THREAD_SIZE aligned init_task gets freed after init */ + . = ALIGN(THREAD_SIZE); __init_end = .; /* Freed after init ends here */ - /* Note 2 page alignment above. */ - .data.init_thread : { - *(.data.init_thread) - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.page_aligned) - } - - . = ALIGN(64); - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } - _data = .; - /* Data */ - .data : { - DATA_DATA - CONSTRUCTORS - } + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) .got : { *(.got) @@ -122,16 +56,7 @@ SECTIONS } _edata = .; /* End of data section */ - __bss_start = .; - .sbss : { - *(.sbss) - *(.scommon) - } - .bss : { - *(.bss) - *(COMMON) - } - __bss_stop = .; + BSS_SECTION(0, 0, 0) _end = .; .mdebug 0 : { diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d778a699f57..1c4119c6004 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -46,6 +46,10 @@ config GENERIC_CLOCKEVENTS_BROADCAST depends on GENERIC_CLOCKEVENTS default y if SMP && !LOCAL_TIMERS +config HAVE_TCM + bool + select GENERIC_ALLOCATOR + config NO_IOPORT bool @@ -649,6 +653,7 @@ config ARCH_U300 bool "ST-Ericsson U300 Series" depends on MMU select CPU_ARM926T + select HAVE_TCM select ARM_AMBA select ARM_VIC select GENERIC_TIME diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 2293f0ce061..bd36c778c81 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -865,6 +865,7 @@ void locomo_gpio_set_dir(struct device *dev, unsigned int bits, unsigned int dir spin_unlock_irqrestore(&lchip->lock, flags); } +EXPORT_SYMBOL(locomo_gpio_set_dir); int locomo_gpio_read_level(struct device *dev, unsigned int bits) { @@ -882,6 +883,7 @@ int locomo_gpio_read_level(struct device *dev, unsigned int bits) ret &= bits; return ret; } +EXPORT_SYMBOL(locomo_gpio_read_level); int locomo_gpio_read_output(struct device *dev, unsigned int bits) { @@ -899,6 +901,7 @@ int locomo_gpio_read_output(struct device *dev, unsigned int bits) ret &= bits; return ret; } +EXPORT_SYMBOL(locomo_gpio_read_output); void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set) { @@ -920,6 +923,7 @@ void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set) spin_unlock_irqrestore(&lchip->lock, flags); } +EXPORT_SYMBOL(locomo_gpio_write); static void locomo_m62332_sendbit(void *mapbase, int bit) { @@ -1084,13 +1088,12 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int spin_unlock_irqrestore(&lchip->lock, flags); } +EXPORT_SYMBOL(locomo_m62332_senddata); /* * Frontlight control */ -static struct locomo *locomo_chip_driver(struct locomo_dev *ldev); - void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf) { unsigned long flags; @@ -1182,11 +1185,13 @@ int locomo_driver_register(struct locomo_driver *driver) driver->drv.bus = &locomo_bus_type; return driver_register(&driver->drv); } +EXPORT_SYMBOL(locomo_driver_register); void locomo_driver_unregister(struct locomo_driver *driver) { driver_unregister(&driver->drv); } +EXPORT_SYMBOL(locomo_driver_unregister); static int __init locomo_init(void) { @@ -1208,11 +1213,3 @@ module_exit(locomo_exit); MODULE_DESCRIPTION("Sharp LoCoMo core driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Lenz <lenz@cs.wisc.edu>"); - -EXPORT_SYMBOL(locomo_driver_register); -EXPORT_SYMBOL(locomo_driver_unregister); -EXPORT_SYMBOL(locomo_gpio_set_dir); -EXPORT_SYMBOL(locomo_gpio_read_level); -EXPORT_SYMBOL(locomo_gpio_read_output); -EXPORT_SYMBOL(locomo_gpio_write); -EXPORT_SYMBOL(locomo_m62332_senddata); diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index 920ced0b73c..f232941de8a 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -22,6 +22,7 @@ #include <linux/list.h> #include <linux/io.h> #include <linux/sysdev.h> +#include <linux/device.h> #include <linux/amba/bus.h> #include <asm/mach/irq.h> diff --git a/arch/arm/configs/littleton_defconfig b/arch/arm/configs/littleton_defconfig deleted file mode 100644 index 1db49690805..00000000000 --- a/arch/arm/configs/littleton_defconfig +++ /dev/null @@ -1,783 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.24-rc5 -# Fri Dec 21 11:06:19 2007 -# -CONFIG_ARM=y -CONFIG_SYS_SUPPORTS_APM_EMULATION=y -CONFIG_GENERIC_GPIO=y -CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_MMU=y -# CONFIG_NO_IOPORT is not set -CONFIG_GENERIC_HARDIRQS=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ZONE_DMA=y -CONFIG_ARCH_MTD_XIP=y -CONFIG_VECTORS_BASE=0xffff0000 -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# General setup -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_LOCK_KERNEL=y -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_PID_NS is not set -# CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_FAIR_USER_SCHED=y -# CONFIG_FAIR_CGROUP_SCHED is not set -CONFIG_SYSFS_DEPRECATED=y -# CONFIG_RELAY is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL=y -# CONFIG_EMBEDDED is not set -CONFIG_UID16=y -CONFIG_SYSCTL_SYSCALL=y -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -# CONFIG_KALLSYMS_EXTRA_PASS is not set -CONFIG_HOTPLUG=y -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_ANON_INODES=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set -# CONFIG_SLOB is not set -CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set -CONFIG_BLOCK=y -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set -# CONFIG_BLK_DEV_BSG is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" - -# -# System Type -# -# CONFIG_ARCH_AAEC2000 is not set -# CONFIG_ARCH_INTEGRATOR is not set -# CONFIG_ARCH_REALVIEW is not set -# CONFIG_ARCH_VERSATILE is not set -# CONFIG_ARCH_AT91 is not set -# CONFIG_ARCH_CLPS7500 is not set -# CONFIG_ARCH_CLPS711X is not set -# CONFIG_ARCH_CO285 is not set -# CONFIG_ARCH_EBSA110 is not set -# CONFIG_ARCH_EP93XX is not set -# CONFIG_ARCH_FOOTBRIDGE is not set -# CONFIG_ARCH_NETX is not set -# CONFIG_ARCH_H720X is not set -# CONFIG_ARCH_IMX is not set -# CONFIG_ARCH_IOP13XX is not set -# CONFIG_ARCH_IOP32X is not set -# CONFIG_ARCH_IOP33X is not set -# CONFIG_ARCH_IXP23XX is not set -# CONFIG_ARCH_IXP2000 is not set -# CONFIG_ARCH_IXP4XX is not set -# CONFIG_ARCH_L7200 is not set -# CONFIG_ARCH_KS8695 is not set -# CONFIG_ARCH_NS9XXX is not set -# CONFIG_ARCH_MXC is not set -# CONFIG_ARCH_PNX4008 is not set -CONFIG_ARCH_PXA=y -# CONFIG_ARCH_RPC is not set -# CONFIG_ARCH_SA1100 is not set -# CONFIG_ARCH_S3C2410 is not set -# CONFIG_ARCH_SHARK is not set -# CONFIG_ARCH_LH7A40X is not set -# CONFIG_ARCH_DAVINCI is not set -# CONFIG_ARCH_OMAP is not set - -# -# Intel PXA2xx/PXA3xx Implementations -# - -# -# Supported PXA3xx Processor Variants -# -CONFIG_CPU_PXA300=y -CONFIG_CPU_PXA310=y -# CONFIG_CPU_PXA320 is not set -# CONFIG_ARCH_LUBBOCK is not set -# CONFIG_MACH_LOGICPD_PXA270 is not set -# CONFIG_MACH_MAINSTONE is not set -# CONFIG_ARCH_PXA_IDP is not set -# CONFIG_PXA_SHARPSL is not set -# CONFIG_MACH_TRIZEPS4 is not set -# CONFIG_MACH_EM_X270 is not set -# CONFIG_MACH_ZYLONITE is not set -CONFIG_MACH_LITTLETON=y -# CONFIG_MACH_ARMCORE is not set -CONFIG_PXA3xx=y -CONFIG_PXA_SSP=y - -# -# Boot options -# - -# -# Power management -# - -# -# Processor Type -# -CONFIG_CPU_32=y -CONFIG_CPU_XSC3=y -CONFIG_CPU_32v5=y -CONFIG_CPU_ABRT_EV5T=y -CONFIG_CPU_CACHE_VIVT=y -CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_CP15=y -CONFIG_CPU_CP15_MMU=y -CONFIG_IO_36=y - -# -# Processor Features -# -# CONFIG_ARM_THUMB is not set -# CONFIG_CPU_DCACHE_DISABLE is not set -# CONFIG_CPU_BPREDICT_DISABLE is not set -# CONFIG_OUTER_CACHE is not set -CONFIG_IWMMXT=y - -# -# Bus support -# -# CONFIG_PCI_SYSCALL is not set -# CONFIG_ARCH_SUPPORTS_MSI is not set -# CONFIG_PCCARD is not set - -# -# Kernel Features -# -CONFIG_TICK_ONESHOT=y -# CONFIG_NO_HZ is not set -# CONFIG_HIGH_RES_TIMERS is not set -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_PREEMPT=y -CONFIG_HZ=100 -CONFIG_AEABI=y -CONFIG_OABI_COMPAT=y -# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set -CONFIG_SPLIT_PTLOCK_CPUS=4096 -# CONFIG_RESOURCES_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_ALIGNMENT_TRAP=y - -# -# Boot options -# -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS2,38400 mem=64M" -# CONFIG_XIP_KERNEL is not set -# CONFIG_KEXEC is not set - -# -# CPU Frequency scaling -# -# CONFIG_CPU_FREQ is not set - -# -# Floating point emulation -# - -# -# At least one emulation must be selected -# -CONFIG_FPE_NWFPE=y -# CONFIG_FPE_NWFPE_XP is not set -# CONFIG_FPE_FASTFPE is not set - -# -# Userspace binary formats -# -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_AOUT is not set -# CONFIG_BINFMT_MISC is not set - -# -# Power management options -# -# CONFIG_PM is not set -CONFIG_SUSPEND_UP_POSSIBLE=y - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -# CONFIG_IP_PNP_DHCP is not set -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y -# CONFIG_INET_LRO is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set -# CONFIG_IP_DCCP is not set -# CONFIG_IP_SCTP is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_AF_RXRPC is not set - -# -# Wireless -# -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set -# CONFIG_NET_9P is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_STANDALONE is not set -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -CONFIG_FW_LOADER=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set -# CONFIG_MTD is not set -# CONFIG_PARPORT is not set -# CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_DMA is not set -# CONFIG_SCSI_NETLINK is not set -# CONFIG_ATA is not set -# CONFIG_MD is not set -CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_MACVLAN is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_VETH is not set -# CONFIG_PHYLIB is not set -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_AX88796 is not set -CONFIG_SMC91X=y -# CONFIG_DM9000 is not set -# CONFIG_SMC911X is not set -# CONFIG_IBM_NEW_EMAC_ZMII is not set -# CONFIG_IBM_NEW_EMAC_RGMII is not set -# CONFIG_IBM_NEW_EMAC_TAH is not set -# CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_B44 is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set - -# -# Wireless LAN -# -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_ISDN is not set - -# -# Input device support -# -CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_PXA=y -CONFIG_SERIAL_PXA_CONSOLE=y -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_NVRAM is not set -# CONFIG_R3964 is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_I2C is not set - -# -# SPI support -# -# CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set -# CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set -# CONFIG_HWMON is not set -# CONFIG_WATCHDOG is not set - -# -# Sonics Silicon Backplane -# -CONFIG_SSB_POSSIBLE=y -# CONFIG_SSB is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_SM501 is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_DAB is not set - -# -# Graphics support -# -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -CONFIG_FB=y -# CONFIG_FIRMWARE_EDID is not set -# CONFIG_FB_DDC is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -# CONFIG_FB_SYS_FILLRECT is not set -# CONFIG_FB_SYS_COPYAREA is not set -# CONFIG_FB_SYS_IMAGEBLIT is not set -# CONFIG_FB_SYS_FOPS is not set -CONFIG_FB_DEFERRED_IO=y -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -# CONFIG_FB_BACKLIGHT is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_S1D13XXX is not set -CONFIG_FB_PXA=y -# CONFIG_FB_PXA_PARAMETERS is not set -# CONFIG_FB_MBX is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Display device support -# -# CONFIG_DISPLAY_SUPPORT is not set - -# -# Console display driver support -# -# CONFIG_VGA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -CONFIG_FONT_8x16=y -# CONFIG_FONT_6x11 is not set -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_10x18 is not set -CONFIG_LOGO=y -CONFIG_LOGO_LINUX_MONO=y -CONFIG_LOGO_LINUX_VGA16=y -CONFIG_LOGO_LINUX_CLUT224=y - -# -# Sound -# -# CONFIG_SOUND is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_MMC is not set -# CONFIG_NEW_LEDS is not set -CONFIG_RTC_LIB=y -# CONFIG_RTC_CLASS is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -# CONFIG_EXT4DEV_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -# CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_INOTIFY is not set -# CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_SYSCTL=y -CONFIG_SYSFS=y -# CONFIG_TMPFS is not set -# CONFIG_HUGETLB_PAGE is not set -# CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_DIRECTIO=y -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_BIND34 is not set -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_NLS is not set -# CONFIG_DLM is not set -# CONFIG_INSTRUMENTATION is not set - -# -# Kernel hacking -# -CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -CONFIG_ENABLE_MUST_CHECK=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y -CONFIG_SCHED_DEBUG=y -# CONFIG_SCHEDSTATS is not set -# CONFIG_TIMER_STATS is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_PREEMPT is not set -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_DEBUG_KOBJECT is not set -CONFIG_DEBUG_BUGVERBOSE=y -CONFIG_DEBUG_INFO=y -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_SG is not set -CONFIG_FRAME_POINTER=y -CONFIG_FORCED_INLINING=y -# CONFIG_BOOT_PRINTK_DELAY is not set -# CONFIG_RCU_TORTURE_TEST is not set -# CONFIG_FAULT_INJECTION is not set -# CONFIG_SAMPLES is not set -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_ERRORS=y -CONFIG_DEBUG_LL=y -# CONFIG_DEBUG_ICEDCC is not set - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_MANAGER=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_GF128MUL is not set -# CONFIG_CRYPTO_ECB is not set -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_PCBC is not set -# CONFIG_CRYPTO_LRW is not set -# CONFIG_CRYPTO_XTS is not set -# CONFIG_CRYPTO_CRYPTD is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_SEED is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_CAMELLIA is not set -# CONFIG_CRYPTO_TEST is not set -# CONFIG_CRYPTO_AUTHENC is not set -CONFIG_CRYPTO_HW=y - -# -# Library routines -# -CONFIG_BITREVERSE=y -CONFIG_CRC_CCITT=y -# CONFIG_CRC16 is not set -# CONFIG_CRC_ITU_T is not set -CONFIG_CRC32=y -# CONFIG_CRC7 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig new file mode 100644 index 00000000000..733b851e5b7 --- /dev/null +++ b/arch/arm/configs/pxa3xx_defconfig @@ -0,0 +1,1332 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.31-rc1 +# Mon Jul 13 22:48:49 2009 +# +CONFIG_ARM=y +CONFIG_HAVE_PWM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_ARCH_MTD_XIP=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_USER_SCHED=y +# CONFIG_CGROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +# CONFIG_BLK_DEV_INITRD is not set +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y + +# +# Performance Counters +# +CONFIG_VM_EVENT_COUNTERS=y +# CONFIG_STRIP_ASM_SYMS is not set +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y + +# +# GCOV-based kernel profiling +# +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +# CONFIG_MODULE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_BLOCK=y +CONFIG_LBDAF=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_FREEZER is not set + +# +# System Type +# +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_STMP3XXX is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_PNX4008 is not set +CONFIG_ARCH_PXA=y +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set + +# +# Intel PXA2xx/PXA3xx Implementations +# + +# +# Supported PXA3xx Processor Variants +# +CONFIG_CPU_PXA300=y +CONFIG_CPU_PXA310=y +CONFIG_CPU_PXA320=y +CONFIG_CPU_PXA930=y +CONFIG_CPU_PXA935=y +# CONFIG_ARCH_GUMSTIX is not set +# CONFIG_MACH_INTELMOTE2 is not set +# CONFIG_MACH_STARGATE2 is not set +# CONFIG_ARCH_LUBBOCK is not set +# CONFIG_MACH_LOGICPD_PXA270 is not set +# CONFIG_MACH_MAINSTONE is not set +# CONFIG_MACH_MP900C is not set +# CONFIG_ARCH_PXA_IDP is not set +# CONFIG_PXA_SHARPSL is not set +# CONFIG_ARCH_VIPER is not set +# CONFIG_ARCH_PXA_ESERIES is not set +# CONFIG_TRIZEPS_PXA is not set +# CONFIG_MACH_H5000 is not set +# CONFIG_MACH_EM_X270 is not set +# CONFIG_MACH_EXEDA is not set +# CONFIG_MACH_COLIBRI is not set +# CONFIG_MACH_COLIBRI300 is not set +# CONFIG_MACH_COLIBRI320 is not set +CONFIG_MACH_ZYLONITE=y +CONFIG_MACH_LITTLETON=y +CONFIG_MACH_TAVOREVB=y +CONFIG_MACH_SAAR=y +# CONFIG_MACH_ARMCORE is not set +# CONFIG_MACH_CM_X300 is not set +# CONFIG_MACH_H4700 is not set +# CONFIG_MACH_MAGICIAN is not set +# CONFIG_MACH_HIMALAYA is not set +# CONFIG_MACH_MIOA701 is not set +# CONFIG_MACH_PCM027 is not set +# CONFIG_ARCH_PXA_PALM is not set +# CONFIG_MACH_CSB726 is not set +# CONFIG_PXA_EZX is not set +CONFIG_PXA3xx=y +CONFIG_PXA_SSP=y +CONFIG_PXA_HAVE_BOARD_IRQS=y +CONFIG_PLAT_PXA=y + +# +# Processor Type +# +CONFIG_CPU_32=y +CONFIG_CPU_XSC3=y +CONFIG_CPU_32v5=y +CONFIG_CPU_ABRT_EV5T=y +CONFIG_CPU_PABRT_NOIFAR=y +CONFIG_CPU_CACHE_VIVT=y +CONFIG_CPU_TLB_V4WBI=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y +CONFIG_IO_36=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_OUTER_CACHE=y +CONFIG_CACHE_XSC3L2=y +CONFIG_IWMMXT=y +CONFIG_COMMON_CLKDEV=y + +# +# Bus support +# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT=y +CONFIG_HZ=100 +CONFIG_AEABI=y +CONFIG_OABI_COMPAT=y +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_HIGHMEM is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set + +# +# Boot options +# +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,115200 mem=64M debug" +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set + +# +# CPU Power Management +# +# CONFIG_CPU_FREQ is not set +# CONFIG_CPU_IDLE is not set + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +CONFIG_FPE_NWFPE=y +# CONFIG_FPE_NWFPE_XP is not set +# CONFIG_FPE_FASTFPE is not set + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_CONCAT=y +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_DATAFLASH is not set +# CONFIG_MTD_M25P80 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_VERIFY_WRITE=y +# CONFIG_MTD_NAND_ECC_SMC is not set +# CONFIG_MTD_NAND_MUSEUM_IDS is not set +# CONFIG_MTD_NAND_H1900 is not set +# CONFIG_MTD_NAND_GPIO is not set +CONFIG_MTD_NAND_IDS=y +# CONFIG_MTD_NAND_DISKONCHIP is not set +# CONFIG_MTD_NAND_SHARPSL is not set +CONFIG_MTD_NAND_PXA3xx=y +CONFIG_MTD_NAND_PXA3xx_BUILTIN=y +# CONFIG_MTD_NAND_NANDSIM is not set +# CONFIG_MTD_NAND_PLATFORM is not set +CONFIG_MTD_ONENAND=y +CONFIG_MTD_ONENAND_VERIFY_WRITE=y +CONFIG_MTD_ONENAND_GENERIC=y +# CONFIG_MTD_ONENAND_OTP is not set +# CONFIG_MTD_ONENAND_2X_PROGRAM is not set +# CONFIG_MTD_ONENAND_SIM is not set + +# +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_AX88796 is not set +CONFIG_SMC91X=y +# CONFIG_DM9000 is not set +# CONFIG_ENC28J60 is not set +# CONFIG_ETHOC is not set +# CONFIG_SMC911X is not set +# CONFIG_SMSC911X is not set +# CONFIG_DNET is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set +# CONFIG_B44 is not set +# CONFIG_KS8842 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_LM8323 is not set +CONFIG_KEYBOARD_PXA27x=y +CONFIG_KEYBOARD_PXA930_ROTARY=y +CONFIG_KEYBOARD_GPIO=y +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_MOUSE_GPIO is not set +CONFIG_MOUSE_PXA930_TRKBALL=y +# CONFIG_MOUSE_SYNAPTICS_I2C is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_ADS7846 is not set +# CONFIG_TOUCHSCREEN_AD7877 is not set +# CONFIG_TOUCHSCREEN_AD7879_I2C is not set +# CONFIG_TOUCHSCREEN_AD7879_SPI is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set +CONFIG_TOUCHSCREEN_DA9034=y +# CONFIG_TOUCHSCREEN_EETI is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set +# CONFIG_TOUCHSCREEN_W90X900 is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_SERPORT=y +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +CONFIG_DEVKMEM=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +# CONFIG_SERIAL_MAX3100 is not set +CONFIG_SERIAL_PXA=y +CONFIG_SERIAL_PXA_CONSOLE=y +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set +# CONFIG_I2C_HELPER_AUTO is not set + +# +# I2C Algorithms +# +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_OCORES is not set +CONFIG_I2C_PXA=y +# CONFIG_I2C_PXA_SLAVE is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +CONFIG_SPI=y +# CONFIG_SPI_DEBUG is not set +CONFIG_SPI_MASTER=y + +# +# SPI Master Controller Drivers +# +# CONFIG_SPI_BITBANG is not set +# CONFIG_SPI_GPIO is not set +CONFIG_SPI_PXA2XX=y + +# +# SPI Protocol Masters +# +# CONFIG_SPI_SPIDEV is not set +# CONFIG_SPI_TLE62X0 is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_DEBUG_GPIO is not set +# CONFIG_GPIO_SYSFS is not set + +# +# Memory mapped GPIO expanders: +# + +# +# I2C GPIO expanders: +# +CONFIG_GPIO_MAX732X=y +CONFIG_GPIO_PCA953X=y +CONFIG_GPIO_PCF857X=y + +# +# PCI GPIO expanders: +# + +# +# SPI GPIO expanders: +# +CONFIG_GPIO_MAX7301=y +# CONFIG_GPIO_MCP23S08 is not set +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +CONFIG_POWER_SUPPLY_DEBUG=y +CONFIG_PDA_POWER=y +# CONFIG_BATTERY_DS2760 is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_BATTERY_DA9030=y +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_TPS65010 is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +CONFIG_PMIC_DA903X=y +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_AB3100_CORE is not set +# CONFIG_EZX_PCAP is not set +# CONFIG_MEDIA_SUPPORT is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_S1D13XXX is not set +CONFIG_FB_PXA=y +# CONFIG_FB_PXA_OVERLAY is not set +# CONFIG_FB_PXA_SMARTPANEL is not set +# CONFIG_FB_PXA_PARAMETERS is not set +# CONFIG_FB_MBX is not set +# CONFIG_FB_W100 is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_LTV350QV is not set +# CONFIG_LCD_ILI9320 is not set +CONFIG_LCD_TDO24M=y +# CONFIG_LCD_VGG2432A4 is not set +# CONFIG_LCD_PLATFORM is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +CONFIG_BACKLIGHT_DA903X=y + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +CONFIG_FONTS=y +# CONFIG_FONT_8x8 is not set +# CONFIG_FONT_8x16 is not set +CONFIG_FONT_6x11=y +# CONFIG_FONT_7x14 is not set +# CONFIG_FONT_PEARL_8x8 is not set +# CONFIG_FONT_ACORN_8x8 is not set +# CONFIG_FONT_MINI_4x6 is not set +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set +# CONFIG_FONT_10x18 is not set +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +# CONFIG_MMC_DEBUG is not set +# CONFIG_MMC_UNSAFE_RESUME is not set + +# +# MMC/SD/SDIO Card Drivers +# +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_BOUNCE=y +# CONFIG_SDIO_UART is not set +# CONFIG_MMC_TEST is not set + +# +# MMC/SD/SDIO Host Controller Drivers +# +CONFIG_MMC_PXA=y +# CONFIG_MMC_SDHCI is not set +# CONFIG_MMC_SPI is not set +# CONFIG_MEMSTICK is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m + +# +# LED drivers +# +# CONFIG_LEDS_PCA9532 is not set +CONFIG_LEDS_GPIO=m +CONFIG_LEDS_GPIO_PLATFORM=y +# CONFIG_LEDS_LP5521 is not set +# CONFIG_LEDS_PCA955X is not set +CONFIG_LEDS_DA903X=m +# CONFIG_LEDS_DAC124S085 is not set +# CONFIG_LEDS_PWM is not set +# CONFIG_LEDS_BD2802 is not set + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=m +CONFIG_LEDS_TRIGGER_HEARTBEAT=m +CONFIG_LEDS_TRIGGER_BACKLIGHT=m +CONFIG_LEDS_TRIGGER_GPIO=m +CONFIG_LEDS_TRIGGER_DEFAULT_ON=m + +# +# iptables trigger is under Netfilter config (LED target) +# +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_DEBUG=y +# CONFIG_REGULATOR_FIXED_VOLTAGE is not set +CONFIG_REGULATOR_VIRTUAL_CONSUMER=y +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_BQ24022 is not set +# CONFIG_REGULATOR_MAX1586 is not set +CONFIG_REGULATOR_DA903X=y +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +CONFIG_DNOTIFY=y +# CONFIG_INOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +# CONFIG_TMPFS is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +CONFIG_JFFS2_FS_WBUF_VERIFY=y +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +CONFIG_JFFS2_ZLIB=y +CONFIG_JFFS2_LZO=y +CONFIG_JFFS2_RTIME=y +CONFIG_JFFS2_RUBIN=y +# CONFIG_JFFS2_CMODE_NONE is not set +CONFIG_JFFS2_CMODE_PRIORITY=y +# CONFIG_JFFS2_CMODE_SIZE is not set +# CONFIG_JFFS2_CMODE_FAVOURLZO is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +# CONFIG_NFS_V4_1 is not set +CONFIG_ROOT_NFS=y +# CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_ACL_SUPPORT=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_KMEMLEAK is not set +CONFIG_DEBUG_PREEMPT=y +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +CONFIG_DEBUG_MEMORY_INIT=y +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_PAGE_POISONING is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_KMEMCHECK is not set +CONFIG_ARM_UNWIND=y +CONFIG_DEBUG_USER=y +# CONFIG_DEBUG_ERRORS is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_LL is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_BINARY_PRINTF is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig new file mode 100644 index 00000000000..33bb7250946 --- /dev/null +++ b/arch/arm/configs/xcep_defconfig @@ -0,0 +1,1129 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.31-rc6 +# Thu Aug 20 09:02:37 2009 +# +CONFIG_ARM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_ARCH_MTD_XIP=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION=".xcep-itech" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=16 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +# CONFIG_UID16 is not set +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +# CONFIG_SHMEM is not set +CONFIG_AIO=y + +# +# Performance Counters +# +# CONFIG_VM_EVENT_COUNTERS is not set +CONFIG_STRIP_ASM_SYMS=y +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB is not set +# CONFIG_SLUB is not set +CONFIG_SLOB=y +# CONFIG_PROFILING is not set +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y + +# +# GCOV-based kernel profiling +# +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_BLOCK is not set +# CONFIG_FREEZER is not set + +# +# System Type +# +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_STMP3XXX is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_PNX4008 is not set +CONFIG_ARCH_PXA=y +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set + +# +# Intel PXA2xx/PXA3xx Implementations +# +# CONFIG_ARCH_GUMSTIX is not set +# CONFIG_MACH_INTELMOTE2 is not set +# CONFIG_MACH_STARGATE2 is not set +# CONFIG_ARCH_LUBBOCK is not set +# CONFIG_MACH_LOGICPD_PXA270 is not set +# CONFIG_MACH_MAINSTONE is not set +# CONFIG_MACH_MP900C is not set +# CONFIG_ARCH_PXA_IDP is not set +# CONFIG_PXA_SHARPSL is not set +# CONFIG_ARCH_VIPER is not set +# CONFIG_ARCH_PXA_ESERIES is not set +# CONFIG_TRIZEPS_PXA is not set +# CONFIG_MACH_H5000 is not set +# CONFIG_MACH_EM_X270 is not set +# CONFIG_MACH_EXEDA is not set +# CONFIG_MACH_COLIBRI is not set +# CONFIG_MACH_COLIBRI300 is not set +# CONFIG_MACH_COLIBRI320 is not set +# CONFIG_MACH_ZYLONITE is not set +# CONFIG_MACH_LITTLETON is not set +# CONFIG_MACH_TAVOREVB is not set +# CONFIG_MACH_SAAR is not set +# CONFIG_MACH_ARMCORE is not set +# CONFIG_MACH_CM_X300 is not set +# CONFIG_MACH_H4700 is not set +# CONFIG_MACH_MAGICIAN is not set +# CONFIG_MACH_HIMALAYA is not set +# CONFIG_MACH_MIOA701 is not set +# CONFIG_MACH_PCM027 is not set +# CONFIG_ARCH_PXA_PALM is not set +# CONFIG_MACH_CSB726 is not set +# CONFIG_PXA_EZX is not set +CONFIG_MACH_XCEP=y +CONFIG_PXA25x=y +CONFIG_PXA_SSP=y +CONFIG_PLAT_PXA=y + +# +# Processor Type +# +CONFIG_CPU_32=y +CONFIG_CPU_XSCALE=y +CONFIG_CPU_32v5=y +CONFIG_CPU_ABRT_EV5T=y +CONFIG_CPU_PABRT_NOIFAR=y +CONFIG_CPU_CACHE_VIVT=y +CONFIG_CPU_TLB_V4WBI=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_CPU_DCACHE_DISABLE is not set +CONFIG_IWMMXT=y +CONFIG_XSCALE_PMU=y +CONFIG_COMMON_CLKDEV=y + +# +# Bus support +# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +# CONFIG_PREEMPT is not set +CONFIG_HZ=100 +CONFIG_AEABI=y +CONFIG_OABI_COMPAT=y +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_HIGHMEM is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set + +# +# Boot options +# +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="root=mtd4 rootfstype=jffs2 ro console=ttyS0,115200" +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set + +# +# CPU Power Management +# +# CONFIG_CPU_FREQ is not set +# CONFIG_CPU_IDLE is not set + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +CONFIG_FPE_NWFPE=y +# CONFIG_FPE_NWFPE_XP is not set +# CONFIG_FPE_FASTFPE is not set + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=m +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +CONFIG_NET_KEY=y +# CONFIG_NET_KEY_MIGRATE is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_CONCAT=y +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=y +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_GEN_PROBE=y +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +CONFIG_MTD_CFI_INTELEXT=y +# CONFIG_MTD_CFI_AMDSTD is not set +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_CFI_UTIL=y +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_XIP is not set + +# +# Mapping drivers for chip access +# +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PHYSMAP=y +# CONFIG_MTD_PHYSMAP_COMPAT is not set +CONFIG_MTD_PXA2XX=y +# CONFIG_MTD_ARM_INTEGRATOR is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +# CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set + +# +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y + +# +# SCSI device support +# +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_AX88796 is not set +CONFIG_SMC91X=y +# CONFIG_DM9000 is not set +# CONFIG_ETHOC is not set +# CONFIG_SMC911X is not set +# CONFIG_SMSC911X is not set +# CONFIG_DNET is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set +# CONFIG_B44 is not set +# CONFIG_KS8842 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_ISDN is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_DEVKMEM is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_PXA=y +CONFIG_SERIAL_PXA_CONSOLE=y +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_TCG_TPM is not set +CONFIG_I2C=m +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_HELPER_AUTO=y + +# +# I2C Hardware Bus support +# + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_DESIGNWARE is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_OCORES is not set +CONFIG_I2C_PXA=m +# CONFIG_I2C_PXA_SLAVE is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_SPI is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_DEBUG_GPIO is not set +# CONFIG_GPIO_SYSFS is not set + +# +# Memory mapped GPIO expanders: +# + +# +# I2C GPIO expanders: +# +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X is not set +# CONFIG_GPIO_PCF857X is not set + +# +# PCI GPIO expanders: +# + +# +# SPI GPIO expanders: +# +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +CONFIG_HWMON=m +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +CONFIG_SENSORS_ADM1021=m +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_MAX1619 is not set +CONFIG_SENSORS_MAX6650=m +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_HWMON_DEBUG_CHIP is not set +# CONFIG_THERMAL is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_TPS65010 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_AB3100_CORE is not set +# CONFIG_MEDIA_SUPPORT is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_NEW_LEDS is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=m + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set + +# +# SPI RTC drivers +# + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +CONFIG_RTC_DRV_SA1100=m +# CONFIG_RTC_DRV_PXA is not set +CONFIG_DMADEVICES=y + +# +# DMA Devices +# +# CONFIG_AUXDISPLAY is not set +# CONFIG_REGULATOR is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +CONFIG_FILE_LOCKING=y +# CONFIG_FSNOTIFY is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +CONFIG_JFFS2_FS_WBUF_VERIFY=y +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set +# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set +CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set +# CONFIG_ROMFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFSD is not set +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +CONFIG_NLS=m +CONFIG_NLS_DEFAULT="utf8" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=m +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_PAGE_POISONING is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_ARM_UNWIND is not set +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_ERRORS is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_LL is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_ALGAPI2=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_HASH2=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=m +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_BINARY_PRINTF is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/arm/configs/zylonite_defconfig b/arch/arm/configs/zylonite_defconfig deleted file mode 100644 index 7949d04a360..00000000000 --- a/arch/arm/configs/zylonite_defconfig +++ /dev/null @@ -1,736 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.23 -# Tue Oct 23 13:33:20 2007 -# -CONFIG_ARM=y -CONFIG_SYS_SUPPORTS_APM_EMULATION=y -CONFIG_GENERIC_GPIO=y -CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_MMU=y -# CONFIG_NO_IOPORT is not set -CONFIG_GENERIC_HARDIRQS=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_HARDIRQS_SW_RESEND=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ZONE_DMA=y -CONFIG_ARCH_MTD_XIP=y -CONFIG_VECTORS_BASE=0xffff0000 -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# General setup -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CGROUPS is not set -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_FAIR_USER_SCHED=y -# CONFIG_FAIR_CGROUP_SCHED is not set -CONFIG_SYSFS_DEPRECATED=y -# CONFIG_RELAY is not set -# CONFIG_BLK_DEV_INITRD is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -# CONFIG_EMBEDDED is not set -CONFIG_UID16=y -CONFIG_SYSCTL_SYSCALL=y -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set -CONFIG_HOTPLUG=y -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_ANON_INODES=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_UNLOAD is not set -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set -CONFIG_BLOCK=y -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set -# CONFIG_BLK_DEV_BSG is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" - -# -# System Type -# -# CONFIG_ARCH_AAEC2000 is not set -# CONFIG_ARCH_INTEGRATOR is not set -# CONFIG_ARCH_REALVIEW is not set -# CONFIG_ARCH_VERSATILE is not set -# CONFIG_ARCH_AT91 is not set -# CONFIG_ARCH_CLPS7500 is not set -# CONFIG_ARCH_CLPS711X is not set -# CONFIG_ARCH_CO285 is not set -# CONFIG_ARCH_EBSA110 is not set -# CONFIG_ARCH_EP93XX is not set -# CONFIG_ARCH_FOOTBRIDGE is not set -# CONFIG_ARCH_NETX is not set -# CONFIG_ARCH_H720X is not set -# CONFIG_ARCH_IMX is not set -# CONFIG_ARCH_IOP13XX is not set -# CONFIG_ARCH_IOP32X is not set -# CONFIG_ARCH_IOP33X is not set -# CONFIG_ARCH_IXP23XX is not set -# CONFIG_ARCH_IXP2000 is not set -# CONFIG_ARCH_IXP4XX is not set -# CONFIG_ARCH_L7200 is not set -# CONFIG_ARCH_KS8695 is not set -# CONFIG_ARCH_NS9XXX is not set -# CONFIG_ARCH_MXC is not set -# CONFIG_ARCH_PNX4008 is not set -CONFIG_ARCH_PXA=y -# CONFIG_ARCH_RPC is not set -# CONFIG_ARCH_SA1100 is not set -# CONFIG_ARCH_S3C2410 is not set -# CONFIG_ARCH_SHARK is not set -# CONFIG_ARCH_LH7A40X is not set -# CONFIG_ARCH_DAVINCI is not set -# CONFIG_ARCH_OMAP is not set - -# -# Intel PXA2xx/PXA3xx Implementations -# - -# -# Supported PXA3xx Processor Variants -# -CONFIG_CPU_PXA300=y -CONFIG_CPU_PXA310=y -CONFIG_CPU_PXA320=y -# CONFIG_ARCH_LUBBOCK is not set -# CONFIG_MACH_LOGICPD_PXA270 is not set -# CONFIG_MACH_MAINSTONE is not set -# CONFIG_ARCH_PXA_IDP is not set -# CONFIG_PXA_SHARPSL is not set -# CONFIG_MACH_TRIZEPS4 is not set -# CONFIG_MACH_EM_X270 is not set -CONFIG_MACH_ZYLONITE=y -# CONFIG_MACH_ARMCORE is not set -CONFIG_PXA3xx=y - -# -# Boot options -# - -# -# Power management -# - -# -# Processor Type -# -CONFIG_CPU_32=y -CONFIG_CPU_XSC3=y -CONFIG_CPU_32v5=y -CONFIG_CPU_ABRT_EV5T=y -CONFIG_CPU_CACHE_VIVT=y -CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_CP15=y -CONFIG_CPU_CP15_MMU=y -CONFIG_IO_36=y - -# -# Processor Features -# -# CONFIG_ARM_THUMB is not set -# CONFIG_CPU_DCACHE_DISABLE is not set -# CONFIG_CPU_BPREDICT_DISABLE is not set -# CONFIG_OUTER_CACHE is not set -CONFIG_IWMMXT=y - -# -# Bus support -# -# CONFIG_PCI_SYSCALL is not set -# CONFIG_ARCH_SUPPORTS_MSI is not set -# CONFIG_PCCARD is not set - -# -# Kernel Features -# -# CONFIG_TICK_ONESHOT is not set -# CONFIG_NO_HZ is not set -# CONFIG_HIGH_RES_TIMERS is not set -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -# CONFIG_PREEMPT is not set -CONFIG_HZ=100 -CONFIG_AEABI=y -CONFIG_OABI_COMPAT=y -# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set -CONFIG_SPLIT_PTLOCK_CPUS=4096 -# CONFIG_RESOURCES_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_ALIGNMENT_TRAP=y - -# -# Boot options -# -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfs/rootfs/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,38400 mem=64M debug" -# CONFIG_XIP_KERNEL is not set -# CONFIG_KEXEC is not set - -# -# Floating point emulation -# - -# -# At least one emulation must be selected -# -CONFIG_FPE_NWFPE=y -# CONFIG_FPE_NWFPE_XP is not set -# CONFIG_FPE_FASTFPE is not set - -# -# Userspace binary formats -# -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_AOUT is not set -# CONFIG_BINFMT_MISC is not set - -# -# Power management options -# -# CONFIG_PM is not set -CONFIG_SUSPEND_UP_POSSIBLE=y - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set -# CONFIG_IP_DCCP is not set -# CONFIG_IP_SCTP is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_AF_RXRPC is not set - -# -# Wireless -# -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set -# CONFIG_NET_9P is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -CONFIG_FW_LOADER=y -# CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set -# CONFIG_MTD is not set -# CONFIG_PARPORT is not set -# CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_DMA is not set -# CONFIG_SCSI_NETLINK is not set -# CONFIG_ATA is not set -# CONFIG_MD is not set -CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_MACVLAN is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_VETH is not set -# CONFIG_PHYLIB is not set -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_AX88796 is not set -CONFIG_SMC91X=y -# CONFIG_DM9000 is not set -# CONFIG_SMC911X is not set -# CONFIG_IBM_NEW_EMAC_ZMII is not set -# CONFIG_IBM_NEW_EMAC_RGMII is not set -# CONFIG_IBM_NEW_EMAC_TAH is not set -# CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_B44 is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set - -# -# Wireless LAN -# -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_ISDN is not set - -# -# Input device support -# -CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_PXA=y -CONFIG_SERIAL_PXA_CONSOLE=y -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_IPMI_HANDLER is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_NVRAM is not set -# CONFIG_R3964 is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_I2C is not set - -# -# SPI support -# -# CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set -# CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set -# CONFIG_HWMON is not set - -# -# Sonics Silicon Backplane -# -CONFIG_SSB_POSSIBLE=y -# CONFIG_SSB is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_SM501 is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_DAB is not set - -# -# Graphics support -# -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -CONFIG_FB=y -# CONFIG_FIRMWARE_EDID is not set -# CONFIG_FB_DDC is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -# CONFIG_FB_SYS_FILLRECT is not set -# CONFIG_FB_SYS_COPYAREA is not set -# CONFIG_FB_SYS_IMAGEBLIT is not set -# CONFIG_FB_SYS_FOPS is not set -CONFIG_FB_DEFERRED_IO=y -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -# CONFIG_FB_BACKLIGHT is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_TILEBLITTING is not set - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_S1D13XXX is not set -CONFIG_FB_PXA=y -# CONFIG_FB_PXA_PARAMETERS is not set -# CONFIG_FB_MBX is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Display device support -# -# CONFIG_DISPLAY_SUPPORT is not set - -# -# Console display driver support -# -# CONFIG_VGA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FONTS=y -# CONFIG_FONT_8x8 is not set -# CONFIG_FONT_8x16 is not set -CONFIG_FONT_6x11=y -# CONFIG_FONT_7x14 is not set -# CONFIG_FONT_PEARL_8x8 is not set -# CONFIG_FONT_ACORN_8x8 is not set -# CONFIG_FONT_MINI_4x6 is not set -# CONFIG_FONT_SUN8x16 is not set -# CONFIG_FONT_SUN12x22 is not set -# CONFIG_FONT_10x18 is not set -CONFIG_LOGO=y -CONFIG_LOGO_LINUX_MONO=y -CONFIG_LOGO_LINUX_VGA16=y -CONFIG_LOGO_LINUX_CLUT224=y - -# -# Sound -# -# CONFIG_SOUND is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_MMC is not set -# CONFIG_NEW_LEDS is not set -CONFIG_RTC_LIB=y -# CONFIG_RTC_CLASS is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set -# CONFIG_EXT3_FS is not set -# CONFIG_EXT4DEV_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -# CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_INOTIFY is not set -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_SYSCTL=y -CONFIG_SYSFS=y -# CONFIG_TMPFS is not set -# CONFIG_HUGETLB_PAGE is not set -# CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_DIRECTIO=y -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_BIND34 is not set -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_NLS is not set -# CONFIG_DLM is not set -# CONFIG_INSTRUMENTATION is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_MUST_CHECK=y -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_KERNEL is not set -# CONFIG_SLUB_DEBUG_ON is not set -CONFIG_DEBUG_BUGVERBOSE=y -CONFIG_FRAME_POINTER=y -# CONFIG_SAMPLES is not set -CONFIG_DEBUG_USER=y - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_MANAGER=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_GF128MUL is not set -# CONFIG_CRYPTO_ECB is not set -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_PCBC is not set -# CONFIG_CRYPTO_LRW is not set -# CONFIG_CRYPTO_XTS is not set -# CONFIG_CRYPTO_CRYPTD is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_SEED is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_CAMELLIA is not set -# CONFIG_CRYPTO_TEST is not set -# CONFIG_CRYPTO_AUTHENC is not set -# CONFIG_CRYPTO_HW is not set - -# -# Library routines -# -CONFIG_BITREVERSE=y -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -# CONFIG_CRC_ITU_T is not set -CONFIG_CRC32=y -# CONFIG_CRC7 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9ed2377fe8e..d0daeab2234 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -19,31 +19,21 @@ #ifdef __KERNEL__ +/* + * On ARM, ordinary assignment (str instruction) doesn't clear the local + * strex/ldrex monitor on some implementations. The reason we can use it for + * atomic_set() is the clrex or dummy strex done on every exception return. + */ #define atomic_read(v) ((v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) #if __LINUX_ARM_ARCH__ >= 6 /* * ARMv6 UP and SMP safe atomic ops. We use load exclusive and * store exclusive to ensure that these are atomic. We may loop - * to ensure that the update happens. Writing to 'v->counter' - * without using the following operations WILL break the atomic - * nature of these ops. + * to ensure that the update happens. */ -static inline void atomic_set(atomic_t *v, int i) -{ - unsigned long tmp; - - __asm__ __volatile__("@ atomic_set\n" -"1: ldrex %0, [%1]\n" -" strex %0, %2, [%1]\n" -" teq %0, #0\n" -" bne 1b" - : "=&r" (tmp) - : "r" (&v->counter), "r" (i) - : "cc"); -} - static inline void atomic_add(int i, atomic_t *v) { unsigned long tmp; @@ -163,8 +153,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) #error SMP not supported on pre-ARMv6 CPUs #endif -#define atomic_set(v,i) (((v)->counter) = (i)) - static inline int atomic_add_return(int i, atomic_t *v) { unsigned long flags; diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index feaa75f0013..66c160b8547 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -4,7 +4,7 @@ #ifndef __ASMARM_CACHE_H #define __ASMARM_CACHE_H -#define L1_CACHE_SHIFT 5 +#define L1_CACHE_SHIFT CONFIG_ARM_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index b3e656c6fb7..20ae96cc002 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -63,6 +63,11 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) return read_cpuid(CPUID_CACHETYPE); } +static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void) +{ + return read_cpuid(CPUID_TCM); +} + /* * Intel's XScale3 core supports some v6 features (supersections, L2) * but advertises itself as v5 as it does not support the v6 ISA. For @@ -73,7 +78,10 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) #else static inline int cpu_is_xsc3(void) { - if ((read_cpuid_id() & 0xffffe000) == 0x69056000) + unsigned int id; + id = read_cpuid_id() & 0xffffe000; + /* It covers both Intel ID and Marvell ID */ + if ((id == 0x69056000) || (id == 0x56056000)) return 1; return 0; diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2..1a8c7279a28 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -187,11 +187,74 @@ union iop3xx_desc { void *ptr; }; +/* No support for p+q operations */ +static inline int +iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op) +{ + BUG(); + return 0; +} + +static inline void +iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, + dma_addr_t addr, unsigned char coef) +{ + BUG(); +} + +static inline int +iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op) +{ + BUG(); + return 0; +} + +static inline void +iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) +{ + BUG(); +} + +#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr + +static inline void +iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, + dma_addr_t *src) +{ + BUG(); +} + static inline int iop_adma_get_max_xor(void) { return 32; } +static inline int iop_adma_get_max_pq(void) +{ + BUG(); + return 0; +} + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) { int id = chan->device->id; @@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, return slot_cnt; } +static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) +{ + return 0; +} + static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, return 0; } + +static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + BUG(); + return 0; +} + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h index 385c6e8cbbd..59b8c3892f7 100644 --- a/arch/arm/include/asm/hardware/iop_adma.h +++ b/arch/arm/include/asm/hardware/iop_adma.h @@ -86,6 +86,7 @@ struct iop_adma_chan { * @idx: pool index * @unmap_src_cnt: number of xor sources * @unmap_len: transaction bytecount + * @tx_list: list of descriptors that are associated with one operation * @async_tx: support for the async_tx api * @group_list: list of slots that make up a multi-descriptor transaction * for example transfer lengths larger than the supported hw max @@ -102,10 +103,12 @@ struct iop_adma_desc_slot { u16 idx; u16 unmap_src_cnt; size_t unmap_len; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { u32 *xor_check_result; u32 *crc32_result; + u32 *pq_check_result; }; }; diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h new file mode 100644 index 00000000000..5929ef5d927 --- /dev/null +++ b/arch/arm/include/asm/tcm.h @@ -0,0 +1,31 @@ +/* + * + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Author: Rickard Andersson <rickard.andersson@stericsson.com> + * Author: Linus Walleij <linus.walleij@stericsson.com> + * + */ +#ifndef __ASMARM_TCM_H +#define __ASMARM_TCM_H + +#ifndef CONFIG_HAVE_TCM +#error "You should not be including tcm.h unless you have a TCM!" +#endif + +#include <linux/compiler.h> + +/* Tag variables with this */ +#define __tcmdata __section(.tcm.data) +/* Tag constants with this */ +#define __tcmconst __section(.tcm.rodata) +/* Tag functions inside TCM called from outside TCM with this */ +#define __tcmfunc __attribute__((long_call)) __section(.tcm.text) noinline +/* Tag function inside TCM called from inside TCM with this */ +#define __tcmlocalfunc __section(.tcm.text) + +void *tcm_alloc(size_t len); +void tcm_free(void *addr, size_t len); + +#endif diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index 073e85b9b96..bc631161e9c 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -35,7 +35,9 @@ #define ARM(x...) #define THUMB(x...) x +#ifdef __ASSEMBLY__ #define W(instr) instr.w +#endif #define BSYM(sym) sym + 1 #else /* !CONFIG_THUMB2_KERNEL */ @@ -45,7 +47,9 @@ #define ARM(x...) x #define THUMB(x...) +#ifdef __ASSEMBLY__ #define W(instr) instr +#endif #define BSYM(sym) sym #endif /* CONFIG_THUMB2_KERNEL */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index c446aeff7b8..79087dd6d86 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARM_UNWIND) += unwind.o +obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3d727a8a23b..0a2ba51cf35 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -272,7 +272,15 @@ __und_svc: @ @ r0 - instruction @ +#ifndef CONFIG_THUMB2_KERNEL ldr r0, [r2, #-4] +#else + ldrh r0, [r2, #-2] @ Thumb instruction at LR - 2 + and r9, r0, #0xf800 + cmp r9, #0xe800 @ 32-bit instruction if xx >= 0 + ldrhhs r9, [r2] @ bottom 16 bits + orrhs r0, r9, r0, lsl #16 +#endif adr r9, BSYM(1f) bl call_fpe @@ -678,7 +686,9 @@ ENTRY(fp_enter) .word no_fp .previous -no_fp: mov pc, lr +ENTRY(no_fp) + mov pc, lr +ENDPROC(no_fp) __und_usr_unknown: enable_irq @@ -734,13 +744,6 @@ ENTRY(__switch_to) #ifdef CONFIG_MMU ldr r6, [r2, #TI_CPU_DOMAIN] #endif -#if __LINUX_ARM_ARCH__ >= 6 -#ifdef CONFIG_CPU_32v6K - clrex -#else - strex r5, r4, [ip] @ Clear exclusive monitor -#endif -#endif #if defined(CONFIG_HAS_TLS_REG) mcr p15, 0, r3, c13, c0, 3 @ set TLS register #elif !defined(CONFIG_TLS_REG_EMUL) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index a4eaf4f920c..e17e3c30d95 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -76,13 +76,25 @@ #ifndef CONFIG_THUMB2_KERNEL .macro svc_exit, rpsr msr spsr_cxsf, \rpsr +#if defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr +#elif defined (CONFIG_CPU_V6) + ldr r0, [sp] + strex r1, r2, [sp] @ clear the exclusive monitor + ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr +#endif .endm .macro restore_user_regs, fast = 0, offset = 0 ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc +#if defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor +#elif defined (CONFIG_CPU_V6) + strex r1, r2, [sp] @ clear the exclusive monitor +#endif .if \fast ldmdb sp, {r1 - lr}^ @ get calling r1 - lr .else @@ -98,6 +110,7 @@ .endm #else /* CONFIG_THUMB2_KERNEL */ .macro svc_exit, rpsr + clrex @ clear the exclusive monitor ldr r0, [sp, #S_SP] @ top of the stack ldr r1, [sp, #S_PC] @ return address tst r0, #4 @ orig stack 8-byte aligned? @@ -110,6 +123,7 @@ .endm .macro restore_user_regs, fast = 0, offset = 0 + clrex @ clear the exclusive monitor mov r2, sp load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index f692efddd44..60c62c377fa 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> +#include <linux/stop_machine.h> #include <linux/stringify.h> #include <asm/traps.h> #include <asm/cacheflush.h> @@ -83,10 +84,24 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) flush_insns(p->addr, 1); } +/* + * The actual disarming is done here on each CPU and synchronized using + * stop_machine. This synchronization is necessary on SMP to avoid removing + * a probe between the moment the 'Undefined Instruction' exception is raised + * and the moment the exception handler reads the faulting instruction from + * memory. + */ +int __kprobes __arch_disarm_kprobe(void *p) +{ + struct kprobe *kp = p; + *kp->addr = kp->opcode; + flush_insns(kp->addr, 1); + return 0; +} + void __kprobes arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_insns(p->addr, 1); + stop_machine(__arch_disarm_kprobe, p, &cpu_online_map); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d4d4f77c91b..c6c57b640b6 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -45,6 +45,7 @@ #include "compat.h" #include "atags.h" +#include "tcm.h" #ifndef MEM_SIZE #define MEM_SIZE (16*1024*1024) @@ -749,6 +750,7 @@ void __init setup_arch(char **cmdline_p) #endif cpu_init(); + tcm_init(); /* * Set up various architecture-specific pointers diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c new file mode 100644 index 00000000000..e50303868f1 --- /dev/null +++ b/arch/arm/kernel/tcm.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * TCM memory handling for ARM systems + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + * Author: Rickard Andersson <rickard.andersson@stericsson.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/ioport.h> +#include <linux/genalloc.h> +#include <linux/string.h> /* memcpy */ +#include <asm/page.h> /* PAGE_SHIFT */ +#include <asm/cputype.h> +#include <asm/mach/map.h> +#include <mach/memory.h> +#include "tcm.h" + +/* Scream and warn about misuse */ +#if !defined(ITCM_OFFSET) || !defined(ITCM_END) || \ + !defined(DTCM_OFFSET) || !defined(DTCM_END) +#error "TCM support selected but offsets not defined!" +#endif + +static struct gen_pool *tcm_pool; + +/* TCM section definitions from the linker */ +extern char __itcm_start, __sitcm_text, __eitcm_text; +extern char __dtcm_start, __sdtcm_data, __edtcm_data; + +/* + * TCM memory resources + */ +static struct resource dtcm_res = { + .name = "DTCM RAM", + .start = DTCM_OFFSET, + .end = DTCM_END, + .flags = IORESOURCE_MEM +}; + +static struct resource itcm_res = { + .name = "ITCM RAM", + .start = ITCM_OFFSET, + .end = ITCM_END, + .flags = IORESOURCE_MEM +}; + +static struct map_desc dtcm_iomap[] __initdata = { + { + .virtual = DTCM_OFFSET, + .pfn = __phys_to_pfn(DTCM_OFFSET), + .length = (DTCM_END - DTCM_OFFSET + 1), + .type = MT_UNCACHED + } +}; + +static struct map_desc itcm_iomap[] __initdata = { + { + .virtual = ITCM_OFFSET, + .pfn = __phys_to_pfn(ITCM_OFFSET), + .length = (ITCM_END - ITCM_OFFSET + 1), + .type = MT_UNCACHED + } +}; + +/* + * Allocate a chunk of TCM memory + */ +void *tcm_alloc(size_t len) +{ + unsigned long vaddr; + + if (!tcm_pool) + return NULL; + + vaddr = gen_pool_alloc(tcm_pool, len); + if (!vaddr) + return NULL; + + return (void *) vaddr; +} +EXPORT_SYMBOL(tcm_alloc); + +/* + * Free a chunk of TCM memory + */ +void tcm_free(void *addr, size_t len) +{ + gen_pool_free(tcm_pool, (unsigned long) addr, len); +} +EXPORT_SYMBOL(tcm_free); + + +static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size) +{ + const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128, + 256, 512, 1024, -1, -1, -1, -1 }; + u32 tcm_region; + int tcm_size; + + /* Read the special TCM region register c9, 0 */ + if (!type) + asm("mrc p15, 0, %0, c9, c1, 0" + : "=r" (tcm_region)); + else + asm("mrc p15, 0, %0, c9, c1, 1" + : "=r" (tcm_region)); + + tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f]; + if (tcm_size < 0) { + pr_err("CPU: %sTCM of unknown size!\n", + type ? "I" : "D"); + } else { + pr_info("CPU: found %sTCM %dk @ %08x, %senabled\n", + type ? "I" : "D", + tcm_size, + (tcm_region & 0xfffff000U), + (tcm_region & 1) ? "" : "not "); + } + + if (tcm_size != expected_size) { + pr_crit("CPU: %sTCM was detected %dk but expected %dk!\n", + type ? "I" : "D", + tcm_size, + expected_size); + /* Adjust to the expected size? what can we do... */ + } + + /* Force move the TCM bank to where we want it, enable */ + tcm_region = offset | (tcm_region & 0x00000ffeU) | 1; + + if (!type) + asm("mcr p15, 0, %0, c9, c1, 0" + : /* No output operands */ + : "r" (tcm_region)); + else + asm("mcr p15, 0, %0, c9, c1, 1" + : /* No output operands */ + : "r" (tcm_region)); + + pr_debug("CPU: moved %sTCM %dk to %08x, enabled\n", + type ? "I" : "D", + tcm_size, + (tcm_region & 0xfffff000U)); +} + +/* + * This initializes the TCM memory + */ +void __init tcm_init(void) +{ + u32 tcm_status = read_cpuid_tcmstatus(); + char *start; + char *end; + char *ram; + + /* Setup DTCM if present */ + if (tcm_status & (1 << 16)) { + setup_tcm_bank(0, DTCM_OFFSET, + (DTCM_END - DTCM_OFFSET + 1) >> 10); + request_resource(&iomem_resource, &dtcm_res); + iotable_init(dtcm_iomap, 1); + /* Copy data from RAM to DTCM */ + start = &__sdtcm_data; + end = &__edtcm_data; + ram = &__dtcm_start; + memcpy(start, ram, (end-start)); + pr_debug("CPU DTCM: copied data from %p - %p\n", start, end); + } + + /* Setup ITCM if present */ + if (tcm_status & 1) { + setup_tcm_bank(1, ITCM_OFFSET, + (ITCM_END - ITCM_OFFSET + 1) >> 10); + request_resource(&iomem_resource, &itcm_res); + iotable_init(itcm_iomap, 1); + /* Copy code from RAM to ITCM */ + start = &__sitcm_text; + end = &__eitcm_text; + ram = &__itcm_start; + memcpy(start, ram, (end-start)); + pr_debug("CPU ITCM: copied code from %p - %p\n", start, end); + } +} + +/* + * This creates the TCM memory pool and has to be done later, + * during the core_initicalls, since the allocator is not yet + * up and running when the first initialization runs. + */ +static int __init setup_tcm_pool(void) +{ + u32 tcm_status = read_cpuid_tcmstatus(); + u32 dtcm_pool_start = (u32) &__edtcm_data; + u32 itcm_pool_start = (u32) &__eitcm_text; + int ret; + + /* + * Set up malloc pool, 2^2 = 4 bytes granularity since + * the TCM is sometimes just 4 KiB. NB: pages and cache + * line alignments does not matter in TCM! + */ + tcm_pool = gen_pool_create(2, -1); + + pr_debug("Setting up TCM memory pool\n"); + + /* Add the rest of DTCM to the TCM pool */ + if (tcm_status & (1 << 16)) { + if (dtcm_pool_start < DTCM_END) { + ret = gen_pool_add(tcm_pool, dtcm_pool_start, + DTCM_END - dtcm_pool_start + 1, -1); + if (ret) { + pr_err("CPU DTCM: could not add DTCM " \ + "remainder to pool!\n"); + return ret; + } + pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \ + "the TCM memory pool\n", + DTCM_END - dtcm_pool_start + 1, + dtcm_pool_start); + } + } + + /* Add the rest of ITCM to the TCM pool */ + if (tcm_status & 1) { + if (itcm_pool_start < ITCM_END) { + ret = gen_pool_add(tcm_pool, itcm_pool_start, + ITCM_END - itcm_pool_start + 1, -1); + if (ret) { + pr_err("CPU ITCM: could not add ITCM " \ + "remainder to pool!\n"); + return ret; + } + pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \ + "the TCM memory pool\n", + ITCM_END - itcm_pool_start + 1, + itcm_pool_start); + } + } + return 0; +} + +core_initcall(setup_tcm_pool); diff --git a/arch/arm/kernel/tcm.h b/arch/arm/kernel/tcm.h new file mode 100644 index 00000000000..8015ad434a4 --- /dev/null +++ b/arch/arm/kernel/tcm.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * TCM memory handling for ARM systems + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + * Author: Rickard Andersson <rickard.andersson@stericsson.com> + */ + +#ifdef CONFIG_HAVE_TCM +void __init tcm_init(void); +#else +/* No TCM support, just blank inlines to be optimized out */ +inline void tcm_init(void) +{ +} +#endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 5cc4812c976..aecf87dfbae 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -199,6 +199,63 @@ SECTIONS } _edata_loc = __data_loc + SIZEOF(.data); +#ifdef CONFIG_HAVE_TCM + /* + * We align everything to a page boundary so we can + * free it after init has commenced and TCM contents have + * been copied to its destination. + */ + .tcm_start : { + . = ALIGN(PAGE_SIZE); + __tcm_start = .; + __itcm_start = .; + } + + /* + * Link these to the ITCM RAM + * Put VMA to the TCM address and LMA to the common RAM + * and we'll upload the contents from RAM to TCM and free + * the used RAM after that. + */ + .text_itcm ITCM_OFFSET : AT(__itcm_start) + { + __sitcm_text = .; + *(.tcm.text) + *(.tcm.rodata) + . = ALIGN(4); + __eitcm_text = .; + } + + /* + * Reset the dot pointer, this is needed to create the + * relative __dtcm_start below (to be used as extern in code). + */ + . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm); + + .dtcm_start : { + __dtcm_start = .; + } + + /* TODO: add remainder of ITCM as well, that can be used for data! */ + .data_dtcm DTCM_OFFSET : AT(__dtcm_start) + { + . = ALIGN(4); + __sdtcm_data = .; + *(.tcm.data) + . = ALIGN(4); + __edtcm_data = .; + } + + /* Reset the dot pointer or the linker gets confused */ + . = ADDR(.dtcm_start) + SIZEOF(.data_dtcm); + + /* End marker for freeing TCM copy in linked object */ + .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){ + . = ALIGN(PAGE_SIZE); + __tcm_end = .; + } +#endif + .bss : { __bss_start = .; /* BSS */ *(.bss) diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 6ae04db1ca4..6ee2f6706f8 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -12,8 +12,9 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> -#define COPY_COUNT (PAGE_SZ/64 PLD( -1 )) +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) .text .align 5 @@ -26,17 +27,16 @@ ENTRY(copy_page) stmfd sp!, {r4, lr} @ 2 PLD( pld [r1, #0] ) - PLD( pld [r1, #32] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) mov r2, #COPY_COUNT @ 1 ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: PLD( pld [r1, #64] ) - PLD( pld [r1, #96] ) -2: stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) stmia r0!, {r3, r4, ip, lr} @ 4 ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr subs r2, r2, #1 @ 1 stmia r0!, {r3, r4, ip, lr} @ 4 ldmgtia r1!, {r3, r4, ip, lr} @ 4 diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c index 412aa49ad2f..d1f775e8635 100644 --- a/arch/arm/mach-at91/at91cap9_devices.c +++ b/arch/arm/mach-at91/at91cap9_devices.c @@ -771,9 +771,9 @@ void __init at91_add_device_pwm(u32 mask) {} * AC97 * -------------------------------------------------------------------- */ -#if defined(CONFIG_SND_AT91_AC97) || defined(CONFIG_SND_AT91_AC97_MODULE) +#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE) static u64 ac97_dmamask = DMA_BIT_MASK(32); -static struct atmel_ac97_data ac97_data; +static struct ac97c_platform_data ac97_data; static struct resource ac97_resources[] = { [0] = { @@ -789,7 +789,7 @@ static struct resource ac97_resources[] = { }; static struct platform_device at91cap9_ac97_device = { - .name = "ac97c", + .name = "atmel_ac97c", .id = 1, .dev = { .dma_mask = &ac97_dmamask, @@ -800,7 +800,7 @@ static struct platform_device at91cap9_ac97_device = { .num_resources = ARRAY_SIZE(ac97_resources), }; -void __init at91_add_device_ac97(struct atmel_ac97_data *data) +void __init at91_add_device_ac97(struct ac97c_platform_data *data) { if (!data) return; @@ -818,7 +818,7 @@ void __init at91_add_device_ac97(struct atmel_ac97_data *data) platform_device_register(&at91cap9_ac97_device); } #else -void __init at91_add_device_ac97(struct atmel_ac97_data *data) {} +void __init at91_add_device_ac97(struct ac97c_platform_data *data) {} #endif diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index d746e8621bc..d581cff80c4 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -24,11 +24,59 @@ #include <mach/at91sam9g45.h> #include <mach/at91sam9g45_matrix.h> #include <mach/at91sam9_smc.h> +#include <mach/at_hdmac.h> #include "generic.h" /* -------------------------------------------------------------------- + * HDMAC - AHB DMA Controller + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE) +static u64 hdmac_dmamask = DMA_BIT_MASK(32); + +static struct at_dma_platform_data atdma_pdata = { + .nr_channels = 8, +}; + +static struct resource hdmac_resources[] = { + [0] = { + .start = AT91_BASE_SYS + AT91_DMA, + .end = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { + .start = AT91SAM9G45_ID_DMA, + .end = AT91SAM9G45_ID_DMA, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at_hdmac_device = { + .name = "at_hdmac", + .id = -1, + .dev = { + .dma_mask = &hdmac_dmamask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &atdma_pdata, + }, + .resource = hdmac_resources, + .num_resources = ARRAY_SIZE(hdmac_resources), +}; + +void __init at91_add_device_hdmac(void) +{ + dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask); + dma_cap_set(DMA_SLAVE, atdma_pdata.cap_mask); + platform_device_register(&at_hdmac_device); +} +#else +void __init at91_add_device_hdmac(void) {} +#endif + + +/* -------------------------------------------------------------------- * USB Host (OHCI) * -------------------------------------------------------------------- */ @@ -550,6 +598,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) /* -------------------------------------------------------------------- + * AC97 + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE) +static u64 ac97_dmamask = DMA_BIT_MASK(32); +static struct ac97c_platform_data ac97_data; + +static struct resource ac97_resources[] = { + [0] = { + .start = AT91SAM9G45_BASE_AC97C, + .end = AT91SAM9G45_BASE_AC97C + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9G45_ID_AC97C, + .end = AT91SAM9G45_ID_AC97C, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9g45_ac97_device = { + .name = "atmel_ac97c", + .id = 0, + .dev = { + .dma_mask = &ac97_dmamask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &ac97_data, + }, + .resource = ac97_resources, + .num_resources = ARRAY_SIZE(ac97_resources), +}; + +void __init at91_add_device_ac97(struct ac97c_platform_data *data) +{ + if (!data) + return; + + at91_set_A_periph(AT91_PIN_PD8, 0); /* AC97FS */ + at91_set_A_periph(AT91_PIN_PD9, 0); /* AC97CK */ + at91_set_A_periph(AT91_PIN_PD7, 0); /* AC97TX */ + at91_set_A_periph(AT91_PIN_PD6, 0); /* AC97RX */ + + /* reset */ + if (data->reset_pin) + at91_set_gpio_output(data->reset_pin, 0); + + ac97_data = *data; + platform_device_register(&at91sam9g45_ac97_device); +} +#else +void __init at91_add_device_ac97(struct ac97c_platform_data *data) {} +#endif + + +/* -------------------------------------------------------------------- * LCD Controller * -------------------------------------------------------------------- */ @@ -1220,6 +1323,7 @@ void __init at91_add_device_serial(void) {} */ static int __init at91_add_standard_devices(void) { + at91_add_device_hdmac(); at91_add_device_rtc(); at91_add_device_rtt(); at91_add_device_watchdog(); diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index 728186515cd..d345f5453db 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -21,11 +21,57 @@ #include <mach/at91sam9rl.h> #include <mach/at91sam9rl_matrix.h> #include <mach/at91sam9_smc.h> +#include <mach/at_hdmac.h> #include "generic.h" /* -------------------------------------------------------------------- + * HDMAC - AHB DMA Controller + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE) +static u64 hdmac_dmamask = DMA_BIT_MASK(32); + +static struct at_dma_platform_data atdma_pdata = { + .nr_channels = 2, +}; + +static struct resource hdmac_resources[] = { + [0] = { + .start = AT91_BASE_SYS + AT91_DMA, + .end = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { + .start = AT91SAM9RL_ID_DMA, + .end = AT91SAM9RL_ID_DMA, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at_hdmac_device = { + .name = "at_hdmac", + .id = -1, + .dev = { + .dma_mask = &hdmac_dmamask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &atdma_pdata, + }, + .resource = hdmac_resources, + .num_resources = ARRAY_SIZE(hdmac_resources), +}; + +void __init at91_add_device_hdmac(void) +{ + dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask); + platform_device_register(&at_hdmac_device); +} +#else +void __init at91_add_device_hdmac(void) {} +#endif + +/* -------------------------------------------------------------------- * USB HS Device (Gadget) * -------------------------------------------------------------------- */ @@ -398,6 +444,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) /* -------------------------------------------------------------------- + * AC97 + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE) +static u64 ac97_dmamask = DMA_BIT_MASK(32); +static struct ac97c_platform_data ac97_data; + +static struct resource ac97_resources[] = { + [0] = { + .start = AT91SAM9RL_BASE_AC97C, + .end = AT91SAM9RL_BASE_AC97C + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9RL_ID_AC97C, + .end = AT91SAM9RL_ID_AC97C, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9rl_ac97_device = { + .name = "atmel_ac97c", + .id = 0, + .dev = { + .dma_mask = &ac97_dmamask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &ac97_data, + }, + .resource = ac97_resources, + .num_resources = ARRAY_SIZE(ac97_resources), +}; + +void __init at91_add_device_ac97(struct ac97c_platform_data *data) +{ + if (!data) + return; + + at91_set_A_periph(AT91_PIN_PD1, 0); /* AC97FS */ + at91_set_A_periph(AT91_PIN_PD2, 0); /* AC97CK */ + at91_set_A_periph(AT91_PIN_PD3, 0); /* AC97TX */ + at91_set_A_periph(AT91_PIN_PD4, 0); /* AC97RX */ + + /* reset */ + if (data->reset_pin) + at91_set_gpio_output(data->reset_pin, 0); + + ac97_data = *data; + platform_device_register(&at91sam9rl_ac97_device); +} +#else +void __init at91_add_device_ac97(struct ac97c_platform_data *data) {} +#endif + + +/* -------------------------------------------------------------------- * LCD Controller * -------------------------------------------------------------------- */ @@ -1103,6 +1204,7 @@ void __init at91_add_device_serial(void) {} */ static int __init at91_add_standard_devices(void) { + at91_add_device_hdmac(); at91_add_device_rtc(); at91_add_device_rtt(); at91_add_device_watchdog(); diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c index 83a1a0fef47..d6940870e40 100644 --- a/arch/arm/mach-at91/board-cap9adk.c +++ b/arch/arm/mach-at91/board-cap9adk.c @@ -364,7 +364,7 @@ static struct atmel_lcdfb_info __initdata cap9adk_lcdc_data; /* * AC97 */ -static struct atmel_ac97_data cap9adk_ac97_data = { +static struct ac97c_platform_data cap9adk_ac97_data = { // .reset_pin = ... not connected }; diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c index 8c0b71c95be..7c1e382330f 100644 --- a/arch/arm/mach-at91/board-neocore926.c +++ b/arch/arm/mach-at91/board-neocore926.c @@ -340,7 +340,7 @@ static void __init neocore926_add_device_buttons(void) {} /* * AC97 */ -static struct atmel_ac97_data neocore926_ac97_data = { +static struct ac97c_platform_data neocore926_ac97_data = { .reset_pin = AT91_PIN_PA13, }; diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c index b8558eae522..64c3843f323 100644 --- a/arch/arm/mach-at91/board-sam9m10g45ek.c +++ b/arch/arm/mach-at91/board-sam9m10g45ek.c @@ -311,6 +311,14 @@ static void __init ek_add_device_buttons(void) {} /* + * AC97 + * reset_pin is not connected: NRST + */ +static struct ac97c_platform_data ek_ac97_data = { +}; + + +/* * LEDs ... these could all be PWM-driven, for variable brightness */ static struct gpio_led ek_leds[] = { @@ -372,6 +380,8 @@ static void __init ek_board_init(void) at91_add_device_lcdc(&ek_lcdc_data); /* Push Buttons */ ek_add_device_buttons(); + /* AC97 */ + at91_add_device_ac97(&ek_ac97_data); /* LEDs */ at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led)); diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c index 94ffb5c103b..bd28e989e54 100644 --- a/arch/arm/mach-at91/board-sam9rlek.c +++ b/arch/arm/mach-at91/board-sam9rlek.c @@ -211,6 +211,14 @@ static struct atmel_lcdfb_info __initdata ek_lcdc_data; /* + * AC97 + * reset_pin is not connected: NRST + */ +static struct ac97c_platform_data ek_ac97_data = { +}; + + +/* * LEDs */ static struct gpio_led ek_leds[] = { @@ -299,6 +307,8 @@ static void __init ek_board_init(void) at91_add_device_mmc(0, &ek_mmc_data); /* LCD Controller */ at91_add_device_lcdc(&ek_lcdc_data); + /* AC97 */ + at91_add_device_ac97(&ek_ac97_data); /* Touch Screen Controller */ at91_add_device_tsadcc(); /* LEDs */ diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 2a318eba1b0..3f35293d457 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -19,6 +19,7 @@ #include <linux/amba/bus.h> #include <linux/amba/kmi.h> #include <linux/amba/clcd.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <asm/clkdev.h> @@ -35,7 +36,6 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> #include <asm/mach/irq.h> -#include <asm/mach/mmc.h> #include <asm/mach/map.h> #include <asm/mach/time.h> @@ -400,7 +400,7 @@ static unsigned int mmc_status(struct device *dev) return status & 8; } -static struct mmc_platform_data mmc_data = { +static struct mmci_platform_data mmc_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, .gpio_wp = -1, diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f217..6d3782d85a9 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void) return 16; } +#define iop_adma_get_max_pq iop_adma_get_max_xor + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) { return __raw_readl(ADMA_ADAR(chan)); @@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT +#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) +#define iop_chan_pq_slot_count iop_chan_xor_slot_count +#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) @@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, return hw_desc->dest_addr; } +static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->q_dest_addr; +} + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, return 1; } +static inline void +iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.pq_xfer_en = 1; + u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); + u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + +static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = hw_desc->desc_ctrl; + return u_desc_ctrl.field.pq_xfer_en; +} + +static inline void +iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.zero_result = 1; + u_desc_ctrl.field.status_write_back_en = 1; + u_desc_ctrl.field.pq_xfer_en = 1; + u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); + u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan, u32 byte_count) @@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) } } +#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan, @@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, hw_desc->upper_dest_addr = 0; } +static inline void +iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + + hw_desc->dest_addr = addr[0]; + hw_desc->q_dest_addr = addr[1]; + hw_desc->upper_dest_addr = 0; +} + static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, dma_addr_t addr) { @@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, } static inline void +iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, + dma_addr_t addr, unsigned char coef) +{ + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; + struct iop13xx_adma_src *src; + int i = 0; + + do { + iter = iop_hw_desc_slot_idx(hw_desc, i); + src = &iter->src[src_idx]; + src->src_addr = addr; + src->pq_upper_src_addr = 0; + src->pq_dmlt = coef; + slot_cnt -= slots_per_op; + if (slot_cnt) { + i += slots_per_op; + addr += IOP_ADMA_PQ_MAX_BYTE_COUNT; + } + } while (slot_cnt); +} + +static inline void iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, } #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr +#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr + +static inline void +iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, + dma_addr_t *src) +{ + iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]); + iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]); +} static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, u32 next_desc_addr) @@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index bee42c609df..5c147fb66a0 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_0_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_1: @@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_1_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_2: @@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_2_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); - dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); - dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); + dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; } } diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 89c992b8f75..a6f8eab14ba 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -21,6 +21,11 @@ config CPU_PXA930 config CPU_PXA935 bool "PXA935 (codename Tavor-P65)" + select CPU_PXA930 + +config CPU_PXA950 + bool "PXA950 (codename Tavor-PV2)" + select CPU_PXA930 endmenu @@ -79,6 +84,12 @@ config MACH_MP900C bool "Nec Mobilepro 900/c" select PXA25x +config MACH_BALLOON3 + bool "Balloon 3 board" + select PXA27x + select IWMMXT + select PXA_HAVE_BOARD_IRQS + config ARCH_PXA_IDP bool "Accelent Xscale IDP" select PXA25x @@ -371,6 +382,15 @@ config MACH_PALMTE2 Say Y here if you intend to run this kernel on a Palm Tungsten|E2 handheld computer. +config MACH_PALMTC + bool "Palm Tungsten|C" + default y + depends on ARCH_PXA_PALM + select PXA25x + help + Say Y here if you intend to run this kernel on a Palm Tungsten|C + handheld computer. + config MACH_PALMT5 bool "Palm Tungsten|T5" default y @@ -458,6 +478,7 @@ config PXA_EZX select PXA27x select IWMMXT select HAVE_PWM + select PXA_HAVE_BOARD_IRQS config MACH_EZX_A780 bool "Motorola EZX A780" @@ -489,6 +510,21 @@ config MACH_EZX_E2 default y depends on PXA_EZX +config MACH_XCEP + bool "Iskratel Electronics XCEP" + select PXA25x + select MTD + select MTD_PARTITIONS + select MTD_PHYSMAP + select MTD_CFI_INTELEXT + select MTD_CFI + select MTD_CHAR + select SMC91X + select PXA_SSP + help + PXA255 based Single Board Computer with SMC 91C111 ethernet chip and 64 MB of flash. + Tuned for usage in Libera instruments for particle accelerators. + endmenu config PXA25x diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index d4c6122a342..f10e152bfc2 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_GUMSTIX_AM300EPD) += am300epd.o obj-$(CONFIG_ARCH_LUBBOCK) += lubbock.o obj-$(CONFIG_MACH_LOGICPD_PXA270) += lpd270.o obj-$(CONFIG_MACH_MAINSTONE) += mainstone.o +obj-$(CONFIG_MACH_BALLOON3) += balloon3.o obj-$(CONFIG_MACH_MP900C) += mp900.o obj-$(CONFIG_ARCH_PXA_IDP) += idp.o obj-$(CONFIG_MACH_TRIZEPS4) += trizeps4.o @@ -58,6 +59,7 @@ obj-$(CONFIG_MACH_E750) += e750.o obj-$(CONFIG_MACH_E400) += e400.o obj-$(CONFIG_MACH_E800) += e800.o obj-$(CONFIG_MACH_PALMTE2) += palmte2.o +obj-$(CONFIG_MACH_PALMTC) += palmtc.o obj-$(CONFIG_MACH_PALMT5) += palmt5.o obj-$(CONFIG_MACH_PALMTX) += palmtx.o obj-$(CONFIG_MACH_PALMLD) += palmld.o @@ -78,6 +80,8 @@ obj-$(CONFIG_MACH_ARMCORE) += cm-x2xx.o cm-x255.o cm-x270.o obj-$(CONFIG_MACH_CM_X300) += cm-x300.o obj-$(CONFIG_PXA_EZX) += ezx.o +obj-$(CONFIG_MACH_XCEP) += xcep.o + obj-$(CONFIG_MACH_INTELMOTE2) += imote2.o obj-$(CONFIG_MACH_STARGATE2) += stargate2.o obj-$(CONFIG_MACH_CSB726) += csb726.o diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c new file mode 100644 index 00000000000..f23138b8fca --- /dev/null +++ b/arch/arm/mach-pxa/balloon3.c @@ -0,0 +1,361 @@ +/* + * linux/arch/arm/mach-pxa/balloon3.c + * + * Support for Balloonboard.org Balloon3 board. + * + * Author: Nick Bane, Wookey, Jonathan McDowell + * Created: June, 2006 + * Copyright: Toby Churchill Ltd + * Derived from mainstone.c, by Nico Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/sysdev.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/bitops.h> +#include <linux/fb.h> +#include <linux/gpio.h> +#include <linux/ioport.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/types.h> + +#include <asm/setup.h> +#include <asm/mach-types.h> +#include <asm/irq.h> +#include <asm/sizes.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> +#include <asm/mach/irq.h> +#include <asm/mach/flash.h> + +#include <mach/pxa27x.h> +#include <mach/balloon3.h> +#include <mach/audio.h> +#include <mach/pxafb.h> +#include <mach/mmc.h> +#include <mach/udc.h> +#include <mach/pxa27x-udc.h> +#include <mach/irda.h> +#include <mach/ohci.h> + +#include <plat/i2c.h> + +#include "generic.h" +#include "devices.h" + +static unsigned long balloon3_irq_enabled; + +static unsigned long balloon3_features_present = + (1 << BALLOON3_FEATURE_OHCI) | (1 << BALLOON3_FEATURE_CF) | + (1 << BALLOON3_FEATURE_AUDIO) | + (1 << BALLOON3_FEATURE_TOPPOLY); + +int balloon3_has(enum balloon3_features feature) +{ + return (balloon3_features_present & (1 << feature)) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(balloon3_has); + +int __init parse_balloon3_features(char *arg) +{ + if (!arg) + return 0; + + return strict_strtoul(arg, 0, &balloon3_features_present); +} +early_param("balloon3_features", parse_balloon3_features); + +static void balloon3_mask_irq(unsigned int irq) +{ + int balloon3_irq = (irq - BALLOON3_IRQ(0)); + balloon3_irq_enabled &= ~(1 << balloon3_irq); + __raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG); +} + +static void balloon3_unmask_irq(unsigned int irq) +{ + int balloon3_irq = (irq - BALLOON3_IRQ(0)); + balloon3_irq_enabled |= (1 << balloon3_irq); + __raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG); +} + +static struct irq_chip balloon3_irq_chip = { + .name = "FPGA", + .ack = balloon3_mask_irq, + .mask = balloon3_mask_irq, + .unmask = balloon3_unmask_irq, +}; + +static void balloon3_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) & + balloon3_irq_enabled; + + do { + /* clear useless edge notification */ + if (desc->chip->ack) + desc->chip->ack(BALLOON3_AUX_NIRQ); + while (pending) { + irq = BALLOON3_IRQ(0) + __ffs(pending); + generic_handle_irq(irq); + pending &= pending - 1; + } + pending = __raw_readl(BALLOON3_INT_CONTROL_REG) & + balloon3_irq_enabled; + } while (pending); +} + +static void __init balloon3_init_irq(void) +{ + int irq; + + pxa27x_init_irq(); + /* setup extra Balloon3 irqs */ + for (irq = BALLOON3_IRQ(0); irq <= BALLOON3_IRQ(7); irq++) { + set_irq_chip(irq, &balloon3_irq_chip); + set_irq_handler(irq, handle_level_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + } + + set_irq_chained_handler(BALLOON3_AUX_NIRQ, balloon3_irq_handler); + set_irq_type(BALLOON3_AUX_NIRQ, IRQ_TYPE_EDGE_FALLING); + + pr_debug("%s: chained handler installed - irq %d automatically " + "enabled\n", __func__, BALLOON3_AUX_NIRQ); +} + +static void balloon3_backlight_power(int on) +{ + pr_debug("%s: power is %s\n", __func__, on ? "on" : "off"); + gpio_set_value(BALLOON3_GPIO_RUN_BACKLIGHT, on); +} + +static unsigned long balloon3_lcd_pin_config[] = { + /* LCD - 16bpp Active TFT */ + GPIO58_LCD_LDD_0, + GPIO59_LCD_LDD_1, + GPIO60_LCD_LDD_2, + GPIO61_LCD_LDD_3, + GPIO62_LCD_LDD_4, + GPIO63_LCD_LDD_5, + GPIO64_LCD_LDD_6, + GPIO65_LCD_LDD_7, + GPIO66_LCD_LDD_8, + GPIO67_LCD_LDD_9, + GPIO68_LCD_LDD_10, + GPIO69_LCD_LDD_11, + GPIO70_LCD_LDD_12, + GPIO71_LCD_LDD_13, + GPIO72_LCD_LDD_14, + GPIO73_LCD_LDD_15, + GPIO74_LCD_FCLK, + GPIO75_LCD_LCLK, + GPIO76_LCD_PCLK, + GPIO77_LCD_BIAS, + + GPIO99_GPIO, /* Backlight */ +}; + +static struct pxafb_mode_info balloon3_lcd_modes[] = { + { + .pixclock = 38000, + .xres = 480, + .yres = 640, + .bpp = 16, + .hsync_len = 8, + .left_margin = 8, + .right_margin = 8, + .vsync_len = 2, + .upper_margin = 4, + .lower_margin = 5, + .sync = 0, + }, +}; + +static struct pxafb_mach_info balloon3_pxafb_info = { + .modes = balloon3_lcd_modes, + .num_modes = ARRAY_SIZE(balloon3_lcd_modes), + .lcd_conn = LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL, + .pxafb_backlight_power = balloon3_backlight_power, +}; + +static unsigned long balloon3_mmc_pin_config[] = { + GPIO32_MMC_CLK, + GPIO92_MMC_DAT_0, + GPIO109_MMC_DAT_1, + GPIO110_MMC_DAT_2, + GPIO111_MMC_DAT_3, + GPIO112_MMC_CMD, +}; + +static void balloon3_mci_setpower(struct device *dev, unsigned int vdd) +{ + struct pxamci_platform_data *p_d = dev->platform_data; + + if ((1 << vdd) & p_d->ocr_mask) { + pr_debug("%s: on\n", __func__); + /* FIXME something to prod here? */ + } else { + pr_debug("%s: off\n", __func__); + /* FIXME something to prod here? */ + } +} + +static struct pxamci_platform_data balloon3_mci_platform_data = { + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .setpower = balloon3_mci_setpower, +}; + +static int balloon3_udc_is_connected(void) +{ + pr_debug("%s: udc connected\n", __func__); + return 1; +} + +static void balloon3_udc_command(int cmd) +{ + switch (cmd) { + case PXA2XX_UDC_CMD_CONNECT: + UP2OCR |= (UP2OCR_DPPUE + UP2OCR_DPPUBE); + pr_debug("%s: connect\n", __func__); + break; + case PXA2XX_UDC_CMD_DISCONNECT: + UP2OCR &= ~UP2OCR_DPPUE; + pr_debug("%s: disconnect\n", __func__); + break; + } +} + +static struct pxa2xx_udc_mach_info balloon3_udc_info = { + .udc_is_connected = balloon3_udc_is_connected, + .udc_command = balloon3_udc_command, +}; + +static struct pxaficp_platform_data balloon3_ficp_platform_data = { + .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, +}; + +static unsigned long balloon3_ohci_pin_config[] = { + GPIO88_USBH1_PWR, + GPIO89_USBH1_PEN, +}; + +static struct pxaohci_platform_data balloon3_ohci_platform_data = { + .port_mode = PMM_PERPORT_MODE, + .flags = ENABLE_PORT_ALL | POWER_CONTROL_LOW | POWER_SENSE_LOW, +}; + +static unsigned long balloon3_pin_config[] __initdata = { + /* Select BTUART 'COM1/ttyS0' as IO option for pins 42/43/44/45 */ + GPIO42_BTUART_RXD, + GPIO43_BTUART_TXD, + GPIO44_BTUART_CTS, + GPIO45_BTUART_RTS, + + /* Wakeup GPIO */ + GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH, + + /* NAND & IDLE LED GPIOs */ + GPIO9_GPIO, + GPIO10_GPIO, +}; + +static struct gpio_led balloon3_gpio_leds[] = { + { + .name = "balloon3:green:idle", + .default_trigger = "heartbeat", + .gpio = BALLOON3_GPIO_LED_IDLE, + .active_low = 1, + }, + { + .name = "balloon3:green:nand", + .default_trigger = "nand-disk", + .gpio = BALLOON3_GPIO_LED_NAND, + .active_low = 1, + }, +}; + +static struct gpio_led_platform_data balloon3_gpio_leds_platform_data = { + .leds = balloon3_gpio_leds, + .num_leds = ARRAY_SIZE(balloon3_gpio_leds), +}; + +static struct platform_device balloon3led_device = { + .name = "leds-gpio", + .id = -1, + .dev = { + .platform_data = &balloon3_gpio_leds_platform_data, + }, +}; + +static void __init balloon3_init(void) +{ + pr_info("Initialising Balloon3\n"); + + /* system bus arbiter setting + * - Core_Park + * - LCD_wt:DMA_wt:CORE_Wt = 2:3:4 + */ + ARB_CNTRL = ARB_CORE_PARK | 0x234; + + pxa_set_i2c_info(NULL); + if (balloon3_has(BALLOON3_FEATURE_AUDIO)) + pxa_set_ac97_info(NULL); + + if (balloon3_has(BALLOON3_FEATURE_TOPPOLY)) { + pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_lcd_pin_config)); + gpio_request(BALLOON3_GPIO_RUN_BACKLIGHT, + "LCD Backlight Power"); + gpio_direction_output(BALLOON3_GPIO_RUN_BACKLIGHT, 1); + set_pxa_fb_info(&balloon3_pxafb_info); + } + + if (balloon3_has(BALLOON3_FEATURE_MMC)) { + pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_mmc_pin_config)); + pxa_set_mci_info(&balloon3_mci_platform_data); + } + pxa_set_ficp_info(&balloon3_ficp_platform_data); + if (balloon3_has(BALLOON3_FEATURE_OHCI)) { + pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_ohci_pin_config)); + pxa_set_ohci_info(&balloon3_ohci_platform_data); + } + pxa_set_udc_info(&balloon3_udc_info); + + pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_pin_config)); + + platform_device_register(&balloon3led_device); +} + +static struct map_desc balloon3_io_desc[] __initdata = { + { /* CPLD/FPGA */ + .virtual = BALLOON3_FPGA_VIRT, + .pfn = __phys_to_pfn(BALLOON3_FPGA_PHYS), + .length = BALLOON3_FPGA_LENGTH, + .type = MT_DEVICE, + }, +}; + +static void __init balloon3_map_io(void) +{ + pxa_map_io(); + iotable_init(balloon3_io_desc, ARRAY_SIZE(balloon3_io_desc)); +} + +MACHINE_START(BALLOON3, "Balloon3") + /* Maintainer: Nick Bane. */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .map_io = balloon3_map_io, + .init_irq = balloon3_init_irq, + .timer = &pxa_timer, + .init_machine = balloon3_init, + .boot_params = PHYS_OFFSET + 0x100, +MACHINE_END diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h index 5599bceff73..978a3667e90 100644 --- a/arch/arm/mach-pxa/clock.h +++ b/arch/arm/mach-pxa/clock.h @@ -12,7 +12,6 @@ struct clk { unsigned int cken; unsigned int delay; unsigned int enabled; - struct clk *other; }; #define INIT_CLKREG(_clk,_devname,_conname) \ diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index 1d2cec25391..eea78b6c2bc 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -13,13 +13,18 @@ #include <linux/sysdev.h> #include <linux/irq.h> #include <linux/gpio.h> +#include <linux/delay.h> #include <linux/rtc-v3020.h> #include <video/mbxfb.h> +#include <linux/spi/spi.h> +#include <linux/spi/libertas_spi.h> + #include <mach/pxa27x.h> #include <mach/ohci.h> #include <mach/mmc.h> +#include <mach/pxa2xx_spi.h> #include "generic.h" @@ -34,6 +39,10 @@ /* MMC power enable */ #define GPIO105_MMC_POWER (105) +/* WLAN GPIOS */ +#define GPIO19_WLAN_STRAP (19) +#define GPIO102_WLAN_RST (102) + static unsigned long cmx270_pin_config[] = { /* AC'97 */ GPIO28_AC97_BITCLK, @@ -94,8 +103,8 @@ static unsigned long cmx270_pin_config[] = { GPIO26_SSP1_RXD, /* SSP2 */ - GPIO19_SSP2_SCLK, - GPIO14_SSP2_SFRM, + GPIO19_GPIO, /* SSP2 clock is used as GPIO for Libertas pin-strap */ + GPIO14_GPIO, GPIO87_SSP2_TXD, GPIO88_SSP2_RXD, @@ -123,6 +132,7 @@ static unsigned long cmx270_pin_config[] = { GPIO0_GPIO | WAKEUP_ON_EDGE_BOTH, GPIO105_GPIO | MFP_LPM_DRIVE_HIGH, /* MMC/SD power */ GPIO53_GPIO, /* PC card reset */ + GPIO102_GPIO, /* WLAN reset */ /* NAND controls */ GPIO11_GPIO | MFP_LPM_DRIVE_HIGH, /* NAND CE# */ @@ -131,6 +141,7 @@ static unsigned long cmx270_pin_config[] = { /* interrupts */ GPIO10_GPIO, /* DM9000 interrupt */ GPIO83_GPIO, /* MMC card detect */ + GPIO95_GPIO, /* WLAN interrupt */ }; /* V3020 RTC */ @@ -271,64 +282,114 @@ static inline void cmx270_init_ohci(void) {} #endif #if defined(CONFIG_MMC) || defined(CONFIG_MMC_MODULE) -static int cmx270_mci_init(struct device *dev, - irq_handler_t cmx270_detect_int, - void *data) +static struct pxamci_platform_data cmx270_mci_platform_data = { + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .gpio_card_detect = GPIO83_MMC_IRQ, + .gpio_card_ro = -1, + .gpio_power = GPIO105_MMC_POWER, + .gpio_power_invert = 1, +}; + +static void __init cmx270_init_mmc(void) { - int err; + pxa_set_mci_info(&cmx270_mci_platform_data); +} +#else +static inline void cmx270_init_mmc(void) {} +#endif + +#if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE) +static struct pxa2xx_spi_master cm_x270_spi_info = { + .num_chipselect = 1, + .enable_dma = 1, +}; + +static struct pxa2xx_spi_chip cm_x270_libertas_chip = { + .rx_threshold = 1, + .tx_threshold = 1, + .timeout = 1000, + .gpio_cs = 14, +}; + +static unsigned long cm_x270_libertas_pin_config[] = { + /* SSP2 */ + GPIO19_SSP2_SCLK, + GPIO14_GPIO, + GPIO87_SSP2_TXD, + GPIO88_SSP2_RXD, + +}; - err = gpio_request(GPIO105_MMC_POWER, "MMC/SD power"); - if (err) { - dev_warn(dev, "power gpio unavailable\n"); +static int cm_x270_libertas_setup(struct spi_device *spi) +{ + int err = gpio_request(GPIO19_WLAN_STRAP, "WLAN STRAP"); + if (err) return err; - } - gpio_direction_output(GPIO105_MMC_POWER, 0); + err = gpio_request(GPIO102_WLAN_RST, "WLAN RST"); + if (err) + goto err_free_strap; - err = request_irq(CMX270_MMC_IRQ, cmx270_detect_int, - IRQF_DISABLED | IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (err) { - gpio_free(GPIO105_MMC_POWER); - dev_err(dev, "cmx270_mci_init: MMC/SD: can't" - " request MMC card detect IRQ\n"); - } + err = gpio_direction_output(GPIO102_WLAN_RST, 0); + if (err) + goto err_free_strap; + msleep(100); + + err = gpio_direction_output(GPIO19_WLAN_STRAP, 1); + if (err) + goto err_free_strap; + msleep(100); + + pxa2xx_mfp_config(ARRAY_AND_SIZE(cm_x270_libertas_pin_config)); + + gpio_set_value(GPIO102_WLAN_RST, 1); + msleep(100); + + spi->bits_per_word = 16; + spi_setup(spi); + + return 0; + +err_free_strap: + gpio_free(GPIO19_WLAN_STRAP); return err; } -static void cmx270_mci_setpower(struct device *dev, unsigned int vdd) +static int cm_x270_libertas_teardown(struct spi_device *spi) { - struct pxamci_platform_data *p_d = dev->platform_data; - - if ((1 << vdd) & p_d->ocr_mask) { - dev_dbg(dev, "power on\n"); - gpio_set_value(GPIO105_MMC_POWER, 0); - } else { - gpio_set_value(GPIO105_MMC_POWER, 1); - dev_dbg(dev, "power off\n"); - } -} + gpio_set_value(GPIO102_WLAN_RST, 0); + gpio_free(GPIO102_WLAN_RST); + gpio_free(GPIO19_WLAN_STRAP); -static void cmx270_mci_exit(struct device *dev, void *data) -{ - free_irq(CMX270_MMC_IRQ, data); - gpio_free(GPIO105_MMC_POWER); + return 0; } -static struct pxamci_platform_data cmx270_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = cmx270_mci_init, - .setpower = cmx270_mci_setpower, - .exit = cmx270_mci_exit, +struct libertas_spi_platform_data cm_x270_libertas_pdata = { + .use_dummy_writes = 1, + .setup = cm_x270_libertas_setup, + .teardown = cm_x270_libertas_teardown, }; -static void __init cmx270_init_mmc(void) +static struct spi_board_info cm_x270_spi_devices[] __initdata = { + { + .modalias = "libertas_spi", + .max_speed_hz = 13000000, + .bus_num = 2, + .irq = gpio_to_irq(95), + .chip_select = 0, + .controller_data = &cm_x270_libertas_chip, + .platform_data = &cm_x270_libertas_pdata, + }, +}; + +static void __init cmx270_init_spi(void) { - pxa_set_mci_info(&cmx270_mci_platform_data); + pxa2xx_set_spi_info(2, &cm_x270_spi_info); + spi_register_board_info(ARRAY_AND_SIZE(cm_x270_spi_devices)); } #else -static inline void cmx270_init_mmc(void) {} +static inline void cmx270_init_spi(void) {} #endif void __init cmx270_init(void) @@ -343,4 +404,5 @@ void __init cmx270_init(void) cmx270_init_mmc(); cmx270_init_ohci(); cmx270_init_2700G(); + cmx270_init_spi(); } diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 465da26591b..aac2cda60e0 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -306,68 +306,21 @@ static void cm_x300_mci_exit(struct device *dev, void *data) } static struct pxamci_platform_data cm_x300_mci_platform_data = { - .detect_delay = 20, - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = cm_x300_mci_init, - .exit = cm_x300_mci_exit, + .detect_delay = 20, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = cm_x300_mci_init, + .exit = cm_x300_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; -static int cm_x300_mci2_ro(struct device *dev) -{ - return gpio_get_value(GPIO85_MMC2_WP); -} - -static int cm_x300_mci2_init(struct device *dev, - irq_handler_t cm_x300_detect_int, - void *data) -{ - int err; - - /* - * setup GPIO for CM-X300 MMC controller - */ - err = gpio_request(GPIO82_MMC2_IRQ, "mmc card detect"); - if (err) - goto err_request_cd; - gpio_direction_input(GPIO82_MMC2_IRQ); - - err = gpio_request(GPIO85_MMC2_WP, "mmc write protect"); - if (err) - goto err_request_wp; - gpio_direction_input(GPIO85_MMC2_WP); - - err = request_irq(CM_X300_MMC2_IRQ, cm_x300_detect_int, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: MMC/SD/SDIO: " - "can't request card detect IRQ\n", __func__); - goto err_request_irq; - } - - return 0; - -err_request_irq: - gpio_free(GPIO85_MMC2_WP); -err_request_wp: - gpio_free(GPIO82_MMC2_IRQ); -err_request_cd: - return err; -} - -static void cm_x300_mci2_exit(struct device *dev, void *data) -{ - free_irq(CM_X300_MMC2_IRQ, data); - gpio_free(GPIO82_MMC2_IRQ); - gpio_free(GPIO85_MMC2_WP); -} - static struct pxamci_platform_data cm_x300_mci2_platform_data = { - .detect_delay = 20, - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = cm_x300_mci2_init, - .exit = cm_x300_mci2_exit, - .get_ro = cm_x300_mci2_ro, + .detect_delay = 20, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .gpio_card_detect = GPIO82_MMC2_IRQ, + .gpio_card_ro = GPIO85_MMC2_WP, + .gpio_power = -1, }; static void __init cm_x300_init_mmc(void) diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c index 7c9c34c19ae..37c239c5656 100644 --- a/arch/arm/mach-pxa/colibri-pxa300.c +++ b/arch/arm/mach-pxa/colibri-pxa300.c @@ -172,6 +172,7 @@ void __init colibri_pxa300_init(void) { colibri_pxa300_init_eth(); colibri_pxa300_init_ohci(); + colibri_pxa3xx_init_nand(); colibri_pxa300_init_lcd(); colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO39_GPIO)); colibri_pxa310_init_ac97(); diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c index a18d37b3c5e..494572825c7 100644 --- a/arch/arm/mach-pxa/colibri-pxa320.c +++ b/arch/arm/mach-pxa/colibri-pxa320.c @@ -164,15 +164,48 @@ static inline void __init colibri_pxa320_init_ac97(void) static inline void colibri_pxa320_init_ac97(void) {} #endif +/* + * The following configuration is verified to work with the Toradex Orchid + * carrier board + */ +static mfp_cfg_t colibri_pxa320_uart_pin_config[] __initdata = { + /* UART 1 configuration (may be set by bootloader) */ + GPIO99_UART1_CTS, + GPIO104_UART1_RTS, + GPIO97_UART1_RXD, + GPIO98_UART1_TXD, + GPIO101_UART1_DTR, + GPIO103_UART1_DSR, + GPIO100_UART1_DCD, + GPIO102_UART1_RI, + + /* UART 2 configuration */ + GPIO109_UART2_CTS, + GPIO112_UART2_RTS, + GPIO110_UART2_RXD, + GPIO111_UART2_TXD, + + /* UART 3 configuration */ + GPIO30_UART3_RXD, + GPIO31_UART3_TXD, +}; + +static void __init colibri_pxa320_init_uart(void) +{ + pxa3xx_mfp_config(ARRAY_AND_SIZE(colibri_pxa320_uart_pin_config)); +} + void __init colibri_pxa320_init(void) { colibri_pxa320_init_eth(); colibri_pxa320_init_ohci(); + colibri_pxa3xx_init_nand(); colibri_pxa320_init_lcd(); colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO49_GPIO)); colibri_pxa320_init_ac97(); colibri_pxa3xx_init_mmc(ARRAY_AND_SIZE(colibri_pxa320_mmc_pin_config), mfp_to_gpio(MFP_PIN_GPIO28)); + colibri_pxa320_init_uart(); } MACHINE_START(COLIBRI320, "Toradex Colibri PXA320") diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c index ea34e34f8cd..efebaf4d734 100644 --- a/arch/arm/mach-pxa/colibri-pxa3xx.c +++ b/arch/arm/mach-pxa/colibri-pxa3xx.c @@ -25,6 +25,7 @@ #include <mach/colibri.h> #include <mach/mmc.h> #include <mach/pxafb.h> +#include <mach/pxa3xx_nand.h> #include "generic.h" #include "devices.h" @@ -95,10 +96,13 @@ static void colibri_pxa3xx_mci_exit(struct device *dev, void *data) } static struct pxamci_platform_data colibri_pxa3xx_mci_platform_data = { - .detect_delay = 20, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .init = colibri_pxa3xx_mci_init, - .exit = colibri_pxa3xx_mci_exit, + .detect_delay = 20, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .init = colibri_pxa3xx_mci_init, + .exit = colibri_pxa3xx_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; void __init colibri_pxa3xx_init_mmc(mfp_cfg_t *pins, int len, int detect_pin) @@ -154,3 +158,43 @@ void __init colibri_pxa3xx_init_lcd(int bl_pin) } #endif +#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE) +static struct mtd_partition colibri_nand_partitions[] = { + { + .name = "bootloader", + .offset = 0, + .size = SZ_512K, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, + { + .name = "kernel", + .offset = MTDPART_OFS_APPEND, + .size = SZ_4M, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, + { + .name = "reserved", + .offset = MTDPART_OFS_APPEND, + .size = SZ_1M, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, + { + .name = "fs", + .offset = MTDPART_OFS_APPEND, + .size = MTDPART_SIZ_FULL, + }, +}; + +static struct pxa3xx_nand_platform_data colibri_nand_info = { + .enable_arbiter = 1, + .keep_config = 1, + .parts = colibri_nand_partitions, + .nr_parts = ARRAY_SIZE(colibri_nand_partitions), +}; + +void __init colibri_pxa3xx_init_nand(void) +{ + pxa3xx_set_nand_info(&colibri_nand_info); +} +#endif + diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 5363e1aea3f..b536b5a5a10 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -29,6 +29,7 @@ #include <linux/spi/ads7846.h> #include <linux/spi/corgi_lcd.h> #include <linux/mtd/sharpsl.h> +#include <linux/input/matrix_keypad.h> #include <video/w100fb.h> #include <asm/setup.h> @@ -104,6 +105,28 @@ static unsigned long corgi_pin_config[] __initdata = { GPIO6_MMC_CLK, GPIO8_MMC_CS0, + /* GPIO Matrix Keypad */ + GPIO66_GPIO, /* column 0 */ + GPIO67_GPIO, /* column 1 */ + GPIO68_GPIO, /* column 2 */ + GPIO69_GPIO, /* column 3 */ + GPIO70_GPIO, /* column 4 */ + GPIO71_GPIO, /* column 5 */ + GPIO72_GPIO, /* column 6 */ + GPIO73_GPIO, /* column 7 */ + GPIO74_GPIO, /* column 8 */ + GPIO75_GPIO, /* column 9 */ + GPIO76_GPIO, /* column 10 */ + GPIO77_GPIO, /* column 11 */ + GPIO58_GPIO, /* row 0 */ + GPIO59_GPIO, /* row 1 */ + GPIO60_GPIO, /* row 2 */ + GPIO61_GPIO, /* row 3 */ + GPIO62_GPIO, /* row 4 */ + GPIO63_GPIO, /* row 5 */ + GPIO64_GPIO, /* row 6 */ + GPIO65_GPIO, /* row 7 */ + /* GPIO */ GPIO9_GPIO, /* CORGI_GPIO_nSD_DETECT */ GPIO7_GPIO, /* CORGI_GPIO_nSD_WP */ @@ -267,9 +290,115 @@ static struct platform_device corgifb_device = { /* * Corgi Keyboard Device */ +#define CORGI_KEY_CALENDER KEY_F1 +#define CORGI_KEY_ADDRESS KEY_F2 +#define CORGI_KEY_FN KEY_F3 +#define CORGI_KEY_CANCEL KEY_F4 +#define CORGI_KEY_OFF KEY_SUSPEND +#define CORGI_KEY_EXOK KEY_F5 +#define CORGI_KEY_EXCANCEL KEY_F6 +#define CORGI_KEY_EXJOGDOWN KEY_F7 +#define CORGI_KEY_EXJOGUP KEY_F8 +#define CORGI_KEY_JAP1 KEY_LEFTCTRL +#define CORGI_KEY_JAP2 KEY_LEFTALT +#define CORGI_KEY_MAIL KEY_F10 +#define CORGI_KEY_OK KEY_F11 +#define CORGI_KEY_MENU KEY_F12 + +static const uint32_t corgikbd_keymap[] = { + KEY(0, 1, KEY_1), + KEY(0, 2, KEY_3), + KEY(0, 3, KEY_5), + KEY(0, 4, KEY_6), + KEY(0, 5, KEY_7), + KEY(0, 6, KEY_9), + KEY(0, 7, KEY_0), + KEY(0, 8, KEY_BACKSPACE), + KEY(1, 1, KEY_2), + KEY(1, 2, KEY_4), + KEY(1, 3, KEY_R), + KEY(1, 4, KEY_Y), + KEY(1, 5, KEY_8), + KEY(1, 6, KEY_I), + KEY(1, 7, KEY_O), + KEY(1, 8, KEY_P), + KEY(2, 0, KEY_TAB), + KEY(2, 1, KEY_Q), + KEY(2, 2, KEY_E), + KEY(2, 3, KEY_T), + KEY(2, 4, KEY_G), + KEY(2, 5, KEY_U), + KEY(2, 6, KEY_J), + KEY(2, 7, KEY_K), + KEY(3, 0, CORGI_KEY_CALENDER), + KEY(3, 1, KEY_W), + KEY(3, 2, KEY_S), + KEY(3, 3, KEY_F), + KEY(3, 4, KEY_V), + KEY(3, 5, KEY_H), + KEY(3, 6, KEY_M), + KEY(3, 7, KEY_L), + KEY(3, 9, KEY_RIGHTSHIFT), + KEY(4, 0, CORGI_KEY_ADDRESS), + KEY(4, 1, KEY_A), + KEY(4, 2, KEY_D), + KEY(4, 3, KEY_C), + KEY(4, 4, KEY_B), + KEY(4, 5, KEY_N), + KEY(4, 6, KEY_DOT), + KEY(4, 8, KEY_ENTER), + KEY(4, 10, KEY_LEFTSHIFT), + KEY(5, 0, CORGI_KEY_MAIL), + KEY(5, 1, KEY_Z), + KEY(5, 2, KEY_X), + KEY(5, 3, KEY_MINUS), + KEY(5, 4, KEY_SPACE), + KEY(5, 5, KEY_COMMA), + KEY(5, 7, KEY_UP), + KEY(5, 11, CORGI_KEY_FN), + KEY(6, 0, KEY_SYSRQ), + KEY(6, 1, CORGI_KEY_JAP1), + KEY(6, 2, CORGI_KEY_JAP2), + KEY(6, 3, CORGI_KEY_CANCEL), + KEY(6, 4, CORGI_KEY_OK), + KEY(6, 5, CORGI_KEY_MENU), + KEY(6, 6, KEY_LEFT), + KEY(6, 7, KEY_DOWN), + KEY(6, 8, KEY_RIGHT), + KEY(7, 0, CORGI_KEY_OFF), + KEY(7, 1, CORGI_KEY_EXOK), + KEY(7, 2, CORGI_KEY_EXCANCEL), + KEY(7, 3, CORGI_KEY_EXJOGDOWN), + KEY(7, 4, CORGI_KEY_EXJOGUP), +}; + +static struct matrix_keymap_data corgikbd_keymap_data = { + .keymap = corgikbd_keymap, + .keymap_size = ARRAY_SIZE(corgikbd_keymap), +}; + +static const int corgikbd_row_gpios[] = + { 58, 59, 60, 61, 62, 63, 64, 65 }; +static const int corgikbd_col_gpios[] = + { 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77 }; + +static struct matrix_keypad_platform_data corgikbd_pdata = { + .keymap_data = &corgikbd_keymap_data, + .row_gpios = corgikbd_row_gpios, + .col_gpios = corgikbd_col_gpios, + .num_row_gpios = ARRAY_SIZE(corgikbd_row_gpios), + .num_col_gpios = ARRAY_SIZE(corgikbd_col_gpios), + .col_scan_delay_us = 10, + .debounce_ms = 10, + .wakeup = 1, +}; + static struct platform_device corgikbd_device = { - .name = "corgi-keyboard", + .name = "matrix-keypad", .id = -1, + .dev = { + .platform_data = &corgikbd_pdata, + }, }; /* @@ -307,111 +436,20 @@ static struct platform_device corgiled_device = { * The card detect interrupt isn't debounced so we delay it by 250ms * to give the card a chance to fully insert/eject. */ -static struct pxamci_platform_data corgi_mci_platform_data; - -static int corgi_mci_init(struct device *dev, irq_handler_t corgi_detect_int, void *data) -{ - int err; - - err = gpio_request(CORGI_GPIO_nSD_DETECT, "nSD_DETECT"); - if (err) - goto err_out; - - err = gpio_request(CORGI_GPIO_nSD_WP, "nSD_WP"); - if (err) - goto err_free_1; - - err = gpio_request(CORGI_GPIO_SD_PWR, "SD_PWR"); - if (err) - goto err_free_2; - - gpio_direction_input(CORGI_GPIO_nSD_DETECT); - gpio_direction_input(CORGI_GPIO_nSD_WP); - gpio_direction_output(CORGI_GPIO_SD_PWR, 0); - - corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); - - err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, - IRQF_DISABLED | IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (err) { - pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n", - __func__); - goto err_free_3; - } - return 0; - -err_free_3: - gpio_free(CORGI_GPIO_SD_PWR); -err_free_2: - gpio_free(CORGI_GPIO_nSD_WP); -err_free_1: - gpio_free(CORGI_GPIO_nSD_DETECT); -err_out: - return err; -} - -static void corgi_mci_setpower(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data* p_d = dev->platform_data; - - gpio_set_value(CORGI_GPIO_SD_PWR, ((1 << vdd) & p_d->ocr_mask)); -} - -static int corgi_mci_get_ro(struct device *dev) -{ - return gpio_get_value(CORGI_GPIO_nSD_WP); -} - -static void corgi_mci_exit(struct device *dev, void *data) -{ - free_irq(CORGI_IRQ_GPIO_nSD_DETECT, data); - gpio_free(CORGI_GPIO_SD_PWR); - gpio_free(CORGI_GPIO_nSD_WP); - gpio_free(CORGI_GPIO_nSD_DETECT); -} - static struct pxamci_platform_data corgi_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = corgi_mci_init, - .get_ro = corgi_mci_get_ro, - .setpower = corgi_mci_setpower, - .exit = corgi_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .gpio_card_detect = -1, + .gpio_card_ro = CORGI_GPIO_nSD_WP, + .gpio_power = CORGI_GPIO_SD_PWR, }; /* * Irda */ -static void corgi_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(CORGI_GPIO_IR_ON, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - -static int corgi_irda_startup(struct device *dev) -{ - int err; - - err = gpio_request(CORGI_GPIO_IR_ON, "IR_ON"); - if (err) - return err; - - gpio_direction_output(CORGI_GPIO_IR_ON, 1); - return 0; -} - -static void corgi_irda_shutdown(struct device *dev) -{ - gpio_free(CORGI_GPIO_IR_ON); -} - static struct pxaficp_platform_data corgi_ficp_platform_data = { + .gpio_pwdown = CORGI_GPIO_IR_ON, .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = corgi_irda_transceiver_mode, - .startup = corgi_irda_startup, - .shutdown = corgi_irda_shutdown, }; @@ -636,6 +674,7 @@ static void __init corgi_init(void) corgi_init_spi(); pxa_set_udc_info(&udc_info); + corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); pxa_set_mci_info(&corgi_mci_platform_data); pxa_set_ficp_info(&corgi_ficp_platform_data); pxa_set_i2c_info(NULL); diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c index 7d3e1b46e55..79141f86272 100644 --- a/arch/arm/mach-pxa/csb726.c +++ b/arch/arm/mach-pxa/csb726.c @@ -130,61 +130,17 @@ static struct pxamci_platform_data csb726_mci_data; static int csb726_mci_init(struct device *dev, irq_handler_t detect, void *data) { - int err; - csb726_mci_data.detect_delay = msecs_to_jiffies(500); - - err = gpio_request(CSB726_GPIO_MMC_DETECT, "MMC detect"); - if (err) - goto err_det_req; - - err = gpio_direction_input(CSB726_GPIO_MMC_DETECT); - if (err) - goto err_det_dir; - - err = gpio_request(CSB726_GPIO_MMC_RO, "MMC ro"); - if (err) - goto err_ro_req; - - err = gpio_direction_input(CSB726_GPIO_MMC_RO); - if (err) - goto err_ro_dir; - - err = request_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), detect, - IRQF_DISABLED, "MMC card detect", data); - if (err) - goto err_irq; - return 0; - -err_irq: -err_ro_dir: - gpio_free(CSB726_GPIO_MMC_RO); -err_ro_req: -err_det_dir: - gpio_free(CSB726_GPIO_MMC_DETECT); -err_det_req: - return err; -} - -static int csb726_mci_get_ro(struct device *dev) -{ - return gpio_get_value(CSB726_GPIO_MMC_RO); -} - -static void csb726_mci_exit(struct device *dev, void *data) -{ - free_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), data); - gpio_free(CSB726_GPIO_MMC_RO); - gpio_free(CSB726_GPIO_MMC_DETECT); } static struct pxamci_platform_data csb726_mci = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = csb726_mci_init, - .get_ro = csb726_mci_get_ro, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = csb726_mci_init, /* FIXME setpower */ - .exit = csb726_mci_exit, + .gpio_card_detect = CSB726_GPIO_MMC_DETECT, + .gpio_card_ro = CSB726_GPIO_MMC_RO, + .gpio_power = -1, }; static struct pxaohci_platform_data csb726_ohci_platform_data = { diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index ecc08f360b6..46fabe1cca1 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -935,6 +935,33 @@ void __init pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info) { pxa_register_device(&pxa3xx_device_nand, info); } + +static struct resource pxa3xx_resources_gcu[] = { + { + .start = 0x54000000, + .end = 0x54000fff, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_GCU, + .end = IRQ_GCU, + .flags = IORESOURCE_IRQ, + }, +}; + +static u64 pxa3xx_gcu_dmamask = DMA_BIT_MASK(32); + +struct platform_device pxa3xx_device_gcu = { + .name = "pxa3xx-gcu", + .id = -1, + .num_resources = ARRAY_SIZE(pxa3xx_resources_gcu), + .resource = pxa3xx_resources_gcu, + .dev = { + .dma_mask = &pxa3xx_gcu_dmamask, + .coherent_dma_mask = 0xffffffff, + }, +}; + #endif /* CONFIG_PXA3xx */ /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1. diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h index ecc24a4dca6..93817d99761 100644 --- a/arch/arm/mach-pxa/devices.h +++ b/arch/arm/mach-pxa/devices.h @@ -35,4 +35,6 @@ extern struct platform_device pxa27x_device_pwm1; extern struct platform_device pxa3xx_device_nand; extern struct platform_device pxa3xx_device_i2c_power; +extern struct platform_device pxa3xx_device_gcu; + void __init pxa_register_device(struct platform_device *dev, void *data); diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c index a36fc17f671..49acdfa6650 100644 --- a/arch/arm/mach-pxa/e740.c +++ b/arch/arm/mach-pxa/e740.c @@ -199,7 +199,6 @@ static void __init e740_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_udc_info(&e7xx_udc_mach_info); pxa_set_ac97_info(NULL); - e7xx_irda_init(); pxa_set_ficp_info(&e7xx_ficp_platform_data); } diff --git a/arch/arm/mach-pxa/e750.c b/arch/arm/mach-pxa/e750.c index 1d00110590e..4052ece3ef4 100644 --- a/arch/arm/mach-pxa/e750.c +++ b/arch/arm/mach-pxa/e750.c @@ -200,7 +200,6 @@ static void __init e750_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_udc_info(&e7xx_udc_mach_info); pxa_set_ac97_info(NULL); - e7xx_irda_init(); pxa_set_ficp_info(&e7xx_ficp_platform_data); } diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 9cd09465a0e..aec7f4214b1 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -646,13 +646,16 @@ static int em_x270_mci_get_ro(struct device *dev) } static struct pxamci_platform_data em_x270_mci_platform_data = { - .ocr_mask = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23| - MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27| - MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30| - MMC_VDD_30_31|MMC_VDD_31_32, - .init = em_x270_mci_init, - .setpower = em_x270_mci_setpower, - .exit = em_x270_mci_exit, + .ocr_mask = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23| + MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27| + MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30| + MMC_VDD_30_31|MMC_VDD_31_32, + .init = em_x270_mci_init, + .setpower = em_x270_mci_setpower, + .exit = em_x270_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void __init em_x270_init_mmc(void) @@ -1022,22 +1025,32 @@ static int em_x270_sensor_power(struct device *dev, int on) return 0; } -static struct soc_camera_link iclink = { - .bus_id = 0, - .power = em_x270_sensor_power, -}; - static struct i2c_board_info em_x270_i2c_cam_info[] = { { I2C_BOARD_INFO("mt9m111", 0x48), + }, +}; + +static struct soc_camera_link iclink = { + .bus_id = 0, + .power = em_x270_sensor_power, + .board_info = &em_x270_i2c_cam_info[0], + .i2c_adapter_id = 0, + .module_name = "mt9m111", +}; + +static struct platform_device em_x270_camera = { + .name = "soc-camera-pdrv", + .id = -1, + .dev = { .platform_data = &iclink, }, }; static void __init em_x270_init_camera(void) { - i2c_register_board_info(0, ARRAY_AND_SIZE(em_x270_i2c_cam_info)); pxa_set_camera_info(&em_x270_camera_platform_data); + platform_device_register(&em_x270_camera); } #else static inline void em_x270_init_camera(void) {} @@ -1103,6 +1116,7 @@ REGULATOR_CONSUMER(ldo5, NULL, "vcc cam"); REGULATOR_CONSUMER(ldo10, &pxa_device_mci.dev, "vcc sdio"); REGULATOR_CONSUMER(ldo12, NULL, "vcc usb"); REGULATOR_CONSUMER(ldo19, &em_x270_gprs_userspace_consumer.dev, "vcc gprs"); +REGULATOR_CONSUMER(buck2, NULL, "vcc_core"); #define REGULATOR_INIT(_ldo, _min_uV, _max_uV, _ops_mask) \ static struct regulator_init_data _ldo##_data = { \ @@ -1125,6 +1139,7 @@ REGULATOR_INIT(ldo10, 2000000, 3200000, REGULATOR_CHANGE_STATUS | REGULATOR_CHANGE_VOLTAGE); REGULATOR_INIT(ldo12, 3000000, 3000000, REGULATOR_CHANGE_STATUS); REGULATOR_INIT(ldo19, 3200000, 3200000, REGULATOR_CHANGE_STATUS); +REGULATOR_INIT(buck2, 1000000, 1650000, REGULATOR_CHANGE_VOLTAGE); struct led_info em_x270_led_info = { .name = "em-x270:orange", @@ -1194,6 +1209,8 @@ struct da903x_subdev_info em_x270_da9030_subdevs[] = { DA9030_LDO(12), DA9030_LDO(19), + DA9030_SUBDEV(regulator, BUCK2, &buck2_data), + DA9030_SUBDEV(led, LED_PC, &em_x270_led_info), DA9030_SUBDEV(backlight, WLED, &em_x270_led_info), DA9030_SUBDEV(battery, BAT, &em_x270_batterty_info), @@ -1245,7 +1262,6 @@ static void __init em_x270_init_i2c(void) static void __init em_x270_module_init(void) { - pr_info("%s\n", __func__); pxa2xx_mfp_config(ARRAY_AND_SIZE(em_x270_pin_config)); mmc_cd = GPIO13_MMC_CD; @@ -1257,7 +1273,6 @@ static void __init em_x270_module_init(void) static void __init em_x270_exeda_init(void) { - pr_info("%s\n", __func__); pxa2xx_mfp_config(ARRAY_AND_SIZE(exeda_pin_config)); mmc_cd = GPIO114_MMC_CD; diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c index c60dadf847a..91417f03506 100644 --- a/arch/arm/mach-pxa/eseries.c +++ b/arch/arm/mach-pxa/eseries.c @@ -47,44 +47,9 @@ struct pxa2xx_udc_mach_info e7xx_udc_mach_info = { .gpio_pullup_inverted = 1 }; -static void e7xx_irda_transceiver_mode(struct device *dev, int mode) -{ - if (mode & IR_OFF) { - gpio_set_value(GPIO_E7XX_IR_OFF, 1); - pxa2xx_transceiver_mode(dev, mode); - } else { - pxa2xx_transceiver_mode(dev, mode); - gpio_set_value(GPIO_E7XX_IR_OFF, 0); - } -} - -int e7xx_irda_init(void) -{ - int ret; - - ret = gpio_request(GPIO_E7XX_IR_OFF, "IrDA power"); - if (ret) - goto out; - - ret = gpio_direction_output(GPIO_E7XX_IR_OFF, 0); - if (ret) - goto out; - - e7xx_irda_transceiver_mode(NULL, IR_SIRMODE | IR_OFF); -out: - return ret; -} - -static void e7xx_irda_shutdown(struct device *dev) -{ - e7xx_irda_transceiver_mode(dev, IR_SIRMODE | IR_OFF); - gpio_free(GPIO_E7XX_IR_OFF); -} - struct pxaficp_platform_data e7xx_ficp_platform_data = { - .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = e7xx_irda_transceiver_mode, - .shutdown = e7xx_irda_shutdown, + .gpio_pwdown = GPIO_E7XX_IR_OFF, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; int eseries_tmio_enable(struct platform_device *dev) diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index ca9912ea78d..1708c010984 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -88,7 +88,10 @@ static struct platform_device *devices[] __initdata = { #ifdef CONFIG_MMC_PXA static struct pxamci_platform_data gumstix_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void __init gumstix_mmc_init(void) diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 81359d574f8..abff9e13274 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -45,6 +45,7 @@ #include <mach/irda.h> #include <mach/pxa2xx_spi.h> +#include <video/platform_lcd.h> #include <video/w100fb.h> #include "devices.h" @@ -174,14 +175,9 @@ static int hx4700_gpio_request(struct gpio_ress *gpios, int size) * IRDA */ -static void irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO105_HX4700_nIR_ON, mode & IR_OFF); -} - static struct pxaficp_platform_data ficp_info = { - .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = irda_transceiver_mode, + .gpio_pwdown = GPIO105_HX4700_nIR_ON, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /* @@ -368,8 +364,6 @@ static struct platform_device egpio = { * LCD - Sony display connected to ATI Imageon w3220 */ -static int lcd_power; - static void sony_lcd_init(void) { gpio_set_value(GPIO84_HX4700_LCD_SQN, 1); @@ -410,35 +404,6 @@ static void sony_lcd_off(void) gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0); } -static int hx4700_lcd_set_power(struct lcd_device *ldev, int level) -{ - switch (level) { - case FB_BLANK_UNBLANK: - sony_lcd_init(); - break; - case FB_BLANK_NORMAL: - case FB_BLANK_VSYNC_SUSPEND: - case FB_BLANK_HSYNC_SUSPEND: - case FB_BLANK_POWERDOWN: - sony_lcd_off(); - break; - } - lcd_power = level; - return 0; -} - -static int hx4700_lcd_get_power(struct lcd_device *lm) -{ - return lcd_power; -} - -static struct lcd_ops hx4700_lcd_ops = { - .get_power = hx4700_lcd_get_power, - .set_power = hx4700_lcd_set_power, -}; - -static struct lcd_device *hx4700_lcd_device; - #ifdef CONFIG_PM static void w3220_lcd_suspend(struct w100fb_par *wfb) { @@ -573,6 +538,27 @@ static struct platform_device w3220 = { .resource = w3220_resources, }; +static void hx4700_lcd_set_power(struct plat_lcd_data *pd, unsigned int power) +{ + if (power) + sony_lcd_init(); + else + sony_lcd_off(); +} + +static struct plat_lcd_data hx4700_lcd_data = { + .set_power = hx4700_lcd_set_power, +}; + +static struct platform_device hx4700_lcd = { + .name = "platform-lcd", + .id = -1, + .dev = { + .platform_data = &hx4700_lcd_data, + .parent = &w3220.dev, + }, +}; + /* * Backlight */ @@ -872,9 +858,6 @@ static void __init hx4700_init(void) pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info)); - hx4700_lcd_device = lcd_device_register("w100fb", NULL, - (void *)&w3220_info, &hx4700_lcd_ops); - gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 0); mdelay(10); gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1); diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index b6243b59d9b..b6486ef20b1 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -168,7 +168,10 @@ static struct pxafb_mach_info sharp_lm8v31 = { }; static struct pxamci_platform_data idp_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void __init idp_init(void) diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c index 961807dc646..2a4945db31c 100644 --- a/arch/arm/mach-pxa/imote2.c +++ b/arch/arm/mach-pxa/imote2.c @@ -389,6 +389,9 @@ static int imote2_mci_get_ro(struct device *dev) static struct pxamci_platform_data imote2_mci_platform_data = { .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* default anyway */ .get_ro = imote2_mci_get_ro, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static struct mtd_partition imote2flash_partitions[] = { diff --git a/arch/arm/mach-pxa/include/mach/balloon3.h b/arch/arm/mach-pxa/include/mach/balloon3.h new file mode 100644 index 00000000000..bfec09b1814 --- /dev/null +++ b/arch/arm/mach-pxa/include/mach/balloon3.h @@ -0,0 +1,134 @@ +/* + * linux/include/asm-arm/arch-pxa/balloon3.h + * + * Authors: Nick Bane and Wookey + * Created: Oct, 2005 + * Copyright: Toby Churchill Ltd + * Cribbed from mainstone.c, by Nicholas Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_ARCH_BALLOON3_H +#define ASM_ARCH_BALLOON3_H + +enum balloon3_features { + BALLOON3_FEATURE_OHCI, + BALLOON3_FEATURE_MMC, + BALLOON3_FEATURE_CF, + BALLOON3_FEATURE_AUDIO, + BALLOON3_FEATURE_TOPPOLY, +}; + +#define BALLOON3_FPGA_PHYS PXA_CS4_PHYS +#define BALLOON3_FPGA_VIRT (0xf1000000) /* as per balloon2 */ +#define BALLOON3_FPGA_LENGTH 0x01000000 + +/* FPGA/CPLD registers */ +#define BALLOON3_PCMCIA0_REG (BALLOON3_FPGA_VIRT + 0x00e00008) +/* fixme - same for now */ +#define BALLOON3_PCMCIA1_REG (BALLOON3_FPGA_VIRT + 0x00e00008) +#define BALLOON3_NANDIO_IO_REG (BALLOON3_FPGA_VIRT + 0x00e00000) +/* fpga/cpld interrupt control register */ +#define BALLOON3_INT_CONTROL_REG (BALLOON3_FPGA_VIRT + 0x00e0000C) +#define BALLOON3_NANDIO_CTL2_REG (BALLOON3_FPGA_VIRT + 0x00e00010) +#define BALLOON3_NANDIO_CTL_REG (BALLOON3_FPGA_VIRT + 0x00e00014) +#define BALLOON3_VERSION_REG (BALLOON3_FPGA_VIRT + 0x00e0001c) + +#define BALLOON3_SAMOSA_ADDR_REG (BALLOON3_FPGA_VIRT + 0x00c00000) +#define BALLOON3_SAMOSA_DATA_REG (BALLOON3_FPGA_VIRT + 0x00c00004) +#define BALLOON3_SAMOSA_STATUS_REG (BALLOON3_FPGA_VIRT + 0x00c0001c) + +/* GPIOs for irqs */ +#define BALLOON3_GPIO_AUX_NIRQ (94) +#define BALLOON3_GPIO_CODEC_IRQ (95) + +/* Timer and Idle LED locations */ +#define BALLOON3_GPIO_LED_NAND (9) +#define BALLOON3_GPIO_LED_IDLE (10) + +/* backlight control */ +#define BALLOON3_GPIO_RUN_BACKLIGHT (99) + +#define BALLOON3_GPIO_S0_CD (105) + +/* FPGA Interrupt Mask/Acknowledge Register */ +#define BALLOON3_INT_S0_IRQ (1 << 0) /* PCMCIA 0 IRQ */ +#define BALLOON3_INT_S0_STSCHG (1 << 1) /* PCMCIA 0 status changed */ + +/* CF Status Register */ +#define BALLOON3_PCMCIA_nIRQ (1 << 0) /* IRQ / ready signal */ +#define BALLOON3_PCMCIA_nSTSCHG_BVD1 (1 << 1) + /* VDD sense / card status changed */ + +/* CF control register (write) */ +#define BALLOON3_PCMCIA_RESET (1 << 0) /* Card reset signal */ +#define BALLOON3_PCMCIA_ENABLE (1 << 1) +#define BALLOON3_PCMCIA_ADD_ENABLE (1 << 2) + +/* CPLD (and FPGA) interface definitions */ +#define CPLD_LCD0_DATA_SET 0x00 +#define CPLD_LCD0_DATA_CLR 0x10 +#define CPLD_LCD0_COMMAND_SET 0x01 +#define CPLD_LCD0_COMMAND_CLR 0x11 +#define CPLD_LCD1_DATA_SET 0x02 +#define CPLD_LCD1_DATA_CLR 0x12 +#define CPLD_LCD1_COMMAND_SET 0x03 +#define CPLD_LCD1_COMMAND_CLR 0x13 + +#define CPLD_MISC_SET 0x07 +#define CPLD_MISC_CLR 0x17 +#define CPLD_MISC_LOON_NRESET_BIT 0 +#define CPLD_MISC_LOON_UNSUSP_BIT 1 +#define CPLD_MISC_RUN_5V_BIT 2 +#define CPLD_MISC_CHG_D0_BIT 3 +#define CPLD_MISC_CHG_D1_BIT 4 +#define CPLD_MISC_DAC_NCS_BIT 5 + +#define CPLD_LCD_SET 0x08 +#define CPLD_LCD_CLR 0x18 +#define CPLD_LCD_BACKLIGHT_EN_0_BIT 0 +#define CPLD_LCD_BACKLIGHT_EN_1_BIT 1 +#define CPLD_LCD_LED_RED_BIT 4 +#define CPLD_LCD_LED_GREEN_BIT 5 +#define CPLD_LCD_NRESET_BIT 7 + +#define CPLD_LCD_RO_SET 0x09 +#define CPLD_LCD_RO_CLR 0x19 +#define CPLD_LCD_RO_LCD0_nWAIT_BIT 0 +#define CPLD_LCD_RO_LCD1_nWAIT_BIT 1 + +#define CPLD_SERIAL_SET 0x0a +#define CPLD_SERIAL_CLR 0x1a +#define CPLD_SERIAL_GSM_RI_BIT 0 +#define CPLD_SERIAL_GSM_CTS_BIT 1 +#define CPLD_SERIAL_GSM_DTR_BIT 2 +#define CPLD_SERIAL_LPR_CTS_BIT 3 +#define CPLD_SERIAL_TC232_CTS_BIT 4 +#define CPLD_SERIAL_TC232_DSR_BIT 5 + +#define CPLD_SROUTING_SET 0x0b +#define CPLD_SROUTING_CLR 0x1b +#define CPLD_SROUTING_MSP430_LPR 0 +#define CPLD_SROUTING_MSP430_TC232 1 +#define CPLD_SROUTING_MSP430_GSM 2 +#define CPLD_SROUTING_LOON_LPR (0 << 4) +#define CPLD_SROUTING_LOON_TC232 (1 << 4) +#define CPLD_SROUTING_LOON_GSM (2 << 4) + +#define CPLD_AROUTING_SET 0x0c +#define CPLD_AROUTING_CLR 0x1c +#define CPLD_AROUTING_MIC2PHONE_BIT 0 +#define CPLD_AROUTING_PHONE2INT_BIT 1 +#define CPLD_AROUTING_PHONE2EXT_BIT 2 +#define CPLD_AROUTING_LOONL2INT_BIT 3 +#define CPLD_AROUTING_LOONL2EXT_BIT 4 +#define CPLD_AROUTING_LOONR2PHONE_BIT 5 +#define CPLD_AROUTING_LOONR2INT_BIT 6 +#define CPLD_AROUTING_LOONR2EXT_BIT 7 + +extern int balloon3_has(enum balloon3_features feature); + +#endif diff --git a/arch/arm/mach-pxa/include/mach/colibri.h b/arch/arm/mach-pxa/include/mach/colibri.h index a88d7caff0d..811743c5614 100644 --- a/arch/arm/mach-pxa/include/mach/colibri.h +++ b/arch/arm/mach-pxa/include/mach/colibri.h @@ -23,6 +23,12 @@ static inline void colibri_pxa3xx_init_lcd(int bl_pin) {} extern void colibri_pxa3xx_init_eth(struct ax_plat_data *plat_data); #endif +#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE) +extern void colibri_pxa3xx_init_nand(void); +#else +static inline void colibri_pxa3xx_init_nand(void) {} +#endif + /* physical memory regions */ #define COLIBRI_SDRAM_BASE 0xa0000000 /* SDRAM region */ diff --git a/arch/arm/mach-pxa/include/mach/entry-macro.S b/arch/arm/mach-pxa/include/mach/entry-macro.S index f6b4bf3e73d..241880608ac 100644 --- a/arch/arm/mach-pxa/include/mach/entry-macro.S +++ b/arch/arm/mach-pxa/include/mach/entry-macro.S @@ -24,34 +24,27 @@ mov \tmp, \tmp, lsr #13 and \tmp, \tmp, #0x7 @ Core G cmp \tmp, #1 - bhi 1004f + bhi 1002f + @ Core Generation 1 (PXA25x) mov \base, #io_p2v(0x40000000) @ IIR Ctl = 0x40d00000 add \base, \base, #0x00d00000 ldr \irqstat, [\base, #0] @ ICIP ldr \irqnr, [\base, #4] @ ICMR - b 1002f -1004: - mrc p6, 0, \irqstat, c6, c0, 0 @ ICIP2 - mrc p6, 0, \irqnr, c7, c0, 0 @ ICMR2 ands \irqnr, \irqstat, \irqnr - beq 1003f + beq 1001f rsb \irqstat, \irqnr, #0 and \irqstat, \irqstat, \irqnr clz \irqnr, \irqstat - rsb \irqnr, \irqnr, #31 - add \irqnr, \irqnr, #(32 + PXA_IRQ(0)) + rsb \irqnr, \irqnr, #(31 + PXA_IRQ(0)) b 1001f -1003: - mrc p6, 0, \irqstat, c0, c0, 0 @ ICIP - mrc p6, 0, \irqnr, c1, c0, 0 @ ICMR 1002: - ands \irqnr, \irqstat, \irqnr + @ Core Generation 2 (PXA27x) or Core Generation 3 (PXA3xx) + mrc p6, 0, \irqstat, c5, c0, 0 @ ICHP + tst \irqstat, #0x80000000 beq 1001f - rsb \irqstat, \irqnr, #0 - and \irqstat, \irqstat, \irqnr - clz \irqnr, \irqstat - rsb \irqnr, \irqnr, #(31 + PXA_IRQ(0)) + bic \irqstat, \irqstat, #0x80000000 + mov \irqnr, \irqstat, lsr #16 1001: .endm diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h index 16ab79547da..aa3d9f70a08 100644 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ b/arch/arm/mach-pxa/include/mach/hardware.h @@ -197,6 +197,16 @@ #define __cpu_is_pxa935(id) (0) #endif +#ifdef CONFIG_CPU_PXA950 +#define __cpu_is_pxa950(id) \ + ({ \ + unsigned int _id = (id) >> 4 & 0xfff; \ + id == 0x697; \ + }) +#else +#define __cpu_is_pxa950(id) (0) +#endif + #define cpu_is_pxa210() \ ({ \ __cpu_is_pxa210(read_cpuid_id()); \ @@ -249,6 +259,13 @@ __cpu_is_pxa935(id); \ }) +#define cpu_is_pxa950() \ + ({ \ + unsigned int id = read_cpuid(CPUID_ID); \ + __cpu_is_pxa950(id); \ + }) + + /* * CPUID Core Generation Bit * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x diff --git a/arch/arm/mach-pxa/include/mach/irda.h b/arch/arm/mach-pxa/include/mach/irda.h index 0a50c3c763d..3cd41f77dda 100644 --- a/arch/arm/mach-pxa/include/mach/irda.h +++ b/arch/arm/mach-pxa/include/mach/irda.h @@ -12,6 +12,8 @@ struct pxaficp_platform_data { void (*transceiver_mode)(struct device *dev, int mode); int (*startup)(struct device *dev); void (*shutdown)(struct device *dev); + int gpio_pwdown; /* powerdown GPIO for the IrDA chip */ + bool gpio_pwdown_inverted; /* gpio_pwdown is inverted */ }; extern void pxa_set_ficp_info(struct pxaficp_platform_data *info); diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h index 6a1d9599334..3677a9af9c8 100644 --- a/arch/arm/mach-pxa/include/mach/irqs.h +++ b/arch/arm/mach-pxa/include/mach/irqs.h @@ -68,9 +68,10 @@ #ifdef CONFIG_PXA3xx #define IRQ_SSP4 PXA_IRQ(13) /* SSP4 service request */ #define IRQ_CIR PXA_IRQ(34) /* Consumer IR */ +#define IRQ_COMM_WDT PXA_IRQ(35) /* Comm WDT interrupt */ #define IRQ_TSI PXA_IRQ(36) /* Touch Screen Interface (PXA320) */ #define IRQ_USIM2 PXA_IRQ(38) /* USIM2 Controller */ -#define IRQ_GRPHICS PXA_IRQ(39) /* Graphics Controller */ +#define IRQ_GCU PXA_IRQ(39) /* Graphics Controller */ #define IRQ_MMC2 PXA_IRQ(41) /* MMC2 Controller */ #define IRQ_1WIRE PXA_IRQ(44) /* 1-Wire Controller */ #define IRQ_NAND PXA_IRQ(45) /* NAND Controller */ @@ -81,8 +82,31 @@ #define IRQ_MMC3 PXA_IRQ(55) /* MMC3 Controller (PXA310) */ #endif -#define PXA_GPIO_IRQ_BASE PXA_IRQ(64) -#define PXA_GPIO_IRQ_NUM (128) +#ifdef CONFIG_CPU_PXA935 +#define IRQ_U2O PXA_IRQ(64) /* USB OTG 2.0 Controller (PXA935) */ +#define IRQ_U2H PXA_IRQ(65) /* USB Host 2.0 Controller (PXA935) */ + +#define IRQ_MMC3_PXA935 PXA_IRQ(72) /* MMC3 Controller (PXA935) */ +#define IRQ_MMC4_PXA935 PXA_IRQ(73) /* MMC4 Controller (PXA935) */ +#define IRQ_MMC5_PXA935 PXA_IRQ(74) /* MMC5 Controller (PXA935) */ + +#define IRQ_U2P PXA_IRQ(93) /* USB PHY D+/D- Lines (PXA935) */ +#endif + +#ifdef CONFIG_CPU_PXA930 +#define IRQ_ENHROT PXA_IRQ(37) /* Enhanced Rotary (PXA930) */ +#define IRQ_ACIPC0 PXA_IRQ(5) +#define IRQ_ACIPC1 PXA_IRQ(40) +#define IRQ_ACIPC2 PXA_IRQ(19) +#define IRQ_TRKBALL PXA_IRQ(43) /* Track Ball */ +#endif + +#ifdef CONFIG_CPU_PXA950 +#define IRQ_GC500 PXA_IRQ(70) /* Graphics Controller (PXA950) */ +#endif + +#define PXA_GPIO_IRQ_BASE PXA_IRQ(96) +#define PXA_GPIO_IRQ_NUM (192) #define GPIO_2_x_TO_IRQ(x) (PXA_GPIO_IRQ_BASE + (x)) #define IRQ_GPIO(x) (((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x)) @@ -105,6 +129,8 @@ #define IRQ_BOARD_END (IRQ_BOARD_START + 70) #elif defined(CONFIG_MACH_ZYLONITE) #define IRQ_BOARD_END (IRQ_BOARD_START + 32) +#elif defined(CONFIG_PXA_EZX) +#define IRQ_BOARD_END (IRQ_BOARD_START + 23) #else #define IRQ_BOARD_END (IRQ_BOARD_START + 16) #endif @@ -237,6 +263,16 @@ #define MAINSTONE_S1_STSCHG_IRQ MAINSTONE_IRQ(14) #define MAINSTONE_S1_IRQ MAINSTONE_IRQ(15) +/* Balloon3 Interrupts */ +#define BALLOON3_IRQ(x) (IRQ_BOARD_START + (x)) + +#define BALLOON3_BP_CF_NRDY_IRQ BALLOON3_IRQ(0) +#define BALLOON3_BP_NSTSCHG_IRQ BALLOON3_IRQ(1) + +#define BALLOON3_AUX_NIRQ IRQ_GPIO(BALLOON3_GPIO_AUX_NIRQ) +#define BALLOON3_CODEC_IRQ IRQ_GPIO(BALLOON3_GPIO_CODEC_IRQ) +#define BALLOON3_S0_CD_IRQ IRQ_GPIO(BALLOON3_GPIO_S0_CD) + /* LoCoMo Interrupts (CONFIG_SHARP_LOCOMO) */ #define IRQ_LOCOMO_KEY_BASE (IRQ_BOARD_START + 0) #define IRQ_LOCOMO_GPIO_BASE (IRQ_BOARD_START + 1) diff --git a/arch/arm/mach-pxa/include/mach/mfp.h b/arch/arm/mach-pxa/include/mach/mfp.h index 482185053a9..271e249ae34 100644 --- a/arch/arm/mach-pxa/include/mach/mfp.h +++ b/arch/arm/mach-pxa/include/mach/mfp.h @@ -16,305 +16,6 @@ #ifndef __ASM_ARCH_MFP_H #define __ASM_ARCH_MFP_H -#define mfp_to_gpio(m) ((m) % 128) - -/* list of all the configurable MFP pins */ -enum { - MFP_PIN_INVALID = -1, - - MFP_PIN_GPIO0 = 0, - MFP_PIN_GPIO1, - MFP_PIN_GPIO2, - MFP_PIN_GPIO3, - MFP_PIN_GPIO4, - MFP_PIN_GPIO5, - MFP_PIN_GPIO6, - MFP_PIN_GPIO7, - MFP_PIN_GPIO8, - MFP_PIN_GPIO9, - MFP_PIN_GPIO10, - MFP_PIN_GPIO11, - MFP_PIN_GPIO12, - MFP_PIN_GPIO13, - MFP_PIN_GPIO14, - MFP_PIN_GPIO15, - MFP_PIN_GPIO16, - MFP_PIN_GPIO17, - MFP_PIN_GPIO18, - MFP_PIN_GPIO19, - MFP_PIN_GPIO20, - MFP_PIN_GPIO21, - MFP_PIN_GPIO22, - MFP_PIN_GPIO23, - MFP_PIN_GPIO24, - MFP_PIN_GPIO25, - MFP_PIN_GPIO26, - MFP_PIN_GPIO27, - MFP_PIN_GPIO28, - MFP_PIN_GPIO29, - MFP_PIN_GPIO30, - MFP_PIN_GPIO31, - MFP_PIN_GPIO32, - MFP_PIN_GPIO33, - MFP_PIN_GPIO34, - MFP_PIN_GPIO35, - MFP_PIN_GPIO36, - MFP_PIN_GPIO37, - MFP_PIN_GPIO38, - MFP_PIN_GPIO39, - MFP_PIN_GPIO40, - MFP_PIN_GPIO41, - MFP_PIN_GPIO42, - MFP_PIN_GPIO43, - MFP_PIN_GPIO44, - MFP_PIN_GPIO45, - MFP_PIN_GPIO46, - MFP_PIN_GPIO47, - MFP_PIN_GPIO48, - MFP_PIN_GPIO49, - MFP_PIN_GPIO50, - MFP_PIN_GPIO51, - MFP_PIN_GPIO52, - MFP_PIN_GPIO53, - MFP_PIN_GPIO54, - MFP_PIN_GPIO55, - MFP_PIN_GPIO56, - MFP_PIN_GPIO57, - MFP_PIN_GPIO58, - MFP_PIN_GPIO59, - MFP_PIN_GPIO60, - MFP_PIN_GPIO61, - MFP_PIN_GPIO62, - MFP_PIN_GPIO63, - MFP_PIN_GPIO64, - MFP_PIN_GPIO65, - MFP_PIN_GPIO66, - MFP_PIN_GPIO67, - MFP_PIN_GPIO68, - MFP_PIN_GPIO69, - MFP_PIN_GPIO70, - MFP_PIN_GPIO71, - MFP_PIN_GPIO72, - MFP_PIN_GPIO73, - MFP_PIN_GPIO74, - MFP_PIN_GPIO75, - MFP_PIN_GPIO76, - MFP_PIN_GPIO77, - MFP_PIN_GPIO78, - MFP_PIN_GPIO79, - MFP_PIN_GPIO80, - MFP_PIN_GPIO81, - MFP_PIN_GPIO82, - MFP_PIN_GPIO83, - MFP_PIN_GPIO84, - MFP_PIN_GPIO85, - MFP_PIN_GPIO86, - MFP_PIN_GPIO87, - MFP_PIN_GPIO88, - MFP_PIN_GPIO89, - MFP_PIN_GPIO90, - MFP_PIN_GPIO91, - MFP_PIN_GPIO92, - MFP_PIN_GPIO93, - MFP_PIN_GPIO94, - MFP_PIN_GPIO95, - MFP_PIN_GPIO96, - MFP_PIN_GPIO97, - MFP_PIN_GPIO98, - MFP_PIN_GPIO99, - MFP_PIN_GPIO100, - MFP_PIN_GPIO101, - MFP_PIN_GPIO102, - MFP_PIN_GPIO103, - MFP_PIN_GPIO104, - MFP_PIN_GPIO105, - MFP_PIN_GPIO106, - MFP_PIN_GPIO107, - MFP_PIN_GPIO108, - MFP_PIN_GPIO109, - MFP_PIN_GPIO110, - MFP_PIN_GPIO111, - MFP_PIN_GPIO112, - MFP_PIN_GPIO113, - MFP_PIN_GPIO114, - MFP_PIN_GPIO115, - MFP_PIN_GPIO116, - MFP_PIN_GPIO117, - MFP_PIN_GPIO118, - MFP_PIN_GPIO119, - MFP_PIN_GPIO120, - MFP_PIN_GPIO121, - MFP_PIN_GPIO122, - MFP_PIN_GPIO123, - MFP_PIN_GPIO124, - MFP_PIN_GPIO125, - MFP_PIN_GPIO126, - MFP_PIN_GPIO127, - MFP_PIN_GPIO0_2, - MFP_PIN_GPIO1_2, - MFP_PIN_GPIO2_2, - MFP_PIN_GPIO3_2, - MFP_PIN_GPIO4_2, - MFP_PIN_GPIO5_2, - MFP_PIN_GPIO6_2, - MFP_PIN_GPIO7_2, - MFP_PIN_GPIO8_2, - MFP_PIN_GPIO9_2, - MFP_PIN_GPIO10_2, - MFP_PIN_GPIO11_2, - MFP_PIN_GPIO12_2, - MFP_PIN_GPIO13_2, - MFP_PIN_GPIO14_2, - MFP_PIN_GPIO15_2, - MFP_PIN_GPIO16_2, - MFP_PIN_GPIO17_2, - - MFP_PIN_ULPI_STP, - MFP_PIN_ULPI_NXT, - MFP_PIN_ULPI_DIR, - - MFP_PIN_nXCVREN, - MFP_PIN_DF_CLE_nOE, - MFP_PIN_DF_nADV1_ALE, - MFP_PIN_DF_SCLK_E, - MFP_PIN_DF_SCLK_S, - MFP_PIN_nBE0, - MFP_PIN_nBE1, - MFP_PIN_DF_nADV2_ALE, - MFP_PIN_DF_INT_RnB, - MFP_PIN_DF_nCS0, - MFP_PIN_DF_nCS1, - MFP_PIN_nLUA, - MFP_PIN_nLLA, - MFP_PIN_DF_nWE, - MFP_PIN_DF_ALE_nWE, - MFP_PIN_DF_nRE_nOE, - MFP_PIN_DF_ADDR0, - MFP_PIN_DF_ADDR1, - MFP_PIN_DF_ADDR2, - MFP_PIN_DF_ADDR3, - MFP_PIN_DF_IO0, - MFP_PIN_DF_IO1, - MFP_PIN_DF_IO2, - MFP_PIN_DF_IO3, - MFP_PIN_DF_IO4, - MFP_PIN_DF_IO5, - MFP_PIN_DF_IO6, - MFP_PIN_DF_IO7, - MFP_PIN_DF_IO8, - MFP_PIN_DF_IO9, - MFP_PIN_DF_IO10, - MFP_PIN_DF_IO11, - MFP_PIN_DF_IO12, - MFP_PIN_DF_IO13, - MFP_PIN_DF_IO14, - MFP_PIN_DF_IO15, - - /* additional pins on PXA930 */ - MFP_PIN_GSIM_UIO, - MFP_PIN_GSIM_UCLK, - MFP_PIN_GSIM_UDET, - MFP_PIN_GSIM_nURST, - MFP_PIN_PMIC_INT, - MFP_PIN_RDY, - - MFP_PIN_MAX, -}; - -/* - * a possible MFP configuration is represented by a 32-bit integer - * - * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum) - * bit 10..12 - Alternate Function Selection - * bit 13..15 - Drive Strength - * bit 16..18 - Low Power Mode State - * bit 19..20 - Low Power Mode Edge Detection - * bit 21..22 - Run Mode Pull State - * - * to facilitate the definition, the following macros are provided - * - * MFP_CFG_DEFAULT - default MFP configuration value, with - * alternate function = 0, - * drive strength = fast 3mA (MFP_DS03X) - * low power mode = default - * edge detection = none - * - * MFP_CFG - default MFPR value with alternate function - * MFP_CFG_DRV - default MFPR value with alternate function and - * pin drive strength - * MFP_CFG_LPM - default MFPR value with alternate function and - * low power mode - * MFP_CFG_X - default MFPR value with alternate function, - * pin drive strength and low power mode - */ - -typedef unsigned long mfp_cfg_t; - -#define MFP_PIN(x) ((x) & 0x3ff) - -#define MFP_AF0 (0x0 << 10) -#define MFP_AF1 (0x1 << 10) -#define MFP_AF2 (0x2 << 10) -#define MFP_AF3 (0x3 << 10) -#define MFP_AF4 (0x4 << 10) -#define MFP_AF5 (0x5 << 10) -#define MFP_AF6 (0x6 << 10) -#define MFP_AF7 (0x7 << 10) -#define MFP_AF_MASK (0x7 << 10) -#define MFP_AF(x) (((x) >> 10) & 0x7) - -#define MFP_DS01X (0x0 << 13) -#define MFP_DS02X (0x1 << 13) -#define MFP_DS03X (0x2 << 13) -#define MFP_DS04X (0x3 << 13) -#define MFP_DS06X (0x4 << 13) -#define MFP_DS08X (0x5 << 13) -#define MFP_DS10X (0x6 << 13) -#define MFP_DS13X (0x7 << 13) -#define MFP_DS_MASK (0x7 << 13) -#define MFP_DS(x) (((x) >> 13) & 0x7) - -#define MFP_LPM_DEFAULT (0x0 << 16) -#define MFP_LPM_DRIVE_LOW (0x1 << 16) -#define MFP_LPM_DRIVE_HIGH (0x2 << 16) -#define MFP_LPM_PULL_LOW (0x3 << 16) -#define MFP_LPM_PULL_HIGH (0x4 << 16) -#define MFP_LPM_FLOAT (0x5 << 16) -#define MFP_LPM_INPUT (0x6 << 16) -#define MFP_LPM_STATE_MASK (0x7 << 16) -#define MFP_LPM_STATE(x) (((x) >> 16) & 0x7) - -#define MFP_LPM_EDGE_NONE (0x0 << 19) -#define MFP_LPM_EDGE_RISE (0x1 << 19) -#define MFP_LPM_EDGE_FALL (0x2 << 19) -#define MFP_LPM_EDGE_BOTH (0x3 << 19) -#define MFP_LPM_EDGE_MASK (0x3 << 19) -#define MFP_LPM_EDGE(x) (((x) >> 19) & 0x3) - -#define MFP_PULL_NONE (0x0 << 21) -#define MFP_PULL_LOW (0x1 << 21) -#define MFP_PULL_HIGH (0x2 << 21) -#define MFP_PULL_BOTH (0x3 << 21) -#define MFP_PULL_MASK (0x3 << 21) -#define MFP_PULL(x) (((x) >> 21) & 0x3) - -#define MFP_CFG_DEFAULT (MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\ - MFP_LPM_EDGE_NONE | MFP_PULL_NONE) - -#define MFP_CFG(pin, af) \ - ((MFP_CFG_DEFAULT & ~MFP_AF_MASK) |\ - (MFP_PIN(MFP_PIN_##pin) | MFP_##af)) - -#define MFP_CFG_DRV(pin, af, drv) \ - ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK)) |\ - (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv)) - -#define MFP_CFG_LPM(pin, af, lpm) \ - ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_LPM_STATE_MASK)) |\ - (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_LPM_##lpm)) - -#define MFP_CFG_X(pin, af, drv, lpm) \ - ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK | MFP_LPM_STATE_MASK)) |\ - (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv | MFP_LPM_##lpm)) +#include <plat/mfp.h> #endif /* __ASM_ARCH_MFP_H */ diff --git a/arch/arm/mach-pxa/include/mach/mmc.h b/arch/arm/mach-pxa/include/mach/mmc.h index 6d1304c9270..02a69dc2ee6 100644 --- a/arch/arm/mach-pxa/include/mach/mmc.h +++ b/arch/arm/mach-pxa/include/mach/mmc.h @@ -14,6 +14,11 @@ struct pxamci_platform_data { int (*get_ro)(struct device *); void (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); + int gpio_card_detect; /* gpio detecting card insertion */ + int gpio_card_ro; /* gpio detecting read only toggle */ + bool gpio_card_ro_invert; /* gpio ro is inverted */ + int gpio_power; /* gpio powering up MMC bus */ + bool gpio_power_invert; /* gpio power is inverted */ }; extern void pxa_set_mci_info(struct pxamci_platform_data *info); diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h new file mode 100644 index 00000000000..3dc9b074ab4 --- /dev/null +++ b/arch/arm/mach-pxa/include/mach/palmtc.h @@ -0,0 +1,86 @@ +/* + * linux/include/asm-arm/arch-pxa/palmtc-gpio.h + * + * GPIOs and interrupts for Palm Tungsten|C Handheld Computer + * + * Authors: Alex Osborne <bobofdoom@gmail.com> + * Marek Vasut <marek.vasut@gmail.com> + * Holger Bocklet <bitz.email@gmx.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _INCLUDE_PALMTC_H_ +#define _INCLUDE_PALMTC_H_ + +/** HERE ARE GPIOs **/ + +/* GPIOs */ +#define GPIO_NR_PALMTC_EARPHONE_DETECT 2 +#define GPIO_NR_PALMTC_CRADLE_DETECT 5 +#define GPIO_NR_PALMTC_HOTSYNC_BUTTON 7 + +/* SD/MMC */ +#define GPIO_NR_PALMTC_SD_DETECT_N 12 +#define GPIO_NR_PALMTC_SD_POWER 32 +#define GPIO_NR_PALMTC_SD_READONLY 54 + +/* WLAN */ +#define GPIO_NR_PALMTC_PCMCIA_READY 13 +#define GPIO_NR_PALMTC_PCMCIA_PWRREADY 14 +#define GPIO_NR_PALMTC_PCMCIA_POWER1 15 +#define GPIO_NR_PALMTC_PCMCIA_POWER2 33 +#define GPIO_NR_PALMTC_PCMCIA_POWER3 55 +#define GPIO_NR_PALMTC_PCMCIA_RESET 78 + +/* UDC */ +#define GPIO_NR_PALMTC_USB_DETECT_N 4 +#define GPIO_NR_PALMTC_USB_POWER 36 + +/* LCD/BACKLIGHT */ +#define GPIO_NR_PALMTC_BL_POWER 16 +#define GPIO_NR_PALMTC_LCD_POWER 44 +#define GPIO_NR_PALMTC_LCD_BLANK 38 + +/* UART */ +#define GPIO_NR_PALMTC_RS232_POWER 37 + +/* IRDA */ +#define GPIO_NR_PALMTC_IR_DISABLE 45 + +/* IRQs */ +#define IRQ_GPIO_PALMTC_SD_DETECT_N IRQ_GPIO(GPIO_NR_PALMTC_SD_DETECT_N) +#define IRQ_GPIO_PALMTC_WLAN_READY IRQ_GPIO(GPIO_NR_PALMTC_WLAN_READY) + +/* UCB1400 GPIOs */ +#define GPIO_NR_PALMTC_POWER_DETECT (0x80 | 0x00) +#define GPIO_NR_PALMTC_HEADPHONE_DETECT (0x80 | 0x01) +#define GPIO_NR_PALMTC_SPEAKER_ENABLE (0x80 | 0x03) +#define GPIO_NR_PALMTC_VIBRA_POWER (0x80 | 0x05) +#define GPIO_NR_PALMTC_LED_POWER (0x80 | 0x07) + +/** HERE ARE INIT VALUES **/ +#define PALMTC_UCB1400_GPIO_OFFSET 0x80 + +/* BATTERY */ +#define PALMTC_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */ +#define PALMTC_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */ +#define PALMTC_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTC_BAT_MIN_CURRENT 0 /* unknown */ +#define PALMTC_BAT_MAX_CHARGE 1 /* unknown */ +#define PALMTC_BAT_MIN_CHARGE 1 /* unknown */ +#define PALMTC_MAX_LIFE_MINS 240 /* on-life in minutes */ + +#define PALMTC_BAT_MEASURE_DELAY (HZ * 1) + +/* BACKLIGHT */ +#define PALMTC_MAX_INTENSITY 0xFE +#define PALMTC_DEFAULT_INTENSITY 0x7E +#define PALMTC_LIMIT_MASK 0x7F +#define PALMTC_PRESCALER 0x3F +#define PALMTC_PERIOD_NS 3500 + +#endif diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h index e74082c872e..1be0db6ed55 100644 --- a/arch/arm/mach-pxa/include/mach/palmtx.h +++ b/arch/arm/mach-pxa/include/mach/palmtx.h @@ -82,6 +82,11 @@ #define PALMTX_PHYS_FLASH_START PXA_CS0_PHYS /* ChipSelect 0 */ #define PALMTX_PHYS_NAND_START PXA_CS1_PHYS /* ChipSelect 1 */ +#define PALMTX_NAND_ALE_PHYS (PALMTX_PHYS_NAND_START | (1 << 24)) +#define PALMTX_NAND_CLE_PHYS (PALMTX_PHYS_NAND_START | (1 << 25)) +#define PALMTX_NAND_ALE_VIRT 0xff100000 +#define PALMTX_NAND_CLE_VIRT 0xff200000 + /* TOUCHSCREEN */ #define AC97_LINK_FRAME 21 diff --git a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h index 7d1a059b3d4..e91d63cfe81 100644 --- a/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h +++ b/arch/arm/mach-pxa/include/mach/pxa3xx-regs.h @@ -208,7 +208,7 @@ #define CKEN_MVED 43 /* < MVED clock enable */ /* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */ -#define PXA300_CKEN_GRAPHICS 42 /* Graphics controller clock enable */ -#define PXA320_CKEN_GRAPHICS 7 /* Graphics controller clock enable */ +#define CKEN_PXA300_GCU 42 /* Graphics controller clock enable */ +#define CKEN_PXA320_GCU 7 /* Graphics controller clock enable */ #endif /* __ASM_ARCH_PXA3XX_REGS_H */ diff --git a/arch/arm/mach-pxa/include/mach/pxafb.h b/arch/arm/mach-pxa/include/mach/pxafb.h index 6932720ba04..f73061c90b5 100644 --- a/arch/arm/mach-pxa/include/mach/pxafb.h +++ b/arch/arm/mach-pxa/include/mach/pxafb.h @@ -118,7 +118,8 @@ struct pxafb_mach_info { u_int fixed_modes:1, cmap_inverse:1, cmap_static:1, - unused:29; + acceleration_enabled:1, + unused:28; /* The following should be defined in LCCR0 * LCCR0_Act or LCCR0_Pas Active or Passive diff --git a/arch/arm/mach-pxa/include/mach/regs-intc.h b/arch/arm/mach-pxa/include/mach/regs-intc.h index ad23e74b762..68464ce1c1e 100644 --- a/arch/arm/mach-pxa/include/mach/regs-intc.h +++ b/arch/arm/mach-pxa/include/mach/regs-intc.h @@ -13,6 +13,7 @@ #define ICFP __REG(0x40D0000C) /* Interrupt Controller FIQ Pending Register */ #define ICPR __REG(0x40D00010) /* Interrupt Controller Pending Register */ #define ICCR __REG(0x40D00014) /* Interrupt Controller Control Register */ +#define ICHP __REG(0x40D00018) /* Interrupt Controller Highest Priority Register */ #define ICIP2 __REG(0x40D0009C) /* Interrupt Controller IRQ Pending Register 2 */ #define ICMR2 __REG(0x40D000A0) /* Interrupt Controller Mask Register 2 */ @@ -20,4 +21,14 @@ #define ICFP2 __REG(0x40D000A8) /* Interrupt Controller FIQ Pending Register 2 */ #define ICPR2 __REG(0x40D000AC) /* Interrupt Controller Pending Register 2 */ +#define ICIP3 __REG(0x40D00130) /* Interrupt Controller IRQ Pending Register 3 */ +#define ICMR3 __REG(0x40D00134) /* Interrupt Controller Mask Register 3 */ +#define ICLR3 __REG(0x40D00138) /* Interrupt Controller Level Register 3 */ +#define ICFP3 __REG(0x40D0013C) /* Interrupt Controller FIQ Pending Register 3 */ +#define ICPR3 __REG(0x40D00140) /* Interrupt Controller Pending Register 3 */ + +#define IPR(x) __REG(0x40D0001C + (x < 32 ? (x << 2) \ + : (x < 64 ? (0x94 + ((x - 32) << 2)) \ + : (0x128 + ((x - 64) << 2))))) + #endif /* __ASM_MACH_REGS_INTC_H */ diff --git a/arch/arm/mach-pxa/include/mach/uncompress.h b/arch/arm/mach-pxa/include/mach/uncompress.h index b54749413e9..237734b5b1b 100644 --- a/arch/arm/mach-pxa/include/mach/uncompress.h +++ b/arch/arm/mach-pxa/include/mach/uncompress.h @@ -37,7 +37,7 @@ static inline void arch_decomp_setup(void) { if (machine_is_littleton() || machine_is_intelmote2() || machine_is_csb726() || machine_is_stargate2() - || machine_is_cm_x300()) + || machine_is_cm_x300() || machine_is_balloon3()) UART = STUART; } diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index f6e0300e4f6..d694ce28966 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -120,7 +120,7 @@ static void __init pxa_init_low_gpio_irq(set_wake_t fn) void __init pxa_init_irq(int irq_nr, set_wake_t fn) { - int irq; + int irq, i; pxa_internal_irq_nr = irq_nr; @@ -129,6 +129,12 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn) _ICLR(irq) = 0; /* all IRQs are IRQ, not FIQ */ } + /* initialize interrupt priority */ + if (cpu_is_pxa27x() || cpu_is_pxa3xx()) { + for (i = 0; i < irq_nr; i++) + IPR(i) = i | (1 << 31); + } + /* only unmasked interrupts kick us out of idle */ ICCR = 1; diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 55b3788fd1a..13848955d13 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -265,45 +265,12 @@ static inline void littleton_init_keypad(void) {} #endif #if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE) -static int littleton_mci_init(struct device *dev, - irq_handler_t littleton_detect_int, void *data) -{ - int err, gpio_cd = GPIO_MMC1_CARD_DETECT; - - err = gpio_request(gpio_cd, "mmc card detect"); - if (err) - goto err_request_cd; - - gpio_direction_input(gpio_cd); - - err = request_irq(gpio_to_irq(gpio_cd), littleton_detect_int, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "mmc card detect", data); - if (err) { - dev_err(dev, "failed to request card detect IRQ\n"); - goto err_request_irq; - } - return 0; - -err_request_irq: - gpio_free(gpio_cd); -err_request_cd: - return err; -} - -static void littleton_mci_exit(struct device *dev, void *data) -{ - int gpio_cd = GPIO_MMC1_CARD_DETECT; - - free_irq(gpio_to_irq(gpio_cd), data); - gpio_free(gpio_cd); -} - static struct pxamci_platform_data littleton_mci_platform_data = { - .detect_delay = 20, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .init = littleton_mci_init, - .exit = littleton_mci_exit, + .detect_delay = 20, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_MMC1_CARD_DETECT, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void __init littleton_init_mmc(void) diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index f04c8333dff..c6a94d3fdd6 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -482,11 +482,14 @@ static void lubbock_mci_exit(struct device *dev, void *data) } static struct pxamci_platform_data lubbock_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .detect_delay = 1, - .init = lubbock_mci_init, - .get_ro = lubbock_mci_get_ro, - .exit = lubbock_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .detect_delay = 1, + .init = lubbock_mci_init, + .get_ro = lubbock_mci_get_ro, + .exit = lubbock_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void lubbock_irda_transceiver_mode(struct device *dev, int mode) @@ -504,8 +507,9 @@ static void lubbock_irda_transceiver_mode(struct device *dev, int mode) } static struct pxaficp_platform_data lubbock_ficp_platform_data = { - .transceiver_cap = IR_SIRMODE | IR_FIRMODE, - .transceiver_mode = lubbock_irda_transceiver_mode, + .gpio_pwdown = -1, + .transceiver_cap = IR_SIRMODE | IR_FIRMODE, + .transceiver_mode = lubbock_irda_transceiver_mode, }; static void __init lubbock_init(void) diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index ca39669cffc..5360c07f513 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -140,15 +140,9 @@ static unsigned long magician_pin_config[] __initdata = { * IRDA */ -static void magician_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO83_MAGICIAN_nIR_EN, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data magician_ficp_info = { - .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = magician_irda_transceiver_mode, + .gpio_pwdown = GPIO83_MAGICIAN_nIR_EN, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /* @@ -651,55 +645,24 @@ static struct platform_device bq24022 = { static int magician_mci_init(struct device *dev, irq_handler_t detect_irq, void *data) { - int err; - - err = request_irq(IRQ_MAGICIAN_SD, detect_irq, + return request_irq(IRQ_MAGICIAN_SD, detect_irq, IRQF_DISABLED | IRQF_SAMPLE_RANDOM, - "MMC card detect", data); - if (err) - goto err_request_irq; - err = gpio_request(EGPIO_MAGICIAN_SD_POWER, "SD_POWER"); - if (err) - goto err_request_power; - err = gpio_request(EGPIO_MAGICIAN_nSD_READONLY, "nSD_READONLY"); - if (err) - goto err_request_readonly; - - return 0; - -err_request_readonly: - gpio_free(EGPIO_MAGICIAN_SD_POWER); -err_request_power: - free_irq(IRQ_MAGICIAN_SD, data); -err_request_irq: - return err; -} - -static void magician_mci_setpower(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *pdata = dev->platform_data; - - gpio_set_value(EGPIO_MAGICIAN_SD_POWER, (1 << vdd) & pdata->ocr_mask); -} - -static int magician_mci_get_ro(struct device *dev) -{ - return (!gpio_get_value(EGPIO_MAGICIAN_nSD_READONLY)); + "mmc card detect", data); } static void magician_mci_exit(struct device *dev, void *data) { - gpio_free(EGPIO_MAGICIAN_nSD_READONLY); - gpio_free(EGPIO_MAGICIAN_SD_POWER); free_irq(IRQ_MAGICIAN_SD, data); } static struct pxamci_platform_data magician_mci_info = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = magician_mci_init, - .get_ro = magician_mci_get_ro, - .setpower = magician_mci_setpower, - .exit = magician_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = magician_mci_init, + .exit = magician_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = EGPIO_MAGICIAN_nSD_READONLY, + .gpio_card_ro_invert = 1, + .gpio_power = EGPIO_MAGICIAN_SD_POWER, }; diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index f4dabf0273c..a4eeae345e6 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -450,10 +450,13 @@ static void mainstone_mci_exit(struct device *dev, void *data) } static struct pxamci_platform_data mainstone_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = mainstone_mci_init, - .setpower = mainstone_mci_setpower, - .exit = mainstone_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = mainstone_mci_init, + .setpower = mainstone_mci_setpower, + .exit = mainstone_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static void mainstone_irda_transceiver_mode(struct device *dev, int mode) @@ -476,8 +479,9 @@ static void mainstone_irda_transceiver_mode(struct device *dev, int mode) } static struct pxaficp_platform_data mainstone_ficp_platform_data = { - .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, - .transceiver_mode = mainstone_irda_transceiver_mode, + .gpio_pwdown = -1, + .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, + .transceiver_mode = mainstone_irda_transceiver_mode, }; static struct gpio_keys_button gpio_keys_button[] = { diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index 2d28132c725..3cab452e556 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -434,72 +434,15 @@ struct gpio_vbus_mach_info gpio_vbus_data = { /* * SDIO/MMC Card controller */ -static void mci_setpower(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - - if ((1 << vdd) & p_d->ocr_mask) - gpio_set_value(GPIO91_SDIO_EN, 1); /* enable SDIO power */ - else - gpio_set_value(GPIO91_SDIO_EN, 0); /* disable SDIO power */ -} - -static int mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO78_SDIO_RO); -} - -struct gpio_ress mci_gpios[] = { - MIO_GPIO_IN(GPIO78_SDIO_RO, "SDIO readonly detect"), - MIO_GPIO_IN(GPIO15_SDIO_INSERT, "SDIO insertion detect"), - MIO_GPIO_OUT(GPIO91_SDIO_EN, 0, "SDIO power enable") -}; - -static void mci_exit(struct device *dev, void *data) -{ - mio_gpio_free(ARRAY_AND_SIZE(mci_gpios)); - free_irq(gpio_to_irq(GPIO15_SDIO_INSERT), data); -} - -static struct pxamci_platform_data mioa701_mci_info; - /** * The card detect interrupt isn't debounced so we delay it by 250ms * to give the card a chance to fully insert/eject. */ -static int mci_init(struct device *dev, irq_handler_t detect_int, void *data) -{ - int rc; - int irq = gpio_to_irq(GPIO15_SDIO_INSERT); - - rc = mio_gpio_request(ARRAY_AND_SIZE(mci_gpios)); - if (rc) - goto err_gpio; - /* enable RE/FE interrupt on card insertion and removal */ - rc = request_irq(irq, detect_int, - IRQF_DISABLED | IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (rc) - goto err_irq; - - mioa701_mci_info.detect_delay = msecs_to_jiffies(250); - return 0; - -err_irq: - dev_err(dev, "mioa701_mci_init: MMC/SD:" - " can't request MMC card detect IRQ\n"); - mio_gpio_free(ARRAY_AND_SIZE(mci_gpios)); -err_gpio: - return rc; -} - static struct pxamci_platform_data mioa701_mci_info = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .init = mci_init, - .get_ro = mci_get_ro, - .setpower = mci_setpower, - .exit = mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO15_SDIO_INSERT, + .gpio_card_ro = GPIO78_SDIO_RO, + .gpio_power = GPIO91_SDIO_EN, }; /* FlashRAM */ @@ -765,19 +708,20 @@ static struct i2c_board_info __initdata mioa701_pi2c_devices[] = { }, }; -static struct soc_camera_link iclink = { - .bus_id = 0, /* Must match id in pxa27x_device_camera in device.c */ -}; - /* Board I2C devices. */ static struct i2c_board_info __initdata mioa701_i2c_devices[] = { { - /* Must initialize before the camera(s) */ I2C_BOARD_INFO("mt9m111", 0x5d), - .platform_data = &iclink, }, }; +static struct soc_camera_link iclink = { + .bus_id = 0, /* Match id in pxa27x_device_camera in device.c */ + .board_info = &mioa701_i2c_devices[0], + .i2c_adapter_id = 0, + .module_name = "mt9m111", +}; + struct i2c_pxa_platform_data i2c_pdata = { .fast_mode = 1, }; @@ -811,6 +755,7 @@ MIO_SIMPLE_DEV(pxa2xx_pcm, "pxa2xx-pcm", NULL) MIO_SIMPLE_DEV(mioa701_sound, "mioa701-wm9713", NULL) MIO_SIMPLE_DEV(mioa701_board, "mioa701-board", NULL) MIO_SIMPLE_DEV(gpio_vbus, "gpio-vbus", &gpio_vbus_data); +MIO_SIMPLE_DEV(mioa701_camera, "soc-camera-pdrv",&iclink); static struct platform_device *devices[] __initdata = { &mioa701_gpio_keys, @@ -821,6 +766,7 @@ static struct platform_device *devices[] __initdata = { &power_dev, &strataflash, &gpio_vbus, + &mioa701_camera, &mioa701_board, }; @@ -841,7 +787,7 @@ static void mioa701_restart(char c, const char *cmd) static struct gpio_ress global_gpios[] = { MIO_GPIO_OUT(GPIO9_CHARGE_EN, 1, "Charger enable"), MIO_GPIO_OUT(GPIO18_POWEROFF, 0, "Power Off"), - MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power") + MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power"), }; static void __init mioa701_machine_init(void) @@ -855,6 +801,7 @@ static void __init mioa701_machine_init(void) mio_gpio_request(ARRAY_AND_SIZE(global_gpios)); bootstrap_init(); set_pxa_fb_info(&mioa701_pxafb_info); + mioa701_mci_info.detect_delay = msecs_to_jiffies(250); pxa_set_mci_info(&mioa701_mci_info); pxa_set_keypad_info(&mioa701_keypad_info); wm97xx_bat_set_pdata(&mioa701_battery_data); @@ -869,7 +816,6 @@ static void __init mioa701_machine_init(void) pxa_set_i2c_info(&i2c_pdata); pxa27x_set_i2c_power_info(NULL); pxa_set_camera_info(&mioa701_pxacamera_platform_data); - i2c_register_board_info(0, ARRAY_AND_SIZE(mioa701_i2c_devices)); } static void mioa701_machine_exit(void) diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c index 169fcc18154..1ad029dd443 100644 --- a/arch/arm/mach-pxa/palmld.c +++ b/arch/arm/mach-pxa/palmld.c @@ -25,6 +25,9 @@ #include <linux/wm97xx_batt.h> #include <linux/power_supply.h> #include <linux/sysdev.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -141,85 +144,50 @@ static unsigned long palmld_pin_config[] __initdata = { }; /****************************************************************************** - * SD/MMC card controller + * NOR Flash ******************************************************************************/ -static int palmld_mci_init(struct device *dev, irq_handler_t palmld_detect_int, - void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_PALMLD_SD_DETECT_N, "SD IRQ"); - if (err) - goto err; - err = gpio_direction_input(GPIO_NR_PALMLD_SD_DETECT_N); - if (err) - goto err2; - err = request_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), - palmld_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; +static struct mtd_partition palmld_partitions[] = { + { + .name = "Flash", + .offset = 0x00000000, + .size = MTDPART_SIZ_FULL, + .mask_flags = 0 } +}; - err = gpio_request(GPIO_NR_PALMLD_SD_POWER, "SD_POWER"); - if (err) - goto err3; - err = gpio_direction_output(GPIO_NR_PALMLD_SD_POWER, 0); - if (err) - goto err4; - - err = gpio_request(GPIO_NR_PALMLD_SD_READONLY, "SD_READONLY"); - if (err) - goto err4; - err = gpio_direction_input(GPIO_NR_PALMLD_SD_READONLY); - if (err) - goto err5; - - printk(KERN_DEBUG "%s: irq registered\n", __func__); - - return 0; - -err5: - gpio_free(GPIO_NR_PALMLD_SD_READONLY); -err4: - gpio_free(GPIO_NR_PALMLD_SD_POWER); -err3: - free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_PALMLD_SD_DETECT_N); -err: - return err; -} - -static void palmld_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_PALMLD_SD_READONLY); - gpio_free(GPIO_NR_PALMLD_SD_POWER); - free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data); - gpio_free(GPIO_NR_PALMLD_SD_DETECT_N); -} +static struct physmap_flash_data palmld_flash_data[] = { + { + .width = 2, /* bankwidth in bytes */ + .parts = palmld_partitions, + .nr_parts = ARRAY_SIZE(palmld_partitions) + } +}; -static void palmld_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - gpio_set_value(GPIO_NR_PALMLD_SD_POWER, p_d->ocr_mask & (1 << vdd)); -} +static struct resource palmld_flash_resource = { + .start = PXA_CS0_PHYS, + .end = PXA_CS0_PHYS + SZ_4M - 1, + .flags = IORESOURCE_MEM, +}; -static int palmld_mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_PALMLD_SD_READONLY); -} +static struct platform_device palmld_flash = { + .name = "physmap-flash", + .id = 0, + .resource = &palmld_flash_resource, + .num_resources = 1, + .dev = { + .platform_data = palmld_flash_data, + }, +}; +/****************************************************************************** + * SD/MMC card controller + ******************************************************************************/ static struct pxamci_platform_data palmld_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = palmld_mci_power, - .get_ro = palmld_mci_get_ro, - .init = palmld_mci_init, - .exit = palmld_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_PALMLD_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_PALMLD_SD_READONLY, + .gpio_power = GPIO_NR_PALMLD_SD_POWER, + .detect_delay = 20, }; /****************************************************************************** @@ -336,35 +304,9 @@ static struct platform_device palmld_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static int palmld_irda_startup(struct device *dev) -{ - int err; - err = gpio_request(GPIO_NR_PALMLD_IR_DISABLE, "IR DISABLE"); - if (err) - goto err; - err = gpio_direction_output(GPIO_NR_PALMLD_IR_DISABLE, 1); - if (err) - gpio_free(GPIO_NR_PALMLD_IR_DISABLE); -err: - return err; -} - -static void palmld_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_PALMLD_IR_DISABLE); -} - -static void palmld_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_PALMLD_IR_DISABLE, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data palmld_ficp_platform_data = { - .startup = palmld_irda_startup, - .shutdown = palmld_irda_shutdown, - .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, - .transceiver_mode = palmld_irda_transceiver_mode, + .gpio_pwdown = GPIO_NR_PALMLD_IR_DISABLE, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /****************************************************************************** @@ -560,6 +502,7 @@ static struct platform_device *devices[] __initdata = { &power_supply, &palmld_asoc, &palmld_hdd, + &palmld_flash, }; static struct map_desc palmld_io_desc[] __initdata = { diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c index 33f726ff55e..2dd7ce28556 100644 --- a/arch/arm/mach-pxa/palmt5.c +++ b/arch/arm/mach-pxa/palmt5.c @@ -124,83 +124,12 @@ static unsigned long palmt5_pin_config[] __initdata = { /****************************************************************************** * SD/MMC card controller ******************************************************************************/ -static int palmt5_mci_init(struct device *dev, irq_handler_t palmt5_detect_int, - void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_PALMT5_SD_DETECT_N, "SD IRQ"); - if (err) - goto err; - err = gpio_direction_input(GPIO_NR_PALMT5_SD_DETECT_N); - if (err) - goto err2; - err = request_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N), - palmt5_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; - } - - err = gpio_request(GPIO_NR_PALMT5_SD_POWER, "SD_POWER"); - if (err) - goto err3; - err = gpio_direction_output(GPIO_NR_PALMT5_SD_POWER, 0); - if (err) - goto err4; - - err = gpio_request(GPIO_NR_PALMT5_SD_READONLY, "SD_READONLY"); - if (err) - goto err4; - err = gpio_direction_input(GPIO_NR_PALMT5_SD_READONLY); - if (err) - goto err5; - - printk(KERN_DEBUG "%s: irq registered\n", __func__); - - return 0; - -err5: - gpio_free(GPIO_NR_PALMT5_SD_READONLY); -err4: - gpio_free(GPIO_NR_PALMT5_SD_POWER); -err3: - free_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_PALMT5_SD_DETECT_N); -err: - return err; -} - -static void palmt5_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_PALMT5_SD_READONLY); - gpio_free(GPIO_NR_PALMT5_SD_POWER); - free_irq(IRQ_GPIO_PALMT5_SD_DETECT_N, data); - gpio_free(GPIO_NR_PALMT5_SD_DETECT_N); -} - -static void palmt5_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - gpio_set_value(GPIO_NR_PALMT5_SD_POWER, p_d->ocr_mask & (1 << vdd)); -} - -static int palmt5_mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_PALMT5_SD_READONLY); -} - static struct pxamci_platform_data palmt5_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = palmt5_mci_power, - .get_ro = palmt5_mci_get_ro, - .init = palmt5_mci_init, - .exit = palmt5_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_PALMT5_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_PALMT5_SD_READONLY, + .gpio_power = GPIO_NR_PALMT5_SD_POWER, + .detect_delay = 20, }; /****************************************************************************** @@ -314,35 +243,9 @@ static struct platform_device palmt5_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static int palmt5_irda_startup(struct device *dev) -{ - int err; - err = gpio_request(GPIO_NR_PALMT5_IR_DISABLE, "IR DISABLE"); - if (err) - goto err; - err = gpio_direction_output(GPIO_NR_PALMT5_IR_DISABLE, 1); - if (err) - gpio_free(GPIO_NR_PALMT5_IR_DISABLE); -err: - return err; -} - -static void palmt5_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_PALMT5_IR_DISABLE); -} - -static void palmt5_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_PALMT5_IR_DISABLE, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data palmt5_ficp_platform_data = { - .startup = palmt5_irda_startup, - .shutdown = palmt5_irda_shutdown, - .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, - .transceiver_mode = palmt5_irda_transceiver_mode, + .gpio_pwdown = GPIO_NR_PALMT5_IR_DISABLE, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /****************************************************************************** diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c new file mode 100644 index 00000000000..bb2cc0dd44e --- /dev/null +++ b/arch/arm/mach-pxa/palmtc.c @@ -0,0 +1,436 @@ +/* + * linux/arch/arm/mach-pxa/palmtc.c + * + * Support for the Palm Tungsten|C + * + * Author: Marek Vasut <marek.vasut@gmail.com> + * + * Based on work of: + * Petr Blaha <p3t3@centrum.cz> + * Chetan S. Kumar <shivakumar.chetan@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/input.h> +#include <linux/pwm_backlight.h> +#include <linux/gpio.h> +#include <linux/input/matrix_keypad.h> +#include <linux/ucb1400.h> +#include <linux/power_supply.h> +#include <linux/gpio_keys.h> +#include <linux/mtd/physmap.h> + +#include <asm/mach-types.h> +#include <asm/mach/arch.h> +#include <asm/mach/map.h> + +#include <mach/audio.h> +#include <mach/palmtc.h> +#include <mach/mmc.h> +#include <mach/pxafb.h> +#include <mach/mfp-pxa25x.h> +#include <mach/irda.h> +#include <mach/udc.h> +#include <mach/pxa2xx-regs.h> + +#include "generic.h" +#include "devices.h" + +/****************************************************************************** + * Pin configuration + ******************************************************************************/ +static unsigned long palmtc_pin_config[] __initdata = { + /* MMC */ + GPIO6_MMC_CLK, + GPIO8_MMC_CS0, + GPIO12_GPIO, /* detect */ + GPIO32_GPIO, /* power */ + GPIO54_GPIO, /* r/o switch */ + + /* PCMCIA */ + GPIO52_nPCE_1, + GPIO53_nPCE_2, + GPIO50_nPIOR, + GPIO51_nPIOW, + GPIO49_nPWE, + GPIO48_nPOE, + GPIO52_nPCE_1, + GPIO53_nPCE_2, + GPIO57_nIOIS16, + GPIO56_nPWAIT, + + /* AC97 */ + GPIO28_AC97_BITCLK, + GPIO29_AC97_SDATA_IN_0, + GPIO30_AC97_SDATA_OUT, + GPIO31_AC97_SYNC, + + /* IrDA */ + GPIO45_GPIO, /* ir disable */ + GPIO46_FICP_RXD, + GPIO47_FICP_TXD, + + /* PWM */ + GPIO17_PWM1_OUT, + + /* USB */ + GPIO4_GPIO, /* detect */ + GPIO36_GPIO, /* pullup */ + + /* LCD */ + GPIO58_LCD_LDD_0, + GPIO59_LCD_LDD_1, + GPIO60_LCD_LDD_2, + GPIO61_LCD_LDD_3, + GPIO62_LCD_LDD_4, + GPIO63_LCD_LDD_5, + GPIO64_LCD_LDD_6, + GPIO65_LCD_LDD_7, + GPIO66_LCD_LDD_8, + GPIO67_LCD_LDD_9, + GPIO68_LCD_LDD_10, + GPIO69_LCD_LDD_11, + GPIO70_LCD_LDD_12, + GPIO71_LCD_LDD_13, + GPIO72_LCD_LDD_14, + GPIO73_LCD_LDD_15, + GPIO74_LCD_FCLK, + GPIO75_LCD_LCLK, + GPIO76_LCD_PCLK, + GPIO77_LCD_BIAS, + + /* MATRIX KEYPAD */ + GPIO0_GPIO | WAKEUP_ON_EDGE_BOTH, /* in 0 */ + GPIO9_GPIO | WAKEUP_ON_EDGE_BOTH, /* in 1 */ + GPIO10_GPIO | WAKEUP_ON_EDGE_BOTH, /* in 2 */ + GPIO11_GPIO | WAKEUP_ON_EDGE_BOTH, /* in 3 */ + GPIO18_GPIO | MFP_LPM_DRIVE_LOW, /* out 0 */ + GPIO19_GPIO | MFP_LPM_DRIVE_LOW, /* out 1 */ + GPIO20_GPIO | MFP_LPM_DRIVE_LOW, /* out 2 */ + GPIO21_GPIO | MFP_LPM_DRIVE_LOW, /* out 3 */ + GPIO22_GPIO | MFP_LPM_DRIVE_LOW, /* out 4 */ + GPIO23_GPIO | MFP_LPM_DRIVE_LOW, /* out 5 */ + GPIO24_GPIO | MFP_LPM_DRIVE_LOW, /* out 6 */ + GPIO25_GPIO | MFP_LPM_DRIVE_LOW, /* out 7 */ + GPIO26_GPIO | MFP_LPM_DRIVE_LOW, /* out 8 */ + GPIO27_GPIO | MFP_LPM_DRIVE_LOW, /* out 9 */ + GPIO79_GPIO | MFP_LPM_DRIVE_LOW, /* out 10 */ + GPIO80_GPIO | MFP_LPM_DRIVE_LOW, /* out 11 */ + + /* PXA GPIO KEYS */ + GPIO7_GPIO | WAKEUP_ON_EDGE_BOTH, /* hotsync button on cradle */ + + /* MISC */ + GPIO1_RST, /* reset */ + GPIO2_GPIO, /* earphone detect */ + GPIO16_GPIO, /* backlight switch */ +}; + +/****************************************************************************** + * SD/MMC card controller + ******************************************************************************/ +static struct pxamci_platform_data palmtc_mci_platform_data = { + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_power = GPIO_NR_PALMTC_SD_POWER, + .gpio_card_ro = GPIO_NR_PALMTC_SD_READONLY, + .gpio_card_detect = GPIO_NR_PALMTC_SD_DETECT_N, + .detect_delay = 20, +}; + +/****************************************************************************** + * GPIO keys + ******************************************************************************/ +static struct gpio_keys_button palmtc_pxa_buttons[] = { + {KEY_F8, GPIO_NR_PALMTC_HOTSYNC_BUTTON, 1, "HotSync Button", EV_KEY, 1}, +}; + +static struct gpio_keys_platform_data palmtc_pxa_keys_data = { + .buttons = palmtc_pxa_buttons, + .nbuttons = ARRAY_SIZE(palmtc_pxa_buttons), +}; + +static struct platform_device palmtc_pxa_keys = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &palmtc_pxa_keys_data, + }, +}; + +/****************************************************************************** + * Backlight + ******************************************************************************/ +static int palmtc_backlight_init(struct device *dev) +{ + int ret; + + ret = gpio_request(GPIO_NR_PALMTC_BL_POWER, "BL POWER"); + if (ret) + goto err; + ret = gpio_direction_output(GPIO_NR_PALMTC_BL_POWER, 1); + if (ret) + goto err2; + + return 0; + +err2: + gpio_free(GPIO_NR_PALMTC_BL_POWER); +err: + return ret; +} + +static int palmtc_backlight_notify(int brightness) +{ + /* backlight is on when GPIO16 AF0 is high */ + gpio_set_value(GPIO_NR_PALMTC_BL_POWER, brightness); + return brightness; +} + +static void palmtc_backlight_exit(struct device *dev) +{ + gpio_free(GPIO_NR_PALMTC_BL_POWER); +} + +static struct platform_pwm_backlight_data palmtc_backlight_data = { + .pwm_id = 1, + .max_brightness = PALMTC_MAX_INTENSITY, + .dft_brightness = PALMTC_MAX_INTENSITY, + .pwm_period_ns = PALMTC_PERIOD_NS, + .init = palmtc_backlight_init, + .notify = palmtc_backlight_notify, + .exit = palmtc_backlight_exit, +}; + +static struct platform_device palmtc_backlight = { + .name = "pwm-backlight", + .dev = { + .parent = &pxa25x_device_pwm1.dev, + .platform_data = &palmtc_backlight_data, + }, +}; + +/****************************************************************************** + * IrDA + ******************************************************************************/ +static struct pxaficp_platform_data palmtc_ficp_platform_data = { + .gpio_pwdown = GPIO_NR_PALMTC_IR_DISABLE, + .transceiver_cap = IR_SIRMODE | IR_OFF, +}; + +/****************************************************************************** + * Keyboard + ******************************************************************************/ +static const uint32_t palmtc_matrix_keys[] = { + KEY(0, 0, KEY_F1), + KEY(0, 1, KEY_X), + KEY(0, 2, KEY_POWER), + KEY(0, 3, KEY_TAB), + KEY(0, 4, KEY_A), + KEY(0, 5, KEY_Q), + KEY(0, 6, KEY_LEFTSHIFT), + KEY(0, 7, KEY_Z), + KEY(0, 8, KEY_S), + KEY(0, 9, KEY_W), + KEY(0, 10, KEY_E), + KEY(0, 11, KEY_UP), + + KEY(1, 0, KEY_F2), + KEY(1, 1, KEY_DOWN), + KEY(1, 3, KEY_D), + KEY(1, 4, KEY_C), + KEY(1, 5, KEY_F), + KEY(1, 6, KEY_R), + KEY(1, 7, KEY_SPACE), + KEY(1, 8, KEY_V), + KEY(1, 9, KEY_G), + KEY(1, 10, KEY_T), + KEY(1, 11, KEY_LEFT), + + KEY(2, 0, KEY_F3), + KEY(2, 1, KEY_LEFTCTRL), + KEY(2, 3, KEY_H), + KEY(2, 4, KEY_Y), + KEY(2, 5, KEY_N), + KEY(2, 6, KEY_J), + KEY(2, 7, KEY_U), + KEY(2, 8, KEY_M), + KEY(2, 9, KEY_K), + KEY(2, 10, KEY_I), + KEY(2, 11, KEY_RIGHT), + + KEY(3, 0, KEY_F4), + KEY(3, 1, KEY_ENTER), + KEY(3, 3, KEY_DOT), + KEY(3, 4, KEY_L), + KEY(3, 5, KEY_O), + KEY(3, 6, KEY_LEFTALT), + KEY(3, 7, KEY_ENTER), + KEY(3, 8, KEY_BACKSPACE), + KEY(3, 9, KEY_P), + KEY(3, 10, KEY_B), + KEY(3, 11, KEY_FN), +}; + +const struct matrix_keymap_data palmtc_keymap_data = { + .keymap = palmtc_matrix_keys, + .keymap_size = ARRAY_SIZE(palmtc_matrix_keys), +}; + +const static unsigned int palmtc_keypad_row_gpios[] = { + 0, 9, 10, 11 +}; + +const static unsigned int palmtc_keypad_col_gpios[] = { + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 79, 80 +}; + +static struct matrix_keypad_platform_data palmtc_keypad_platform_data = { + .keymap_data = &palmtc_keymap_data, + .col_gpios = palmtc_keypad_row_gpios, + .num_col_gpios = 12, + .row_gpios = palmtc_keypad_col_gpios, + .num_row_gpios = 4, + .active_low = 1, + + .debounce_ms = 20, + .col_scan_delay_us = 5, +}; + +static struct platform_device palmtc_keyboard = { + .name = "matrix-keypad", + .id = -1, + .dev = { + .platform_data = &palmtc_keypad_platform_data, + }, +}; + +/****************************************************************************** + * UDC + ******************************************************************************/ +static struct pxa2xx_udc_mach_info palmtc_udc_info __initdata = { + .gpio_vbus = GPIO_NR_PALMTC_USB_DETECT_N, + .gpio_vbus_inverted = 1, + .gpio_pullup = GPIO_NR_PALMTC_USB_POWER, +}; + +/****************************************************************************** + * Touchscreen / Battery / GPIO-extender + ******************************************************************************/ +static struct platform_device palmtc_ucb1400_core = { + .name = "ucb1400_core", + .id = -1, +}; + +/****************************************************************************** + * NOR Flash + ******************************************************************************/ +static struct resource palmtc_flash_resource = { + .start = PXA_CS0_PHYS, + .end = PXA_CS0_PHYS + SZ_16M - 1, + .flags = IORESOURCE_MEM, +}; + +static struct mtd_partition palmtc_flash_parts[] = { + { + .name = "U-Boot Bootloader", + .offset = 0x0, + .size = 0x40000, + }, + { + .name = "Linux Kernel", + .offset = 0x40000, + .size = 0x2c0000, + }, + { + .name = "Filesystem", + .offset = 0x300000, + .size = 0xcc0000, + }, + { + .name = "U-Boot Environment", + .offset = 0xfc0000, + .size = MTDPART_SIZ_FULL, + }, +}; + +static struct physmap_flash_data palmtc_flash_data = { + .width = 4, + .parts = palmtc_flash_parts, + .nr_parts = ARRAY_SIZE(palmtc_flash_parts), +}; + +static struct platform_device palmtc_flash = { + .name = "physmap-flash", + .id = -1, + .resource = &palmtc_flash_resource, + .num_resources = 1, + .dev = { + .platform_data = &palmtc_flash_data, + }, +}; + +/****************************************************************************** + * Framebuffer + ******************************************************************************/ +static struct pxafb_mode_info palmtc_lcd_modes[] = { +{ + .pixclock = 115384, + .xres = 320, + .yres = 320, + .bpp = 16, + + .left_margin = 27, + .right_margin = 7, + .upper_margin = 7, + .lower_margin = 8, + + .hsync_len = 6, + .vsync_len = 1, +}, +}; + +static struct pxafb_mach_info palmtc_lcd_screen = { + .modes = palmtc_lcd_modes, + .num_modes = ARRAY_SIZE(palmtc_lcd_modes), + .lcd_conn = LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL, +}; + +/****************************************************************************** + * Machine init + ******************************************************************************/ +static struct platform_device *devices[] __initdata = { + &palmtc_backlight, + &palmtc_ucb1400_core, + &palmtc_keyboard, + &palmtc_pxa_keys, + &palmtc_flash, +}; + +static void __init palmtc_init(void) +{ + pxa2xx_mfp_config(ARRAY_AND_SIZE(palmtc_pin_config)); + + set_pxa_fb_info(&palmtc_lcd_screen); + pxa_set_mci_info(&palmtc_mci_platform_data); + pxa_set_udc_info(&palmtc_udc_info); + pxa_set_ac97_info(NULL); + pxa_set_ficp_info(&palmtc_ficp_platform_data); + + platform_add_devices(devices, ARRAY_SIZE(devices)); +}; + +MACHINE_START(PALMTC, "Palm Tungsten|C") + .phys_io = 0x40000000, + .boot_params = 0xa0000100, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .timer = &pxa_timer, + .init_machine = palmtc_init +MACHINE_END diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c index d823b09801d..277c4062e3c 100644 --- a/arch/arm/mach-pxa/palmte2.c +++ b/arch/arm/mach-pxa/palmte2.c @@ -117,83 +117,11 @@ static unsigned long palmte2_pin_config[] __initdata = { /****************************************************************************** * SD/MMC card controller ******************************************************************************/ -static int palmte2_mci_init(struct device *dev, - irq_handler_t palmte2_detect_int, void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_PALMTE2_SD_DETECT_N, "SD IRQ"); - if (err) - goto err; - err = gpio_direction_input(GPIO_NR_PALMTE2_SD_DETECT_N); - if (err) - goto err2; - err = request_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), - palmte2_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; - } - - err = gpio_request(GPIO_NR_PALMTE2_SD_POWER, "SD_POWER"); - if (err) - goto err3; - err = gpio_direction_output(GPIO_NR_PALMTE2_SD_POWER, 0); - if (err) - goto err4; - - err = gpio_request(GPIO_NR_PALMTE2_SD_READONLY, "SD_READONLY"); - if (err) - goto err4; - err = gpio_direction_input(GPIO_NR_PALMTE2_SD_READONLY); - if (err) - goto err5; - - printk(KERN_DEBUG "%s: irq registered\n", __func__); - - return 0; - -err5: - gpio_free(GPIO_NR_PALMTE2_SD_READONLY); -err4: - gpio_free(GPIO_NR_PALMTE2_SD_POWER); -err3: - free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N); -err: - return err; -} - -static void palmte2_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_PALMTE2_SD_READONLY); - gpio_free(GPIO_NR_PALMTE2_SD_POWER); - free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data); - gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N); -} - -static void palmte2_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - gpio_set_value(GPIO_NR_PALMTE2_SD_POWER, p_d->ocr_mask & (1 << vdd)); -} - -static int palmte2_mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_PALMTE2_SD_READONLY); -} - static struct pxamci_platform_data palmte2_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = palmte2_mci_power, - .get_ro = palmte2_mci_get_ro, - .init = palmte2_mci_init, - .exit = palmte2_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_PALMTE2_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_PALMTE2_SD_READONLY, + .gpio_power = GPIO_NR_PALMTE2_SD_POWER, }; /****************************************************************************** @@ -287,35 +215,9 @@ static struct platform_device palmte2_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static int palmte2_irda_startup(struct device *dev) -{ - int err; - err = gpio_request(GPIO_NR_PALMTE2_IR_DISABLE, "IR DISABLE"); - if (err) - goto err; - err = gpio_direction_output(GPIO_NR_PALMTE2_IR_DISABLE, 1); - if (err) - gpio_free(GPIO_NR_PALMTE2_IR_DISABLE); -err: - return err; -} - -static void palmte2_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_PALMTE2_IR_DISABLE); -} - -static void palmte2_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_PALMTE2_IR_DISABLE, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data palmte2_ficp_platform_data = { - .startup = palmte2_irda_startup, - .shutdown = palmte2_irda_shutdown, - .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, - .transceiver_mode = palmte2_irda_transceiver_mode, + .gpio_pwdown = GPIO_NR_PALMTE2_IR_DISABLE, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /****************************************************************************** diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index 83d02087958..76a2b37eaf3 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -28,6 +28,10 @@ #include <linux/wm97xx_batt.h> #include <linux/power_supply.h> #include <linux/usb/gpio_vbus.h> +#include <linux/mtd/nand.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/physmap.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -131,6 +135,10 @@ static unsigned long palmtx_pin_config[] __initdata = { GPIO34_FFUART_RXD, GPIO39_FFUART_TXD, + /* NAND */ + GPIO15_nCS_1, + GPIO18_RDY, + /* MISC. */ GPIO10_GPIO, /* hotsync button */ GPIO12_GPIO, /* power detect */ @@ -138,85 +146,50 @@ static unsigned long palmtx_pin_config[] __initdata = { }; /****************************************************************************** - * SD/MMC card controller + * NOR Flash ******************************************************************************/ -static int palmtx_mci_init(struct device *dev, irq_handler_t palmtx_detect_int, - void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_PALMTX_SD_DETECT_N, "SD IRQ"); - if (err) - goto err; - err = gpio_direction_input(GPIO_NR_PALMTX_SD_DETECT_N); - if (err) - goto err2; - err = request_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), - palmtx_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; +static struct mtd_partition palmtx_partitions[] = { + { + .name = "Flash", + .offset = 0x00000000, + .size = MTDPART_SIZ_FULL, + .mask_flags = 0 } +}; - err = gpio_request(GPIO_NR_PALMTX_SD_POWER, "SD_POWER"); - if (err) - goto err3; - err = gpio_direction_output(GPIO_NR_PALMTX_SD_POWER, 0); - if (err) - goto err4; - - err = gpio_request(GPIO_NR_PALMTX_SD_READONLY, "SD_READONLY"); - if (err) - goto err4; - err = gpio_direction_input(GPIO_NR_PALMTX_SD_READONLY); - if (err) - goto err5; - - printk(KERN_DEBUG "%s: irq registered\n", __func__); - - return 0; - -err5: - gpio_free(GPIO_NR_PALMTX_SD_READONLY); -err4: - gpio_free(GPIO_NR_PALMTX_SD_POWER); -err3: - free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_PALMTX_SD_DETECT_N); -err: - return err; -} - -static void palmtx_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_PALMTX_SD_READONLY); - gpio_free(GPIO_NR_PALMTX_SD_POWER); - free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data); - gpio_free(GPIO_NR_PALMTX_SD_DETECT_N); -} +static struct physmap_flash_data palmtx_flash_data[] = { + { + .width = 2, /* bankwidth in bytes */ + .parts = palmtx_partitions, + .nr_parts = ARRAY_SIZE(palmtx_partitions) + } +}; -static void palmtx_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - gpio_set_value(GPIO_NR_PALMTX_SD_POWER, p_d->ocr_mask & (1 << vdd)); -} +static struct resource palmtx_flash_resource = { + .start = PXA_CS0_PHYS, + .end = PXA_CS0_PHYS + SZ_8M - 1, + .flags = IORESOURCE_MEM, +}; -static int palmtx_mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_PALMTX_SD_READONLY); -} +static struct platform_device palmtx_flash = { + .name = "physmap-flash", + .id = 0, + .resource = &palmtx_flash_resource, + .num_resources = 1, + .dev = { + .platform_data = palmtx_flash_data, + }, +}; +/****************************************************************************** + * SD/MMC card controller + ******************************************************************************/ static struct pxamci_platform_data palmtx_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = palmtx_mci_power, - .get_ro = palmtx_mci_get_ro, - .init = palmtx_mci_init, - .exit = palmtx_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_PALMTX_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_PALMTX_SD_READONLY, + .gpio_power = GPIO_NR_PALMTX_SD_POWER, + .detect_delay = 20, }; /****************************************************************************** @@ -330,35 +303,9 @@ static struct platform_device palmtx_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static int palmtx_irda_startup(struct device *dev) -{ - int err; - err = gpio_request(GPIO_NR_PALMTX_IR_DISABLE, "IR DISABLE"); - if (err) - goto err; - err = gpio_direction_output(GPIO_NR_PALMTX_IR_DISABLE, 1); - if (err) - gpio_free(GPIO_NR_PALMTX_IR_DISABLE); -err: - return err; -} - -static void palmtx_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_PALMTX_IR_DISABLE); -} - -static void palmtx_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_PALMTX_IR_DISABLE, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data palmtx_ficp_platform_data = { - .startup = palmtx_irda_startup, - .shutdown = palmtx_irda_shutdown, - .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, - .transceiver_mode = palmtx_irda_transceiver_mode, + .gpio_pwdown = GPIO_NR_PALMTX_IR_DISABLE, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /****************************************************************************** @@ -493,6 +440,68 @@ static struct pxafb_mach_info palmtx_lcd_screen = { }; /****************************************************************************** + * NAND Flash + ******************************************************************************/ +static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd, + unsigned int ctrl) +{ + struct nand_chip *this = mtd->priv; + unsigned long nandaddr = (unsigned long)this->IO_ADDR_W; + + if (cmd == NAND_CMD_NONE) + return; + + if (ctrl & NAND_CLE) + writeb(cmd, PALMTX_NAND_CLE_VIRT); + else if (ctrl & NAND_ALE) + writeb(cmd, PALMTX_NAND_ALE_VIRT); + else + writeb(cmd, nandaddr); +} + +static struct mtd_partition palmtx_partition_info[] = { + [0] = { + .name = "palmtx-0", + .offset = 0, + .size = MTDPART_SIZ_FULL + }, +}; + +static const char *palmtx_part_probes[] = { "cmdlinepart", NULL }; + +struct platform_nand_data palmtx_nand_platdata = { + .chip = { + .nr_chips = 1, + .chip_offset = 0, + .nr_partitions = ARRAY_SIZE(palmtx_partition_info), + .partitions = palmtx_partition_info, + .chip_delay = 20, + .part_probe_types = palmtx_part_probes, + }, + .ctrl = { + .cmd_ctrl = palmtx_nand_cmd_ctl, + }, +}; + +static struct resource palmtx_nand_resource[] = { + [0] = { + .start = PXA_CS1_PHYS, + .end = PXA_CS1_PHYS + SZ_1M - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device palmtx_nand = { + .name = "gen_nand", + .num_resources = ARRAY_SIZE(palmtx_nand_resource), + .resource = palmtx_nand_resource, + .id = -1, + .dev = { + .platform_data = &palmtx_nand_platdata, + } +}; + +/****************************************************************************** * Power management - standby ******************************************************************************/ static void __init palmtx_pm_init(void) @@ -518,6 +527,8 @@ static struct platform_device *devices[] __initdata = { &power_supply, &palmtx_asoc, &palmtx_gpio_vbus, + &palmtx_flash, + &palmtx_nand, }; static struct map_desc palmtx_io_desc[] __initdata = { @@ -525,8 +536,18 @@ static struct map_desc palmtx_io_desc[] __initdata = { .virtual = PALMTX_PCMCIA_VIRT, .pfn = __phys_to_pfn(PALMTX_PCMCIA_PHYS), .length = PALMTX_PCMCIA_SIZE, - .type = MT_DEVICE -}, + .type = MT_DEVICE, +}, { + .virtual = PALMTX_NAND_ALE_VIRT, + .pfn = __phys_to_pfn(PALMTX_NAND_ALE_PHYS), + .length = SZ_1M, + .type = MT_DEVICE, +}, { + .virtual = PALMTX_NAND_CLE_VIRT, + .pfn = __phys_to_pfn(PALMTX_NAND_CLE_PHYS), + .length = SZ_1M, + .type = MT_DEVICE, +} }; static void __init palmtx_map_io(void) diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index c3645aa3fa3..c2bf493c5f5 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -129,88 +129,14 @@ static unsigned long palmz72_pin_config[] __initdata = { /****************************************************************************** * SD/MMC card controller ******************************************************************************/ -static int palmz72_mci_init(struct device *dev, - irq_handler_t palmz72_detect_int, void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_PALMZ72_SD_DETECT_N, "SD IRQ"); - if (err) - goto err; - err = gpio_direction_input(GPIO_NR_PALMZ72_SD_DETECT_N); - if (err) - goto err2; - err = request_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), - palmz72_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - if (err) { - printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; - } - - /* SD_POWER is not actually power, but it is more like chip - * select, i.e. it is inverted */ - - err = gpio_request(GPIO_NR_PALMZ72_SD_POWER_N, "SD_POWER"); - if (err) - goto err3; - err = gpio_direction_output(GPIO_NR_PALMZ72_SD_POWER_N, 0); - if (err) - goto err4; - err = gpio_request(GPIO_NR_PALMZ72_SD_RO, "SD_RO"); - if (err) - goto err4; - err = gpio_direction_input(GPIO_NR_PALMZ72_SD_RO); - if (err) - goto err5; - - printk(KERN_DEBUG "%s: irq registered\n", __func__); - - return 0; - -err5: - gpio_free(GPIO_NR_PALMZ72_SD_RO); -err4: - gpio_free(GPIO_NR_PALMZ72_SD_POWER_N); -err3: - free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N); -err: - return err; -} - -static void palmz72_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_PALMZ72_SD_POWER_N); - free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data); - gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N); - gpio_free(GPIO_NR_PALMZ72_SD_RO); -} - -static void palmz72_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - if (p_d->ocr_mask & (1 << vdd)) - gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 0); - else - gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 1); -} - -static int palmz72_mci_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_PALMZ72_SD_RO); -} - +/* SD_POWER is not actually power, but it is more like chip + * select, i.e. it is inverted */ static struct pxamci_platform_data palmz72_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = palmz72_mci_power, - .get_ro = palmz72_mci_ro, - .init = palmz72_mci_init, - .exit = palmz72_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_PALMZ72_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_PALMZ72_SD_RO, + .gpio_power = GPIO_NR_PALMZ72_SD_POWER_N, + .gpio_power_invert = 1, }; /****************************************************************************** @@ -304,35 +230,9 @@ static struct platform_device palmz72_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static int palmz72_irda_startup(struct device *dev) -{ - int err; - err = gpio_request(GPIO_NR_PALMZ72_IR_DISABLE, "IR DISABLE"); - if (err) - goto err; - err = gpio_direction_output(GPIO_NR_PALMZ72_IR_DISABLE, 1); - if (err) - gpio_free(GPIO_NR_PALMZ72_IR_DISABLE); -err: - return err; -} - -static void palmz72_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_PALMZ72_IR_DISABLE); -} - -static void palmz72_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_PALMZ72_IR_DISABLE, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - static struct pxaficp_platform_data palmz72_ficp_platform_data = { - .startup = palmz72_irda_startup, - .shutdown = palmz72_irda_shutdown, + .gpio_pwdown = GPIO_NR_PALMZ72_IR_DISABLE, .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = palmz72_irda_transceiver_mode, }; /****************************************************************************** diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index 01791d74e08..bbda57078e0 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -321,11 +321,14 @@ static void pcm990_mci_exit(struct device *dev, void *data) #define MSECS_PER_JIFFY (1000/HZ) static struct pxamci_platform_data pcm990_mci_platform_data = { - .detect_delay = 250 / MSECS_PER_JIFFY, - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .init = pcm990_mci_init, - .setpower = pcm990_mci_setpower, - .exit = pcm990_mci_exit, + .detect_delay = 250 / MSECS_PER_JIFFY, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .init = pcm990_mci_init, + .setpower = pcm990_mci_setpower, + .exit = pcm990_mci_exit, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static struct pxaohci_platform_data pcm990_ohci_platform_data = { @@ -427,25 +430,56 @@ static void pcm990_camera_free_bus(struct soc_camera_link *link) gpio_bus_switch = -EINVAL; } -static struct soc_camera_link iclink = { - .bus_id = 0, /* Must match with the camera ID above */ - .query_bus_param = pcm990_camera_query_bus_param, - .set_bus_param = pcm990_camera_set_bus_param, - .free_bus = pcm990_camera_free_bus, -}; - /* Board I2C devices. */ static struct i2c_board_info __initdata pcm990_i2c_devices[] = { { /* Must initialize before the camera(s) */ I2C_BOARD_INFO("pca9536", 0x41), .platform_data = &pca9536_data, - }, { + }, +}; + +static struct i2c_board_info pcm990_camera_i2c[] = { + { I2C_BOARD_INFO("mt9v022", 0x48), - .platform_data = &iclink, /* With extender */ }, { I2C_BOARD_INFO("mt9m001", 0x5d), - .platform_data = &iclink, /* With extender */ + }, +}; + +static struct soc_camera_link iclink[] = { + { + .bus_id = 0, /* Must match with the camera ID */ + .board_info = &pcm990_camera_i2c[0], + .i2c_adapter_id = 0, + .query_bus_param = pcm990_camera_query_bus_param, + .set_bus_param = pcm990_camera_set_bus_param, + .free_bus = pcm990_camera_free_bus, + .module_name = "mt9v022", + }, { + .bus_id = 0, /* Must match with the camera ID */ + .board_info = &pcm990_camera_i2c[1], + .i2c_adapter_id = 0, + .query_bus_param = pcm990_camera_query_bus_param, + .set_bus_param = pcm990_camera_set_bus_param, + .free_bus = pcm990_camera_free_bus, + .module_name = "mt9m001", + }, +}; + +static struct platform_device pcm990_camera[] = { + { + .name = "soc-camera-pdrv", + .id = 0, + .dev = { + .platform_data = &iclink[0], + }, + }, { + .name = "soc-camera-pdrv", + .id = 1, + .dev = { + .platform_data = &iclink[1], + }, }, }; #endif /* CONFIG_VIDEO_PXA27x ||CONFIG_VIDEO_PXA27x_MODULE */ @@ -501,6 +535,9 @@ void __init pcm990_baseboard_init(void) pxa_set_camera_info(&pcm990_pxacamera_platform_data); i2c_register_board_info(0, ARRAY_AND_SIZE(pcm990_i2c_devices)); + + platform_device_register(&pcm990_camera[0]); + platform_device_register(&pcm990_camera[1]); #endif printk(KERN_INFO "PCM-990 Evaluation baseboard initialized\n"); diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 9352d4a3483..a186994f77f 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -245,20 +245,10 @@ static inline void poodle_init_spi(void) {} * The card detect interrupt isn't debounced so we delay it by 250ms * to give the card a chance to fully insert/eject. */ -static struct pxamci_platform_data poodle_mci_platform_data; - static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, void *data) { int err; - err = gpio_request(POODLE_GPIO_nSD_DETECT, "nSD_DETECT"); - if (err) - goto err_out; - - err = gpio_request(POODLE_GPIO_nSD_WP, "nSD_WP"); - if (err) - goto err_free_1; - err = gpio_request(POODLE_GPIO_SD_PWR, "SD_PWR"); if (err) goto err_free_2; @@ -267,34 +257,14 @@ static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, if (err) goto err_free_3; - gpio_direction_input(POODLE_GPIO_nSD_DETECT); - gpio_direction_input(POODLE_GPIO_nSD_WP); - gpio_direction_output(POODLE_GPIO_SD_PWR, 0); gpio_direction_output(POODLE_GPIO_SD_PWR1, 0); - poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250); - - err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, - IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (err) { - pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n", - __func__); - goto err_free_4; - } - return 0; -err_free_4: - gpio_free(POODLE_GPIO_SD_PWR1); err_free_3: gpio_free(POODLE_GPIO_SD_PWR); err_free_2: - gpio_free(POODLE_GPIO_nSD_WP); -err_free_1: - gpio_free(POODLE_GPIO_nSD_DETECT); -err_out: return err; } @@ -312,62 +282,29 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd) } } -static int poodle_mci_get_ro(struct device *dev) -{ - return !!gpio_get_value(POODLE_GPIO_nSD_WP); - return GPLR(POODLE_GPIO_nSD_WP) & GPIO_bit(POODLE_GPIO_nSD_WP); -} - - static void poodle_mci_exit(struct device *dev, void *data) { - free_irq(POODLE_IRQ_GPIO_nSD_DETECT, data); gpio_free(POODLE_GPIO_SD_PWR1); gpio_free(POODLE_GPIO_SD_PWR); - gpio_free(POODLE_GPIO_nSD_WP); - gpio_free(POODLE_GPIO_nSD_DETECT); } static struct pxamci_platform_data poodle_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = poodle_mci_init, - .get_ro = poodle_mci_get_ro, - .setpower = poodle_mci_setpower, - .exit = poodle_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = poodle_mci_init, + .setpower = poodle_mci_setpower, + .exit = poodle_mci_exit, + .gpio_card_detect = POODLE_IRQ_GPIO_nSD_DETECT, + .gpio_card_ro = POODLE_GPIO_nSD_WP, + .gpio_power = -1, }; /* * Irda */ -static void poodle_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(POODLE_GPIO_IR_ON, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - -static int poodle_irda_startup(struct device *dev) -{ - int err; - - err = gpio_request(POODLE_GPIO_IR_ON, "IR_ON"); - if (err) - return err; - - gpio_direction_output(POODLE_GPIO_IR_ON, 1); - return 0; -} - -static void poodle_irda_shutdown(struct device *dev) -{ - gpio_free(POODLE_GPIO_IR_ON); -} - static struct pxaficp_platform_data poodle_ficp_platform_data = { + .gpio_pwdown = POODLE_GPIO_IR_ON, .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = poodle_irda_transceiver_mode, - .startup = poodle_irda_startup, - .shutdown = poodle_irda_shutdown, }; @@ -521,6 +458,7 @@ static void __init poodle_init(void) set_pxa_fb_parent(&poodle_locomo_device.dev); set_pxa_fb_info(&poodle_fb_info); pxa_set_udc_info(&udc_info); + poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250); pxa_set_mci_info(&poodle_mci_platform_data); pxa_set_ficp_info(&poodle_ficp_platform_data); pxa_set_i2c_info(NULL); diff --git a/arch/arm/mach-pxa/pxa2xx.c b/arch/arm/mach-pxa/pxa2xx.c index 2f3394f8591..868270421b8 100644 --- a/arch/arm/mach-pxa/pxa2xx.c +++ b/arch/arm/mach-pxa/pxa2xx.c @@ -52,3 +52,4 @@ void pxa2xx_transceiver_mode(struct device *dev, int mode) } else BUG(); } +EXPORT_SYMBOL_GPL(pxa2xx_transceiver_mode); diff --git a/arch/arm/mach-pxa/pxa300.c b/arch/arm/mach-pxa/pxa300.c index 4ba6d21f851..f4af6e2bef8 100644 --- a/arch/arm/mach-pxa/pxa300.c +++ b/arch/arm/mach-pxa/pxa300.c @@ -84,9 +84,11 @@ static struct mfp_addr_map pxa310_mfp_addr_map[] __initdata = { }; static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0); +static DEFINE_PXA3_CKEN(gcu, PXA300_GCU, 0, 0); static struct clk_lookup common_clkregs[] = { INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL), + INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL), }; static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0); diff --git a/arch/arm/mach-pxa/pxa320.c b/arch/arm/mach-pxa/pxa320.c index 8b3d97efada..c7373e74a10 100644 --- a/arch/arm/mach-pxa/pxa320.c +++ b/arch/arm/mach-pxa/pxa320.c @@ -78,9 +78,11 @@ static struct mfp_addr_map pxa320_mfp_addr_map[] __initdata = { }; static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0); +static DEFINE_PXA3_CKEN(gcu, PXA320_GCU, 0, 0); static struct clk_lookup pxa320_clkregs[] = { INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL), + INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL), }; static int __init pxa320_init(void) diff --git a/arch/arm/mach-pxa/pxa930.c b/arch/arm/mach-pxa/pxa930.c index 71131742fff..06429200828 100644 --- a/arch/arm/mach-pxa/pxa930.c +++ b/arch/arm/mach-pxa/pxa930.c @@ -176,13 +176,30 @@ static struct mfp_addr_map pxa930_mfp_addr_map[] __initdata = { MFP_ADDR_END, }; +static struct mfp_addr_map pxa935_mfp_addr_map[] __initdata = { + MFP_ADDR(GPIO159, 0x0524), + MFP_ADDR(GPIO163, 0x0534), + MFP_ADDR(GPIO167, 0x0544), + MFP_ADDR(GPIO168, 0x0548), + MFP_ADDR(GPIO169, 0x054c), + MFP_ADDR(GPIO170, 0x0550), + MFP_ADDR(GPIO171, 0x0554), + MFP_ADDR(GPIO172, 0x0558), + MFP_ADDR(GPIO173, 0x055c), + + MFP_ADDR_END, +}; + static int __init pxa930_init(void) { - if (cpu_is_pxa930()) { + if (cpu_is_pxa930() || cpu_is_pxa935()) { mfp_init_base(io_p2v(MFPR_BASE)); mfp_init_addr(pxa930_mfp_addr_map); } + if (cpu_is_pxa935()) + mfp_init_addr(pxa935_mfp_addr_map); + return 0; } diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index dda310fe71c..ee8d6038ce8 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -24,6 +24,7 @@ #include <linux/spi/ads7846.h> #include <linux/spi/corgi_lcd.h> #include <linux/mtd/sharpsl.h> +#include <linux/input/matrix_keypad.h> #include <asm/setup.h> #include <asm/mach-types.h> @@ -111,6 +112,26 @@ static unsigned long spitz_pin_config[] __initdata = { GPIO105_GPIO, /* SPITZ_GPIO_CF_IRQ */ GPIO106_GPIO, /* SPITZ_GPIO_CF2_IRQ */ + /* GPIO matrix keypad */ + GPIO88_GPIO, /* column 0 */ + GPIO23_GPIO, /* column 1 */ + GPIO24_GPIO, /* column 2 */ + GPIO25_GPIO, /* column 3 */ + GPIO26_GPIO, /* column 4 */ + GPIO27_GPIO, /* column 5 */ + GPIO52_GPIO, /* column 6 */ + GPIO103_GPIO, /* column 7 */ + GPIO107_GPIO, /* column 8 */ + GPIO108_GPIO, /* column 9 */ + GPIO114_GPIO, /* column 10 */ + GPIO12_GPIO, /* row 0 */ + GPIO17_GPIO, /* row 1 */ + GPIO91_GPIO, /* row 2 */ + GPIO34_GPIO, /* row 3 */ + GPIO36_GPIO, /* row 4 */ + GPIO38_GPIO, /* row 5 */ + GPIO39_GPIO, /* row 6 */ + /* I2C */ GPIO117_I2C_SCL, GPIO118_I2C_SDA, @@ -242,9 +263,115 @@ EXPORT_SYMBOL(spitzscoop2_device); /* * Spitz Keyboard Device */ +#define SPITZ_KEY_CALENDAR KEY_F1 +#define SPITZ_KEY_ADDRESS KEY_F2 +#define SPITZ_KEY_FN KEY_F3 +#define SPITZ_KEY_CANCEL KEY_F4 +#define SPITZ_KEY_EXOK KEY_F5 +#define SPITZ_KEY_EXCANCEL KEY_F6 +#define SPITZ_KEY_EXJOGDOWN KEY_F7 +#define SPITZ_KEY_EXJOGUP KEY_F8 +#define SPITZ_KEY_JAP1 KEY_LEFTALT +#define SPITZ_KEY_JAP2 KEY_RIGHTCTRL +#define SPITZ_KEY_SYNC KEY_F9 +#define SPITZ_KEY_MAIL KEY_F10 +#define SPITZ_KEY_OK KEY_F11 +#define SPITZ_KEY_MENU KEY_F12 + +static const uint32_t spitzkbd_keymap[] = { + KEY(0, 0, KEY_LEFTCTRL), + KEY(0, 1, KEY_1), + KEY(0, 2, KEY_3), + KEY(0, 3, KEY_5), + KEY(0, 4, KEY_6), + KEY(0, 5, KEY_7), + KEY(0, 6, KEY_9), + KEY(0, 7, KEY_0), + KEY(0, 8, KEY_BACKSPACE), + KEY(0, 9, SPITZ_KEY_EXOK), /* EXOK */ + KEY(0, 10, SPITZ_KEY_EXCANCEL), /* EXCANCEL */ + KEY(1, 1, KEY_2), + KEY(1, 2, KEY_4), + KEY(1, 3, KEY_R), + KEY(1, 4, KEY_Y), + KEY(1, 5, KEY_8), + KEY(1, 6, KEY_I), + KEY(1, 7, KEY_O), + KEY(1, 8, KEY_P), + KEY(1, 9, SPITZ_KEY_EXJOGDOWN), /* EXJOGDOWN */ + KEY(1, 10, SPITZ_KEY_EXJOGUP), /* EXJOGUP */ + KEY(2, 0, KEY_TAB), + KEY(2, 1, KEY_Q), + KEY(2, 2, KEY_E), + KEY(2, 3, KEY_T), + KEY(2, 4, KEY_G), + KEY(2, 5, KEY_U), + KEY(2, 6, KEY_J), + KEY(2, 7, KEY_K), + KEY(3, 0, SPITZ_KEY_ADDRESS), /* ADDRESS */ + KEY(3, 1, KEY_W), + KEY(3, 2, KEY_S), + KEY(3, 3, KEY_F), + KEY(3, 4, KEY_V), + KEY(3, 5, KEY_H), + KEY(3, 6, KEY_M), + KEY(3, 7, KEY_L), + KEY(3, 9, KEY_RIGHTSHIFT), + KEY(4, 0, SPITZ_KEY_CALENDAR), /* CALENDAR */ + KEY(4, 1, KEY_A), + KEY(4, 2, KEY_D), + KEY(4, 3, KEY_C), + KEY(4, 4, KEY_B), + KEY(4, 5, KEY_N), + KEY(4, 6, KEY_DOT), + KEY(4, 8, KEY_ENTER), + KEY(4, 9, KEY_LEFTSHIFT), + KEY(5, 0, SPITZ_KEY_MAIL), /* MAIL */ + KEY(5, 1, KEY_Z), + KEY(5, 2, KEY_X), + KEY(5, 3, KEY_MINUS), + KEY(5, 4, KEY_SPACE), + KEY(5, 5, KEY_COMMA), + KEY(5, 7, KEY_UP), + KEY(5, 10, SPITZ_KEY_FN), /* FN */ + KEY(6, 0, KEY_SYSRQ), + KEY(6, 1, SPITZ_KEY_JAP1), /* JAP1 */ + KEY(6, 2, SPITZ_KEY_JAP2), /* JAP2 */ + KEY(6, 3, SPITZ_KEY_CANCEL), /* CANCEL */ + KEY(6, 4, SPITZ_KEY_OK), /* OK */ + KEY(6, 5, SPITZ_KEY_MENU), /* MENU */ + KEY(6, 6, KEY_LEFT), + KEY(6, 7, KEY_DOWN), + KEY(6, 8, KEY_RIGHT), +}; + +static const struct matrix_keymap_data spitzkbd_keymap_data = { + .keymap = spitzkbd_keymap, + .keymap_size = ARRAY_SIZE(spitzkbd_keymap), +}; + +static const uint32_t spitzkbd_row_gpios[] = + { 12, 17, 91, 34, 36, 38, 39 }; +static const uint32_t spitzkbd_col_gpios[] = + { 88, 23, 24, 25, 26, 27, 52, 103, 107, 108, 114 }; + +static struct matrix_keypad_platform_data spitzkbd_pdata = { + .keymap_data = &spitzkbd_keymap_data, + .row_gpios = spitzkbd_row_gpios, + .col_gpios = spitzkbd_col_gpios, + .num_row_gpios = ARRAY_SIZE(spitzkbd_row_gpios), + .num_col_gpios = ARRAY_SIZE(spitzkbd_col_gpios), + .col_scan_delay_us = 10, + .debounce_ms = 10, + .wakeup = 1, +}; + static struct platform_device spitzkbd_device = { - .name = "spitz-keyboard", + .name = "matrix-keypad", .id = -1, + .dev = { + .platform_data = &spitzkbd_pdata, + }, }; @@ -296,6 +423,7 @@ static struct ads7846_platform_data spitz_ads7846_info = { .vref_delay_usecs = 100, .x_plate_ohms = 419, .y_plate_ohms = 486, + .pressure_max = 1024, .gpio_pendown = SPITZ_GPIO_TP_INT, .wait_for_sync = spitz_wait_for_hsync, }; @@ -378,45 +506,6 @@ static inline void spitz_init_spi(void) {} * The card detect interrupt isn't debounced so we delay it by 250ms * to give the card a chance to fully insert/eject. */ - -static struct pxamci_platform_data spitz_mci_platform_data; - -static int spitz_mci_init(struct device *dev, irq_handler_t spitz_detect_int, void *data) -{ - int err; - - err = gpio_request(SPITZ_GPIO_nSD_DETECT, "nSD_DETECT"); - if (err) - goto err_out; - - err = gpio_request(SPITZ_GPIO_nSD_WP, "nSD_WP"); - if (err) - goto err_free_1; - - gpio_direction_input(SPITZ_GPIO_nSD_DETECT); - gpio_direction_input(SPITZ_GPIO_nSD_WP); - - spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250); - - err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, - IRQF_DISABLED | IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, - "MMC card detect", data); - if (err) { - pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n", - __func__); - goto err_free_2; - } - return 0; - -err_free_2: - gpio_free(SPITZ_GPIO_nSD_WP); -err_free_1: - gpio_free(SPITZ_GPIO_nSD_DETECT); -err_out: - return err; -} - static void spitz_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -427,24 +516,12 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd) spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000); } -static int spitz_mci_get_ro(struct device *dev) -{ - return gpio_get_value(SPITZ_GPIO_nSD_WP); -} - -static void spitz_mci_exit(struct device *dev, void *data) -{ - free_irq(SPITZ_IRQ_GPIO_nSD_DETECT, data); - gpio_free(SPITZ_GPIO_nSD_WP); - gpio_free(SPITZ_GPIO_nSD_DETECT); -} - static struct pxamci_platform_data spitz_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = spitz_mci_init, - .get_ro = spitz_mci_get_ro, - .setpower = spitz_mci_setpower, - .exit = spitz_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .setpower = spitz_mci_setpower, + .gpio_card_detect = SPITZ_GPIO_nSD_DETECT, + .gpio_card_ro = SPITZ_GPIO_nSD_WP, + .gpio_power = -1, }; @@ -484,50 +561,10 @@ static struct pxaohci_platform_data spitz_ohci_platform_data = { /* * Irda */ -static int spitz_irda_startup(struct device *dev) -{ - int rc; - - rc = gpio_request(SPITZ_GPIO_IR_ON, "IrDA on"); - if (rc) - goto err; - - rc = gpio_direction_output(SPITZ_GPIO_IR_ON, 1); - if (rc) - goto err_dir; - - return 0; - -err_dir: - gpio_free(SPITZ_GPIO_IR_ON); -err: - return rc; -} - -static void spitz_irda_shutdown(struct device *dev) -{ - gpio_free(SPITZ_GPIO_IR_ON); -} - -static void spitz_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(SPITZ_GPIO_IR_ON, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - -#ifdef CONFIG_MACH_AKITA -static void akita_irda_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(AKITA_GPIO_IR_ON, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} -#endif static struct pxaficp_platform_data spitz_ficp_platform_data = { +/* .gpio_pwdown is set in spitz_init() and akita_init() accordingly */ .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = spitz_irda_transceiver_mode, - .startup = spitz_irda_startup, - .shutdown = spitz_irda_shutdown, }; @@ -695,6 +732,7 @@ static void __init common_init(void) spitz_init_spi(); platform_add_devices(devices, ARRAY_SIZE(devices)); + spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250); pxa_set_mci_info(&spitz_mci_platform_data); pxa_set_ohci_info(&spitz_ohci_platform_data); pxa_set_ficp_info(&spitz_ficp_platform_data); @@ -705,6 +743,8 @@ static void __init common_init(void) #if defined(CONFIG_MACH_SPITZ) || defined(CONFIG_MACH_BORZOI) static void __init spitz_init(void) { + spitz_ficp_platform_data.gpio_pwdown = SPITZ_GPIO_IR_ON; + platform_scoop_config = &spitz_pcmcia_config; common_init(); @@ -747,7 +787,7 @@ static struct nand_ecclayout akita_oobinfo = { static void __init akita_init(void) { - spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode; + spitz_ficp_platform_data.gpio_pwdown = AKITA_GPIO_IR_ON; sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt; sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo; diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 117ad5920e5..e81a52673d4 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -247,49 +247,10 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { /* * MMC/SD Device */ -static struct pxamci_platform_data tosa_mci_platform_data; - static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void *data) { int err; - tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250); - - err = gpio_request(TOSA_GPIO_nSD_DETECT, "MMC/SD card detect"); - if (err) { - printk(KERN_ERR "tosa_mci_init: can't request nSD_DETECT gpio\n"); - goto err_gpio_detect; - } - err = gpio_direction_input(TOSA_GPIO_nSD_DETECT); - if (err) - goto err_gpio_detect_dir; - - err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int, - IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "MMC/SD card detect", data); - if (err) { - printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); - goto err_irq; - } - - err = gpio_request(TOSA_GPIO_SD_WP, "SD Write Protect"); - if (err) { - printk(KERN_ERR "tosa_mci_init: can't request SD_WP gpio\n"); - goto err_gpio_wp; - } - err = gpio_direction_input(TOSA_GPIO_SD_WP); - if (err) - goto err_gpio_wp_dir; - - err = gpio_request(TOSA_GPIO_PWR_ON, "SD Power"); - if (err) { - printk(KERN_ERR "tosa_mci_init: can't request SD_PWR gpio\n"); - goto err_gpio_pwr; - } - err = gpio_direction_output(TOSA_GPIO_PWR_ON, 0); - if (err) - goto err_gpio_pwr_dir; - err = gpio_request(TOSA_GPIO_nSD_INT, "SD Int"); if (err) { printk(KERN_ERR "tosa_mci_init: can't request SD_PWR gpio\n"); @@ -304,51 +265,21 @@ static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void err_gpio_int_dir: gpio_free(TOSA_GPIO_nSD_INT); err_gpio_int: -err_gpio_pwr_dir: - gpio_free(TOSA_GPIO_PWR_ON); -err_gpio_pwr: -err_gpio_wp_dir: - gpio_free(TOSA_GPIO_SD_WP); -err_gpio_wp: - free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data); -err_irq: -err_gpio_detect_dir: - gpio_free(TOSA_GPIO_nSD_DETECT); -err_gpio_detect: return err; } -static void tosa_mci_setpower(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data* p_d = dev->platform_data; - - if (( 1 << vdd) & p_d->ocr_mask) { - gpio_set_value(TOSA_GPIO_PWR_ON, 1); - } else { - gpio_set_value(TOSA_GPIO_PWR_ON, 0); - } -} - -static int tosa_mci_get_ro(struct device *dev) -{ - return gpio_get_value(TOSA_GPIO_SD_WP); -} - static void tosa_mci_exit(struct device *dev, void *data) { gpio_free(TOSA_GPIO_nSD_INT); - gpio_free(TOSA_GPIO_PWR_ON); - gpio_free(TOSA_GPIO_SD_WP); - free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data); - gpio_free(TOSA_GPIO_nSD_DETECT); } static struct pxamci_platform_data tosa_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, - .init = tosa_mci_init, - .get_ro = tosa_mci_get_ro, - .setpower = tosa_mci_setpower, - .exit = tosa_mci_exit, + .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, + .init = tosa_mci_init, + .exit = tosa_mci_exit, + .gpio_card_detect = TOSA_GPIO_nSD_DETECT, + .gpio_card_ro = TOSA_GPIO_SD_WP, + .gpio_power = TOSA_GPIO_PWR_ON, }; /* @@ -406,10 +337,11 @@ static void tosa_irda_shutdown(struct device *dev) } static struct pxaficp_platform_data tosa_ficp_platform_data = { - .transceiver_cap = IR_SIRMODE | IR_OFF, - .transceiver_mode = tosa_irda_transceiver_mode, - .startup = tosa_irda_startup, - .shutdown = tosa_irda_shutdown, + .gpio_pwdown = -1, + .transceiver_cap = IR_SIRMODE | IR_OFF, + .transceiver_mode = tosa_irda_transceiver_mode, + .startup = tosa_irda_startup, + .shutdown = tosa_irda_shutdown, }; /* @@ -910,6 +842,7 @@ static void __init tosa_init(void) dummy = gpiochip_reserve(TOSA_SCOOP_JC_GPIO_BASE, 12); dummy = gpiochip_reserve(TOSA_TC6393XB_GPIO_BASE, 16); + tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250); pxa_set_mci_info(&tosa_mci_platform_data); pxa_set_udc_info(&udc_info); pxa_set_ficp_info(&tosa_ficp_platform_data); diff --git a/arch/arm/mach-pxa/treo680.c b/arch/arm/mach-pxa/treo680.c index 753ec4df17b..fe085076fbf 100644 --- a/arch/arm/mach-pxa/treo680.c +++ b/arch/arm/mach-pxa/treo680.c @@ -153,87 +153,11 @@ static unsigned long treo680_pin_config[] __initdata = { /****************************************************************************** * SD/MMC card controller ******************************************************************************/ -static int treo680_mci_init(struct device *dev, - irq_handler_t treo680_detect_int, void *data) -{ - int err = 0; - - /* Setup an interrupt for detecting card insert/remove events */ - err = gpio_request(GPIO_NR_TREO680_SD_DETECT_N, "SD IRQ"); - - if (err) - goto err; - - err = gpio_direction_input(GPIO_NR_TREO680_SD_DETECT_N); - if (err) - goto err2; - - err = request_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), - treo680_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM | - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - "SD/MMC card detect", data); - - if (err) { - dev_err(dev, "%s: cannot request SD/MMC card detect IRQ\n", - __func__); - goto err2; - } - - err = gpio_request(GPIO_NR_TREO680_SD_POWER, "SD_POWER"); - if (err) - goto err3; - - err = gpio_direction_output(GPIO_NR_TREO680_SD_POWER, 1); - if (err) - goto err4; - - err = gpio_request(GPIO_NR_TREO680_SD_READONLY, "SD_READONLY"); - if (err) - goto err4; - - err = gpio_direction_input(GPIO_NR_TREO680_SD_READONLY); - if (err) - goto err5; - - return 0; - -err5: - gpio_free(GPIO_NR_TREO680_SD_READONLY); -err4: - gpio_free(GPIO_NR_TREO680_SD_POWER); -err3: - free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data); -err2: - gpio_free(GPIO_NR_TREO680_SD_DETECT_N); -err: - return err; -} - -static void treo680_mci_exit(struct device *dev, void *data) -{ - gpio_free(GPIO_NR_TREO680_SD_READONLY); - gpio_free(GPIO_NR_TREO680_SD_POWER); - free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data); - gpio_free(GPIO_NR_TREO680_SD_DETECT_N); -} - -static void treo680_mci_power(struct device *dev, unsigned int vdd) -{ - struct pxamci_platform_data *p_d = dev->platform_data; - gpio_set_value(GPIO_NR_TREO680_SD_POWER, p_d->ocr_mask & (1 << vdd)); -} - -static int treo680_mci_get_ro(struct device *dev) -{ - return gpio_get_value(GPIO_NR_TREO680_SD_READONLY); -} - static struct pxamci_platform_data treo680_mci_platform_data = { - .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, - .setpower = treo680_mci_power, - .get_ro = treo680_mci_get_ro, - .init = treo680_mci_init, - .exit = treo680_mci_exit, + .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_card_detect = GPIO_NR_TREO680_SD_DETECT_N, + .gpio_card_ro = GPIO_NR_TREO680_SD_READONLY, + .gpio_power = GPIO_NR_TREO680_SD_POWER, }; /****************************************************************************** @@ -330,16 +254,9 @@ static int treo680_backlight_init(struct device *dev) ret = gpio_direction_output(GPIO_NR_TREO680_BL_POWER, 0); if (ret) goto err2; - ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER"); - if (ret) - goto err2; - ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0); - if (ret) - goto err3; return 0; -err3: - gpio_free(GPIO_NR_TREO680_LCD_POWER); + err2: gpio_free(GPIO_NR_TREO680_BL_POWER); err: @@ -355,7 +272,6 @@ static int treo680_backlight_notify(int brightness) static void treo680_backlight_exit(struct device *dev) { gpio_free(GPIO_NR_TREO680_BL_POWER); - gpio_free(GPIO_NR_TREO680_LCD_POWER); } static struct platform_pwm_backlight_data treo680_backlight_data = { @@ -379,44 +295,9 @@ static struct platform_device treo680_backlight = { /****************************************************************************** * IrDA ******************************************************************************/ -static void treo680_transceiver_mode(struct device *dev, int mode) -{ - gpio_set_value(GPIO_NR_TREO680_IR_EN, mode & IR_OFF); - pxa2xx_transceiver_mode(dev, mode); -} - -static int treo680_irda_startup(struct device *dev) -{ - int err; - - err = gpio_request(GPIO_NR_TREO680_IR_EN, "Ir port disable"); - if (err) - goto err1; - - err = gpio_direction_output(GPIO_NR_TREO680_IR_EN, 1); - if (err) - goto err2; - - return 0; - -err2: - dev_err(dev, "treo680_irda: cannot change IR gpio direction\n"); - gpio_free(GPIO_NR_TREO680_IR_EN); -err1: - dev_err(dev, "treo680_irda: cannot allocate IR gpio\n"); - return err; -} - -static void treo680_irda_shutdown(struct device *dev) -{ - gpio_free(GPIO_NR_TREO680_IR_EN); -} - static struct pxaficp_platform_data treo680_ficp_info = { - .transceiver_cap = IR_FIRMODE | IR_SIRMODE | IR_OFF, - .startup = treo680_irda_startup, - .shutdown = treo680_irda_shutdown, - .transceiver_mode = treo680_transceiver_mode, + .gpio_pwdown = GPIO_NR_TREO680_IR_EN, + .transceiver_cap = IR_SIRMODE | IR_OFF, }; /****************************************************************************** @@ -546,6 +427,11 @@ static struct pxafb_mode_info treo680_lcd_modes[] = { }, }; +static void treo680_lcd_power(int on, struct fb_var_screeninfo *info) +{ + gpio_set_value(GPIO_NR_TREO680_BL_POWER, on); +} + static struct pxafb_mach_info treo680_lcd_screen = { .modes = treo680_lcd_modes, .num_modes = ARRAY_SIZE(treo680_lcd_modes), @@ -585,11 +471,32 @@ static void __init treo680_udc_init(void) } } +static void __init treo680_lcd_power_init(void) +{ + int ret; + + ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER"); + if (ret) { + pr_err("Treo680: LCD power GPIO request failed!\n"); + return; + } + + ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0); + if (ret) { + pr_err("Treo680: setting LCD power GPIO direction failed!\n"); + gpio_free(GPIO_NR_TREO680_LCD_POWER); + return; + } + + treo680_lcd_screen.pxafb_lcd_power = treo680_lcd_power; +} + static void __init treo680_init(void) { treo680_pm_init(); pxa2xx_mfp_config(ARRAY_AND_SIZE(treo680_pin_config)); pxa_set_keypad_info(&treo680_keypad_platform_data); + treo680_lcd_power_init(); set_pxa_fb_info(&treo680_lcd_screen); pxa_set_mci_info(&treo680_mci_platform_data); treo680_udc_init(); diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 825f540176d..3981e0356d1 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -367,6 +367,9 @@ static struct pxamci_platform_data trizeps4_mci_platform_data = { .exit = trizeps4_mci_exit, .get_ro = NULL, /* write-protection not supported */ .setpower = NULL, /* power-switching not supported */ + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; /**************************************************************************** @@ -412,6 +415,7 @@ static void trizeps4_irda_transceiver_mode(struct device *dev, int mode) } static struct pxaficp_platform_data trizeps4_ficp_platform_data = { + .gpio_pwdown = -1, .transceiver_cap = IR_SIRMODE | IR_FIRMODE | IR_OFF, .transceiver_mode = trizeps4_irda_transceiver_mode, .startup = trizeps4_irda_startup, diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c new file mode 100644 index 00000000000..3fd79cbb36c --- /dev/null +++ b/arch/arm/mach-pxa/xcep.c @@ -0,0 +1,187 @@ +/* linux/arch/arm/mach-pxa/xcep.c + * + * Support for the Iskratel Electronics XCEP platform as used in + * the Libera instruments from Instrumentation Technologies. + * + * Author: Ales Bardorfer <ales@i-tech.si> + * Contributions by: Abbott, MG (Michael) <michael.abbott@diamond.ac.uk> + * Contributions by: Matej Kenda <matej.kenda@i-tech.si> + * Created: June 2006 + * Copyright: (C) 2006-2009 Instrumentation Technologies + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/platform_device.h> +#include <linux/i2c.h> +#include <linux/smc91x.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> + +#include <asm/mach-types.h> +#include <asm/mach/arch.h> +#include <asm/mach/irq.h> +#include <asm/mach/map.h> + +#include <plat/i2c.h> + +#include <mach/hardware.h> +#include <mach/pxa2xx-regs.h> +#include <mach/mfp-pxa25x.h> + +#include "generic.h" + +#define XCEP_ETH_PHYS (PXA_CS3_PHYS + 0x00000300) +#define XCEP_ETH_PHYS_END (PXA_CS3_PHYS + 0x000fffff) +#define XCEP_ETH_ATTR (PXA_CS3_PHYS + 0x02000000) +#define XCEP_ETH_ATTR_END (PXA_CS3_PHYS + 0x020fffff) +#define XCEP_ETH_IRQ IRQ_GPIO0 + +/* XCEP CPLD base */ +#define XCEP_CPLD_BASE 0xf0000000 + + +/* Flash partitions. */ + +static struct mtd_partition xcep_partitions[] = { + { + .name = "Bootloader", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_WRITEABLE + }, { + .name = "Bootloader ENV", + .size = 0x00040000, + .offset = 0x00040000, + .mask_flags = MTD_WRITEABLE + }, { + .name = "Kernel", + .size = 0x00100000, + .offset = 0x00080000, + }, { + .name = "Rescue fs", + .size = 0x00280000, + .offset = 0x00180000, + }, { + .name = "Filesystem", + .size = MTDPART_SIZ_FULL, + .offset = 0x00400000 + } +}; + +static struct physmap_flash_data xcep_flash_data[] = { + { + .width = 4, /* bankwidth in bytes */ + .parts = xcep_partitions, + .nr_parts = ARRAY_SIZE(xcep_partitions) + } +}; + +static struct resource flash_resource = { + .start = PXA_CS0_PHYS, + .end = PXA_CS0_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device flash_device = { + .name = "physmap-flash", + .id = 0, + .dev = { + .platform_data = xcep_flash_data, + }, + .resource = &flash_resource, + .num_resources = 1, +}; + + + +/* SMC LAN91C111 network controller. */ + +static struct resource smc91x_resources[] = { + [0] = { + .name = "smc91x-regs", + .start = XCEP_ETH_PHYS, + .end = XCEP_ETH_PHYS_END, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = XCEP_ETH_IRQ, + .end = XCEP_ETH_IRQ, + .flags = IORESOURCE_IRQ, + }, + [2] = { + .name = "smc91x-attrib", + .start = XCEP_ETH_ATTR, + .end = XCEP_ETH_ATTR_END, + .flags = IORESOURCE_MEM, + }, +}; + +static struct smc91x_platdata xcep_smc91x_info = { + .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, +}; + +static struct platform_device smc91x_device = { + .name = "smc91x", + .id = -1, + .num_resources = ARRAY_SIZE(smc91x_resources), + .resource = smc91x_resources, + .dev = { + .platform_data = &xcep_smc91x_info, + }, +}; + + +static struct platform_device *devices[] __initdata = { + &flash_device, + &smc91x_device, +}; + + +/* We have to state that there are HWMON devices on the I2C bus on XCEP. + * Drivers for HWMON verify capabilities of the adapter when loading and + * refuse to attach if the adapter doesn't support HWMON class of devices. + * See also Documentation/i2c/porting-clients. */ +static struct i2c_pxa_platform_data xcep_i2c_platform_data = { + .class = I2C_CLASS_HWMON +}; + + +static mfp_cfg_t xcep_pin_config[] __initdata = { + GPIO79_nCS_3, /* SMC 91C111 chip select. */ + GPIO80_nCS_4, /* CPLD chip select. */ + /* SSP communication to MSP430 */ + GPIO23_SSP1_SCLK, + GPIO24_SSP1_SFRM, + GPIO25_SSP1_TXD, + GPIO26_SSP1_RXD, + GPIO27_SSP1_EXTCLK +}; + +static void __init xcep_init(void) +{ + pxa2xx_mfp_config(ARRAY_AND_SIZE(xcep_pin_config)); + + /* See Intel XScale Developer's Guide for details */ + /* Set RDF and RDN to appropriate values (chip select 3 (smc91x)) */ + MSC1 = (MSC1 & 0xffff) | 0xD5540000; + /* Set RDF and RDN to appropriate values (chip select 5 (fpga)) */ + MSC2 = (MSC2 & 0xffff) | 0x72A00000; + + platform_add_devices(ARRAY_AND_SIZE(devices)); + pxa_set_i2c_info(&xcep_i2c_platform_data); +} + +MACHINE_START(XCEP, "Iskratel XCEP") + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .init_machine = xcep_init, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c index 218d2001f1d..09784d3954e 100644 --- a/arch/arm/mach-pxa/zylonite.c +++ b/arch/arm/mach-pxa/zylonite.c @@ -290,6 +290,9 @@ static struct pxamci_platform_data zylonite_mci_platform_data = { .init = zylonite_mci_init, .exit = zylonite_mci_exit, .get_ro = zylonite_mci_ro, + .gpio_card_detect = -1, + .gpio_card_ro = -1, + .gpio_power = -1, }; static struct pxamci_platform_data zylonite_mci2_platform_data = { diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index dc3519c50ab..a2083b60e3f 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -30,6 +30,7 @@ #include <linux/io.h> #include <linux/smsc911x.h> #include <linux/ata_platform.h> +#include <linux/amba/mmci.h> #include <asm/clkdev.h> #include <asm/system.h> @@ -44,7 +45,6 @@ #include <asm/mach/flash.h> #include <asm/mach/irq.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/hardware/gic.h> @@ -237,14 +237,14 @@ static unsigned int realview_mmc_status(struct device *dev) return readl(REALVIEW_SYSMCI) & mask; } -struct mmc_platform_data realview_mmc0_plat_data = { +struct mmci_platform_data realview_mmc0_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = realview_mmc_status, .gpio_wp = 17, .gpio_cd = 16, }; -struct mmc_platform_data realview_mmc1_plat_data = { +struct mmci_platform_data realview_mmc1_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = realview_mmc_status, .gpio_wp = 19, @@ -296,31 +296,31 @@ static struct clk ref24_clk = { static struct clk_lookup lookups[] = { { /* UART0 */ - .dev_id = "dev:f1", + .dev_id = "dev:uart0", .clk = &ref24_clk, }, { /* UART1 */ - .dev_id = "dev:f2", + .dev_id = "dev:uart1", .clk = &ref24_clk, }, { /* UART2 */ - .dev_id = "dev:f3", + .dev_id = "dev:uart2", .clk = &ref24_clk, }, { /* UART3 */ - .dev_id = "fpga:09", + .dev_id = "fpga:uart3", .clk = &ref24_clk, }, { /* KMI0 */ - .dev_id = "fpga:06", + .dev_id = "fpga:kmi0", .clk = &ref24_clk, }, { /* KMI1 */ - .dev_id = "fpga:07", + .dev_id = "fpga:kmi1", .clk = &ref24_clk, }, { /* MMC0 */ - .dev_id = "fpga:05", + .dev_id = "fpga:mmc0", .clk = &ref24_clk, }, { /* EB:CLCD */ - .dev_id = "dev:20", + .dev_id = "dev:clcd", .clk = &oscvco_clk, }, { /* PB:CLCD */ - .dev_id = "issp:20", + .dev_id = "issp:clcd", .clk = &oscvco_clk, } }; diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h index 59a337ba4be..46cd6acb4d4 100644 --- a/arch/arm/mach-realview/core.h +++ b/arch/arm/mach-realview/core.h @@ -47,8 +47,8 @@ static struct amba_device name##_device = { \ extern struct platform_device realview_flash_device; extern struct platform_device realview_cf_device; extern struct platform_device realview_i2c_device; -extern struct mmc_platform_data realview_mmc0_plat_data; -extern struct mmc_platform_data realview_mmc1_plat_data; +extern struct mmci_platform_data realview_mmc0_plat_data; +extern struct mmci_platform_data realview_mmc1_plat_data; extern struct clcd_board clcd_plat_data; extern void __iomem *gic_cpu_base_addr; extern void __iomem *timer0_va_base; diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index abd13b44867..1d65e64ae57 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -24,6 +24,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <mach/hardware.h> @@ -37,7 +38,6 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/mach/time.h> #include <mach/board-eb.h> @@ -193,27 +193,27 @@ static struct pl061_platform_data gpio2_plat_data = { #define EB_SSP_DMA { 9, 8 } /* FPGA Primecells */ -AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); -AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); -AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); -AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); -AMBA_DEVICE(uart3, "fpga:09", EB_UART3, NULL); +AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:uart3", EB_UART3, NULL); /* DevChip Primecells */ -AMBA_DEVICE(smc, "dev:00", EB_SMC, NULL); -AMBA_DEVICE(clcd, "dev:20", EB_CLCD, &clcd_plat_data); -AMBA_DEVICE(dmac, "dev:30", DMAC, NULL); -AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); -AMBA_DEVICE(wdog, "dev:e1", EB_WATCHDOG, NULL); -AMBA_DEVICE(gpio0, "dev:e4", EB_GPIO0, &gpio0_plat_data); -AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); -AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); -AMBA_DEVICE(rtc, "dev:e8", EB_RTC, NULL); -AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); -AMBA_DEVICE(uart0, "dev:f1", EB_UART0, NULL); -AMBA_DEVICE(uart1, "dev:f2", EB_UART1, NULL); -AMBA_DEVICE(uart2, "dev:f3", EB_UART2, NULL); -AMBA_DEVICE(ssp0, "dev:f4", EB_SSP, NULL); +AMBA_DEVICE(smc, "dev:smc", EB_SMC, NULL); +AMBA_DEVICE(clcd, "dev:clcd", EB_CLCD, &clcd_plat_data); +AMBA_DEVICE(dmac, "dev:dmac", DMAC, NULL); +AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:wdog", EB_WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:gpio0", EB_GPIO0, &gpio0_plat_data); +AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data); +AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data); +AMBA_DEVICE(rtc, "dev:rtc", EB_RTC, NULL); +AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:uart0", EB_UART0, NULL); +AMBA_DEVICE(uart1, "dev:uart1", EB_UART1, NULL); +AMBA_DEVICE(uart2, "dev:uart2", EB_UART2, NULL); +AMBA_DEVICE(ssp0, "dev:ssp0", EB_SSP, NULL); static struct amba_device *amba_devs[] __initdata = { &dmac_device, diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c index 17fbb0e889b..2817fe09931 100644 --- a/arch/arm/mach-realview/realview_pb1176.c +++ b/arch/arm/mach-realview/realview_pb1176.c @@ -24,6 +24,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <mach/hardware.h> @@ -37,7 +38,6 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/mach/time.h> #include <mach/board-pb1176.h> @@ -170,29 +170,29 @@ static struct pl061_platform_data gpio2_plat_data = { #define PB1176_SSP_DMA { 9, 8 } /* FPGA Primecells */ -AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); -AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); -AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); -AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); -AMBA_DEVICE(uart3, "fpga:09", PB1176_UART3, NULL); +AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:uart3", PB1176_UART3, NULL); /* DevChip Primecells */ -AMBA_DEVICE(smc, "dev:00", PB1176_SMC, NULL); -AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); -AMBA_DEVICE(wdog, "dev:e1", PB1176_WATCHDOG, NULL); -AMBA_DEVICE(gpio0, "dev:e4", PB1176_GPIO0, &gpio0_plat_data); -AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); -AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); -AMBA_DEVICE(rtc, "dev:e8", PB1176_RTC, NULL); -AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); -AMBA_DEVICE(uart0, "dev:f1", PB1176_UART0, NULL); -AMBA_DEVICE(uart1, "dev:f2", PB1176_UART1, NULL); -AMBA_DEVICE(uart2, "dev:f3", PB1176_UART2, NULL); -AMBA_DEVICE(ssp0, "dev:f4", PB1176_SSP, NULL); +AMBA_DEVICE(smc, "dev:smc", PB1176_SMC, NULL); +AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:wdog", PB1176_WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:gpio0", PB1176_GPIO0, &gpio0_plat_data); +AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data); +AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data); +AMBA_DEVICE(rtc, "dev:rtc", PB1176_RTC, NULL); +AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:uart0", PB1176_UART0, NULL); +AMBA_DEVICE(uart1, "dev:uart1", PB1176_UART1, NULL); +AMBA_DEVICE(uart2, "dev:uart2", PB1176_UART2, NULL); +AMBA_DEVICE(ssp0, "dev:ssp0", PB1176_SSP, NULL); /* Primecells on the NEC ISSP chip */ -AMBA_DEVICE(clcd, "issp:20", PB1176_CLCD, &clcd_plat_data); -//AMBA_DEVICE(dmac, "issp:30", PB1176_DMAC, NULL); +AMBA_DEVICE(clcd, "issp:clcd", PB1176_CLCD, &clcd_plat_data); +//AMBA_DEVICE(dmac, "issp:dmac", PB1176_DMAC, NULL); static struct amba_device *amba_devs[] __initdata = { // &dmac_device, diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c index fdd042b85f4..94680fcf726 100644 --- a/arch/arm/mach-realview/realview_pb11mp.c +++ b/arch/arm/mach-realview/realview_pb11mp.c @@ -24,6 +24,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <mach/hardware.h> @@ -38,7 +39,6 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/mach/time.h> #include <mach/board-pb11mp.h> @@ -172,29 +172,29 @@ static struct pl061_platform_data gpio2_plat_data = { #define PB11MP_SSP_DMA { 9, 8 } /* FPGA Primecells */ -AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); -AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); -AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); -AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); -AMBA_DEVICE(uart3, "fpga:09", PB11MP_UART3, NULL); +AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:uart3", PB11MP_UART3, NULL); /* DevChip Primecells */ -AMBA_DEVICE(smc, "dev:00", PB11MP_SMC, NULL); -AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); -AMBA_DEVICE(wdog, "dev:e1", PB11MP_WATCHDOG, NULL); -AMBA_DEVICE(gpio0, "dev:e4", PB11MP_GPIO0, &gpio0_plat_data); -AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); -AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); -AMBA_DEVICE(rtc, "dev:e8", PB11MP_RTC, NULL); -AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); -AMBA_DEVICE(uart0, "dev:f1", PB11MP_UART0, NULL); -AMBA_DEVICE(uart1, "dev:f2", PB11MP_UART1, NULL); -AMBA_DEVICE(uart2, "dev:f3", PB11MP_UART2, NULL); -AMBA_DEVICE(ssp0, "dev:f4", PB11MP_SSP, NULL); +AMBA_DEVICE(smc, "dev:smc", PB11MP_SMC, NULL); +AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:wdog", PB11MP_WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:gpio0", PB11MP_GPIO0, &gpio0_plat_data); +AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data); +AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data); +AMBA_DEVICE(rtc, "dev:rtc", PB11MP_RTC, NULL); +AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:uart0", PB11MP_UART0, NULL); +AMBA_DEVICE(uart1, "dev:uart1", PB11MP_UART1, NULL); +AMBA_DEVICE(uart2, "dev:uart2", PB11MP_UART2, NULL); +AMBA_DEVICE(ssp0, "dev:ssp0", PB11MP_SSP, NULL); /* Primecells on the NEC ISSP chip */ -AMBA_DEVICE(clcd, "issp:20", PB11MP_CLCD, &clcd_plat_data); -AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); +AMBA_DEVICE(clcd, "issp:clcd", PB11MP_CLCD, &clcd_plat_data); +AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL); static struct amba_device *amba_devs[] __initdata = { &dmac_device, diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c index 70bba9900d9..941beb2b970 100644 --- a/arch/arm/mach-realview/realview_pba8.c +++ b/arch/arm/mach-realview/realview_pba8.c @@ -24,6 +24,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <asm/irq.h> @@ -34,7 +35,6 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/mach/time.h> #include <mach/hardware.h> @@ -162,29 +162,29 @@ static struct pl061_platform_data gpio2_plat_data = { #define PBA8_SSP_DMA { 9, 8 } /* FPGA Primecells */ -AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); -AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); -AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); -AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); -AMBA_DEVICE(uart3, "fpga:09", PBA8_UART3, NULL); +AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:uart3", PBA8_UART3, NULL); /* DevChip Primecells */ -AMBA_DEVICE(smc, "dev:00", PBA8_SMC, NULL); -AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); -AMBA_DEVICE(wdog, "dev:e1", PBA8_WATCHDOG, NULL); -AMBA_DEVICE(gpio0, "dev:e4", PBA8_GPIO0, &gpio0_plat_data); -AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); -AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); -AMBA_DEVICE(rtc, "dev:e8", PBA8_RTC, NULL); -AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); -AMBA_DEVICE(uart0, "dev:f1", PBA8_UART0, NULL); -AMBA_DEVICE(uart1, "dev:f2", PBA8_UART1, NULL); -AMBA_DEVICE(uart2, "dev:f3", PBA8_UART2, NULL); -AMBA_DEVICE(ssp0, "dev:f4", PBA8_SSP, NULL); +AMBA_DEVICE(smc, "dev:smc", PBA8_SMC, NULL); +AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:wdog", PBA8_WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:gpio0", PBA8_GPIO0, &gpio0_plat_data); +AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data); +AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data); +AMBA_DEVICE(rtc, "dev:rtc", PBA8_RTC, NULL); +AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:uart0", PBA8_UART0, NULL); +AMBA_DEVICE(uart1, "dev:uart1", PBA8_UART1, NULL); +AMBA_DEVICE(uart2, "dev:uart2", PBA8_UART2, NULL); +AMBA_DEVICE(ssp0, "dev:ssp0", PBA8_SSP, NULL); /* Primecells on the NEC ISSP chip */ -AMBA_DEVICE(clcd, "issp:20", PBA8_CLCD, &clcd_plat_data); -AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); +AMBA_DEVICE(clcd, "issp:clcd", PBA8_CLCD, &clcd_plat_data); +AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL); static struct amba_device *amba_devs[] __initdata = { &dmac_device, diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index ce6c5d25fbe..7e4bc6cdca5 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -23,6 +23,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <asm/irq.h> @@ -34,7 +35,6 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include <asm/mach/time.h> #include <mach/hardware.h> @@ -182,29 +182,29 @@ static struct pl061_platform_data gpio2_plat_data = { #define PBX_SSP_DMA { 9, 8 } /* FPGA Primecells */ -AMBA_DEVICE(aaci, "fpga:04", AACI, NULL); -AMBA_DEVICE(mmc0, "fpga:05", MMCI0, &realview_mmc0_plat_data); -AMBA_DEVICE(kmi0, "fpga:06", KMI0, NULL); -AMBA_DEVICE(kmi1, "fpga:07", KMI1, NULL); -AMBA_DEVICE(uart3, "fpga:09", PBX_UART3, NULL); +AMBA_DEVICE(aaci, "fpga:aaci", AACI, NULL); +AMBA_DEVICE(mmc0, "fpga:mmc0", MMCI0, &realview_mmc0_plat_data); +AMBA_DEVICE(kmi0, "fpga:kmi0", KMI0, NULL); +AMBA_DEVICE(kmi1, "fpga:kmi1", KMI1, NULL); +AMBA_DEVICE(uart3, "fpga:uart3", PBX_UART3, NULL); /* DevChip Primecells */ -AMBA_DEVICE(smc, "dev:00", PBX_SMC, NULL); -AMBA_DEVICE(sctl, "dev:e0", SCTL, NULL); -AMBA_DEVICE(wdog, "dev:e1", PBX_WATCHDOG, NULL); -AMBA_DEVICE(gpio0, "dev:e4", PBX_GPIO0, &gpio0_plat_data); -AMBA_DEVICE(gpio1, "dev:e5", GPIO1, &gpio1_plat_data); -AMBA_DEVICE(gpio2, "dev:e6", GPIO2, &gpio2_plat_data); -AMBA_DEVICE(rtc, "dev:e8", PBX_RTC, NULL); -AMBA_DEVICE(sci0, "dev:f0", SCI, NULL); -AMBA_DEVICE(uart0, "dev:f1", PBX_UART0, NULL); -AMBA_DEVICE(uart1, "dev:f2", PBX_UART1, NULL); -AMBA_DEVICE(uart2, "dev:f3", PBX_UART2, NULL); -AMBA_DEVICE(ssp0, "dev:f4", PBX_SSP, NULL); +AMBA_DEVICE(smc, "dev:smc", PBX_SMC, NULL); +AMBA_DEVICE(sctl, "dev:sctl", SCTL, NULL); +AMBA_DEVICE(wdog, "dev:wdog", PBX_WATCHDOG, NULL); +AMBA_DEVICE(gpio0, "dev:gpio0", PBX_GPIO0, &gpio0_plat_data); +AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1, &gpio1_plat_data); +AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2, &gpio2_plat_data); +AMBA_DEVICE(rtc, "dev:rtc", PBX_RTC, NULL); +AMBA_DEVICE(sci0, "dev:sci0", SCI, NULL); +AMBA_DEVICE(uart0, "dev:uart0", PBX_UART0, NULL); +AMBA_DEVICE(uart1, "dev:uart1", PBX_UART1, NULL); +AMBA_DEVICE(uart2, "dev:uart2", PBX_UART2, NULL); +AMBA_DEVICE(ssp0, "dev:ssp0", PBX_SSP, NULL); /* Primecells on the NEC ISSP chip */ -AMBA_DEVICE(clcd, "issp:20", PBX_CLCD, &clcd_plat_data); -AMBA_DEVICE(dmac, "issp:30", DMAC, NULL); +AMBA_DEVICE(clcd, "issp:clcd", PBX_CLCD, &clcd_plat_data); +AMBA_DEVICE(dmac, "issp:dmac", DMAC, NULL); static struct amba_device *amba_devs[] __initdata = { &dmac_device, diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig index d8c023d4df3..3d4e9da3fa5 100644 --- a/arch/arm/mach-s3c2410/Kconfig +++ b/arch/arm/mach-s3c2410/Kconfig @@ -77,6 +77,7 @@ config ARCH_H1940 select CPU_S3C2410 select PM_H1940 if PM select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the HP IPAQ H1940 @@ -89,6 +90,7 @@ config MACH_N30 bool "Acer N30 family" select CPU_S3C2410 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you want suppt for the Acer N30, Acer N35, Navman PiN570, Yakumo AlphaX or Airis NC05 PDAs. @@ -103,6 +105,7 @@ config ARCH_BAST select S3C24XX_DCLK select ISA select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Simtec Electronics EB2410ITX development board (also known as BAST) @@ -111,6 +114,7 @@ config MACH_OTOM bool "NexVision OTOM Board" select CPU_S3C2410 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Nex Vision OTOM board @@ -154,6 +158,7 @@ config MACH_QT2410 bool "QT2410" select CPU_S3C2410 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Armzone QT2410 diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig index 35c1bde89cf..c2bdc4635d1 100644 --- a/arch/arm/mach-s3c2412/Kconfig +++ b/arch/arm/mach-s3c2412/Kconfig @@ -48,6 +48,7 @@ config MACH_JIVE bool "Logitech Jive" select CPU_S3C2412 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Logitech Jive. @@ -61,6 +62,7 @@ config MACH_SMDK2413 select MACH_S3C2413 select MACH_SMDK select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using an SMDK2413 @@ -84,6 +86,7 @@ config MACH_VSTMS bool "VMSTMS" select CPU_S3C2412 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using an VSTMS board diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig index 8ae1b288f7f..d7bba919a77 100644 --- a/arch/arm/mach-s3c2440/Kconfig +++ b/arch/arm/mach-s3c2440/Kconfig @@ -48,6 +48,7 @@ config MACH_OSIRIS select S3C2440_XTAL_12000000 select S3C2410_IOTIMING if S3C2440_CPUFREQ select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Simtec IM2440D20 module, also known as the Osiris. @@ -57,6 +58,7 @@ config MACH_RX3715 select CPU_S3C2440 select S3C2440_XTAL_16934400 select PM_H1940 if PM + select S3C_DEV_NAND help Say Y here if you are using the HP iPAQ rx3715. @@ -66,6 +68,7 @@ config ARCH_S3C2440 select S3C2440_XTAL_16934400 select MACH_SMDK select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the SMDK2440. @@ -74,6 +77,7 @@ config MACH_NEXCODER_2440 select CPU_S3C2440 select S3C2440_XTAL_12000000 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the Nex Vision NEXCODER 2440 Light Board @@ -88,6 +92,7 @@ config MACH_AT2440EVB bool "Avantech AT2440EVB development board" select CPU_S3C2440 select S3C_DEV_USB_HOST + select S3C_DEV_NAND help Say Y here if you are using the AT2440EVB development board @@ -97,6 +102,7 @@ config MACH_MINI2440 select EEPROM_AT24 select LEDS_TRIGGER_BACKLIGHT select SND_S3C24XX_SOC_S3C24XX_UDA134X + select S3C_DEV_NAND help Say Y here to select support for the MINI2440. Is a 10cm x 10cm board available via various sources. It can come with a 3.5" or 7" touch LCD. diff --git a/arch/arm/mach-s3c6400/Kconfig b/arch/arm/mach-s3c6400/Kconfig index f5af212066c..770b72067e3 100644 --- a/arch/arm/mach-s3c6400/Kconfig +++ b/arch/arm/mach-s3c6400/Kconfig @@ -26,6 +26,7 @@ config MACH_SMDK6400 bool "SMDK6400" select CPU_S3C6400 select S3C_DEV_HSMMC + select S3C_DEV_NAND select S3C6400_SETUP_SDHCI help Machine support for the Samsung SMDK6400 diff --git a/arch/arm/mach-s3c6410/Kconfig b/arch/arm/mach-s3c6410/Kconfig index f9d0f09f976..53fc3ff657f 100644 --- a/arch/arm/mach-s3c6410/Kconfig +++ b/arch/arm/mach-s3c6410/Kconfig @@ -102,6 +102,7 @@ config MACH_HMT bool "Airgoo HMT" select CPU_S3C6410 select S3C_DEV_FB + select S3C_DEV_NAND select S3C_DEV_USB_HOST select S3C64XX_SETUP_FB_24BPP select HAVE_PWM diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c index 95f9c5a6d6d..cb4521a6f42 100644 --- a/arch/arm/mach-sa1100/dma.c +++ b/arch/arm/mach-sa1100/dma.c @@ -39,7 +39,7 @@ typedef struct { static sa1100_dma_t dma_chan[SA1100_DMA_CHANNELS]; -static spinlock_t dma_list_lock; +static DEFINE_SPINLOCK(dma_list_lock); static irqreturn_t dma_irq_handler(int irq, void *dev_id) diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index 337b9aabce4..801b21e7f67 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -81,6 +81,18 @@ config MACH_U300_SEMI_IS_SHARED Memory Interface) from both from access and application side. +config MACH_U300_SPIDUMMY + bool "SSP/SPI dummy chip" + select SPI + select SPI_MASTER + select SPI_PL022 + help + This creates a small kernel module that creates a dummy + SPI device to be used for loopback tests. Regularly used + to test reference designs. If you're not testing SPI, + you don't need it. Selecting this will activate the + SPI framework and ARM PL022 support. + comment "All the settings below must match the bootloader's settings" config MACH_U300_ACCESS_MEM_SIZE diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile index 24950e0df4b..885b5c027c1 100644 --- a/arch/arm/mach-u300/Makefile +++ b/arch/arm/mach-u300/Makefile @@ -9,3 +9,6 @@ obj- := obj-$(CONFIG_ARCH_U300) += u300.o obj-$(CONFIG_MMC) += mmc.o +obj-$(CONFIG_SPI_PL022) += spi.o +obj-$(CONFIG_MACH_U300_SPIDUMMY) += dummyspichip.o +obj-$(CONFIG_I2C_STU300) += i2c.o diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 2e9b8ccd8ec..be60d6deee8 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -32,6 +32,8 @@ #include "clock.h" #include "mmc.h" +#include "spi.h" +#include "i2c.h" /* * Static I/O mappings that are needed for booting the U300 platforms. The @@ -378,14 +380,14 @@ static struct platform_device wdog_device = { }; static struct platform_device i2c0_device = { - .name = "stddci2c", + .name = "stu300", .id = 0, .num_resources = ARRAY_SIZE(i2c0_resources), .resource = i2c0_resources, }; static struct platform_device i2c1_device = { - .name = "stddci2c", + .name = "stu300", .id = 1, .num_resources = ARRAY_SIZE(i2c1_resources), .resource = i2c1_resources, @@ -611,6 +613,8 @@ void __init u300_init_devices(void) /* Wait for the PLL208 to lock if not locked in yet */ while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) & U300_SYSCON_CSR_PLL208_LOCK_IND)); + /* Initialize SPI device with some board specifics */ + u300_spi_init(&pl022_device); /* Register the AMBA devices in the AMBA bus abstraction layer */ u300_clock_primecells(); @@ -622,6 +626,12 @@ void __init u300_init_devices(void) u300_assign_physmem(); + /* Register subdevices on the I2C buses */ + u300_i2c_register_board_devices(); + + /* Register subdevices on the SPI bus */ + u300_spi_register_board_devices(); + /* Register the platform devices */ platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs)); diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c new file mode 100644 index 00000000000..962f9de454d --- /dev/null +++ b/arch/arm/mach-u300/dummyspichip.c @@ -0,0 +1,290 @@ +/* + * arch/arm/mach-u300/dummyspichip.c + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * This is a dummy loopback SPI "chip" used for testing SPI. + * Author: Linus Walleij <linus.walleij@stericsson.com> + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/sysfs.h> +#include <linux/mutex.h> +#include <linux/spi/spi.h> +#include <linux/dma-mapping.h> +/* + * WARNING! Do not include this pl022-specific controller header + * for any generic driver. It is only done in this dummy chip + * because we alter the chip configuration in order to test some + * different settings on the loopback device. Normal chip configs + * shall be STATIC and not altered by the driver! + */ +#include <linux/amba/pl022.h> + +struct dummy { + struct device *dev; + struct mutex lock; +}; + +#define DMA_TEST_SIZE 2048 + +/* When we cat /sys/bus/spi/devices/spi0.0/looptest this will be triggered */ +static ssize_t dummy_looptest(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct spi_device *spi = to_spi_device(dev); + struct dummy *p_dummy = dev_get_drvdata(&spi->dev); + + /* + * WARNING! Do not dereference the chip-specific data in any normal + * driver for a chip. It is usually STATIC and shall not be read + * or written to. Your chip driver should NOT depend on fields in this + * struct, this is just used here to alter the behaviour of the chip + * in order to perform tests. + */ + struct pl022_config_chip *chip_info = spi->controller_data; + int status; + u8 txbuf[14] = {0xDE, 0xAD, 0xBE, 0xEF, 0x2B, 0xAD, + 0xCA, 0xFE, 0xBA, 0xBE, 0xB1, 0x05, + 0xF0, 0x0D}; + u8 rxbuf[14]; + u8 *bigtxbuf_virtual; + u8 *bigrxbuf_virtual; + + if (mutex_lock_interruptible(&p_dummy->lock)) + return -ERESTARTSYS; + + bigtxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL); + if (bigtxbuf_virtual == NULL) { + status = -ENOMEM; + goto out; + } + bigrxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL); + + /* Fill TXBUF with some happy pattern */ + memset(bigtxbuf_virtual, 0xAA, DMA_TEST_SIZE); + + /* + * Force chip to 8 bit mode + * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC! + */ + chip_info->data_size = SSP_DATA_BITS_8; + /* You should NOT DO THIS EITHER */ + spi->master->setup(spi); + + /* Now run the tests for 8bit mode */ + pr_info("Simple test 1: write 0xAA byte, read back garbage byte " + "in 8bit mode\n"); + status = spi_w8r8(spi, 0xAA); + if (status < 0) + pr_warning("Siple test 1: FAILURE: spi_write_then_read " + "failed with status %d\n", status); + else + pr_info("Simple test 1: SUCCESS!\n"); + + pr_info("Simple test 2: write 8 bytes, read back 8 bytes garbage " + "in 8bit mode (full FIFO)\n"); + status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8); + if (status < 0) + pr_warning("Simple test 2: FAILURE: spi_write_then_read() " + "failed with status %d\n", status); + else + pr_info("Simple test 2: SUCCESS!\n"); + + pr_info("Simple test 3: write 14 bytes, read back 14 bytes garbage " + "in 8bit mode (see if we overflow FIFO)\n"); + status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14); + if (status < 0) + pr_warning("Simple test 3: FAILURE: failed with status %d " + "(probably FIFO overrun)\n", status); + else + pr_info("Simple test 3: SUCCESS!\n"); + + pr_info("Simple test 4: write 8 bytes with spi_write(), read 8 " + "bytes garbage with spi_read() in 8bit mode\n"); + status = spi_write(spi, &txbuf[0], 8); + if (status < 0) + pr_warning("Simple test 4 step 1: FAILURE: spi_write() " + "failed with status %d\n", status); + else + pr_info("Simple test 4 step 1: SUCCESS!\n"); + status = spi_read(spi, &rxbuf[0], 8); + if (status < 0) + pr_warning("Simple test 4 step 2: FAILURE: spi_read() " + "failed with status %d\n", status); + else + pr_info("Simple test 4 step 2: SUCCESS!\n"); + + pr_info("Simple test 5: write 14 bytes with spi_write(), read " + "14 bytes garbage with spi_read() in 8bit mode\n"); + status = spi_write(spi, &txbuf[0], 14); + if (status < 0) + pr_warning("Simple test 5 step 1: FAILURE: spi_write() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 5 step 1: SUCCESS!\n"); + status = spi_read(spi, &rxbuf[0], 14); + if (status < 0) + pr_warning("Simple test 5 step 2: FAILURE: spi_read() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 5: SUCCESS!\n"); + + pr_info("Simple test 6: write %d bytes with spi_write(), " + "read %d bytes garbage with spi_read() in 8bit mode\n", + DMA_TEST_SIZE, DMA_TEST_SIZE); + status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE); + if (status < 0) + pr_warning("Simple test 6 step 1: FAILURE: spi_write() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 6 step 1: SUCCESS!\n"); + status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE); + if (status < 0) + pr_warning("Simple test 6 step 2: FAILURE: spi_read() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 6: SUCCESS!\n"); + + + /* + * Force chip to 16 bit mode + * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC! + */ + chip_info->data_size = SSP_DATA_BITS_16; + /* You should NOT DO THIS EITHER */ + spi->master->setup(spi); + + pr_info("Simple test 7: write 0xAA byte, read back garbage byte " + "in 16bit bus mode\n"); + status = spi_w8r8(spi, 0xAA); + if (status == -EIO) + pr_info("Simple test 7: SUCCESS! (expected failure with " + "status EIO)\n"); + else if (status < 0) + pr_warning("Siple test 7: FAILURE: spi_write_then_read " + "failed with status %d\n", status); + else + pr_warning("Siple test 7: FAILURE: spi_write_then_read " + "succeeded but it was expected to fail!\n"); + + pr_info("Simple test 8: write 8 bytes, read back 8 bytes garbage " + "in 16bit mode (full FIFO)\n"); + status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8); + if (status < 0) + pr_warning("Simple test 8: FAILURE: spi_write_then_read() " + "failed with status %d\n", status); + else + pr_info("Simple test 8: SUCCESS!\n"); + + pr_info("Simple test 9: write 14 bytes, read back 14 bytes garbage " + "in 16bit mode (see if we overflow FIFO)\n"); + status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14); + if (status < 0) + pr_warning("Simple test 9: FAILURE: failed with status %d " + "(probably FIFO overrun)\n", status); + else + pr_info("Simple test 9: SUCCESS!\n"); + + pr_info("Simple test 10: write %d bytes with spi_write(), " + "read %d bytes garbage with spi_read() in 16bit mode\n", + DMA_TEST_SIZE, DMA_TEST_SIZE); + status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE); + if (status < 0) + pr_warning("Simple test 10 step 1: FAILURE: spi_write() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 10 step 1: SUCCESS!\n"); + + status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE); + if (status < 0) + pr_warning("Simple test 10 step 2: FAILURE: spi_read() " + "failed with status %d (probably FIFO overrun)\n", + status); + else + pr_info("Simple test 10: SUCCESS!\n"); + + status = sprintf(buf, "loop test complete\n"); + kfree(bigrxbuf_virtual); + kfree(bigtxbuf_virtual); + out: + mutex_unlock(&p_dummy->lock); + return status; +} + +static DEVICE_ATTR(looptest, S_IRUGO, dummy_looptest, NULL); + +static int __devinit pl022_dummy_probe(struct spi_device *spi) +{ + struct dummy *p_dummy; + int status; + + dev_info(&spi->dev, "probing dummy SPI device\n"); + + p_dummy = kzalloc(sizeof *p_dummy, GFP_KERNEL); + if (!p_dummy) + return -ENOMEM; + + dev_set_drvdata(&spi->dev, p_dummy); + mutex_init(&p_dummy->lock); + + /* sysfs hook */ + status = device_create_file(&spi->dev, &dev_attr_looptest); + if (status) { + dev_dbg(&spi->dev, "device_create_file looptest failure.\n"); + goto out_dev_create_looptest_failed; + } + + return 0; + +out_dev_create_looptest_failed: + dev_set_drvdata(&spi->dev, NULL); + kfree(p_dummy); + return status; +} + +static int __devexit pl022_dummy_remove(struct spi_device *spi) +{ + struct dummy *p_dummy = dev_get_drvdata(&spi->dev); + + dev_info(&spi->dev, "removing dummy SPI device\n"); + device_remove_file(&spi->dev, &dev_attr_looptest); + dev_set_drvdata(&spi->dev, NULL); + kfree(p_dummy); + + return 0; +} + +static struct spi_driver pl022_dummy_driver = { + .driver = { + .name = "spi-dummy", + .owner = THIS_MODULE, + }, + .probe = pl022_dummy_probe, + .remove = __devexit_p(pl022_dummy_remove), +}; + +static int __init pl022_init_dummy(void) +{ + return spi_register_driver(&pl022_dummy_driver); +} + +static void __exit pl022_exit_dummy(void) +{ + spi_unregister_driver(&pl022_dummy_driver); +} + +module_init(pl022_init_dummy); +module_exit(pl022_exit_dummy); + +MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>"); +MODULE_DESCRIPTION("PL022 SSP/SPI DUMMY Linux driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/mach-u300/gpio.c b/arch/arm/mach-u300/gpio.c index 308cdb197a9..63c8f27fb15 100644 --- a/arch/arm/mach-u300/gpio.c +++ b/arch/arm/mach-u300/gpio.c @@ -25,11 +25,6 @@ #include <linux/platform_device.h> #include <linux/gpio.h> -/* Need access to SYSCON registers for PADmuxing */ -#include <mach/syscon.h> - -#include "padmux.h" - /* Reference to GPIO block clock */ static struct clk *clk; @@ -606,14 +601,6 @@ static int __init gpio_probe(struct platform_device *pdev) writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR); #endif - /* Set up some padmuxing here */ -#ifdef CONFIG_MMC - pmx_set_mission_mode_mmc(); -#endif -#ifdef CONFIG_SPI_PL022 - pmx_set_mission_mode_spi(); -#endif - gpio_set_initial_values(); for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) { diff --git a/arch/arm/mach-u300/i2c.c b/arch/arm/mach-u300/i2c.c new file mode 100644 index 00000000000..10be1f888b2 --- /dev/null +++ b/arch/arm/mach-u300/i2c.c @@ -0,0 +1,43 @@ +/* + * arch/arm/mach-u300/i2c.c + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Register board i2c devices + * Author: Linus Walleij <linus.walleij@stericsson.com> + */ +#include <linux/kernel.h> +#include <linux/i2c.h> +#include <mach/irqs.h> + +static struct i2c_board_info __initdata bus0_i2c_board_info[] = { + { + .type = "ab3100", + .addr = 0x48, + .irq = IRQ_U300_IRQ0_EXT, + }, +}; + +static struct i2c_board_info __initdata bus1_i2c_board_info[] = { +#ifdef CONFIG_MACH_U300_BS335 + { + .type = "fwcam", + .addr = 0x10, + }, + { + .type = "fwcam", + .addr = 0x5d, + }, +#else + { }, +#endif +}; + +void __init u300_i2c_register_board_devices(void) +{ + i2c_register_board_info(0, bus0_i2c_board_info, + ARRAY_SIZE(bus0_i2c_board_info)); + i2c_register_board_info(1, bus1_i2c_board_info, + ARRAY_SIZE(bus1_i2c_board_info)); +} diff --git a/arch/arm/mach-u300/i2c.h b/arch/arm/mach-u300/i2c.h new file mode 100644 index 00000000000..485c02e5c06 --- /dev/null +++ b/arch/arm/mach-u300/i2c.h @@ -0,0 +1,23 @@ +/* + * arch/arm/mach-u300/i2c.h + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Register board i2c devices + * Author: Linus Walleij <linus.walleij@stericsson.com> + */ + +#ifndef MACH_U300_I2C_H +#define MACH_U300_I2C_H + +#ifdef CONFIG_I2C_STU300 +void __init u300_i2c_register_board_devices(void); +#else +/* Compile out this stuff if no I2C adapter is available */ +static inline void __init u300_i2c_register_board_devices(void) +{ +} +#endif + +#endif diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h index bf134bcc129..ab000df7fc0 100644 --- a/arch/arm/mach-u300/include/mach/memory.h +++ b/arch/arm/mach-u300/include/mach/memory.h @@ -35,6 +35,14 @@ #endif /* + * TCM memory whereabouts + */ +#define ITCM_OFFSET 0xffff2000 +#define ITCM_END 0xffff3fff +#define DTCM_OFFSET 0xffff4000 +#define DTCM_END 0xffff5fff + +/* * We enable a real big DMA buffer if need be. */ #define CONSISTENT_DMA_SIZE SZ_4M diff --git a/arch/arm/mach-u300/include/mach/syscon.h b/arch/arm/mach-u300/include/mach/syscon.h index 1c90d1b1ccb..7444f5c7da9 100644 --- a/arch/arm/mach-u300/include/mach/syscon.h +++ b/arch/arm/mach-u300/include/mach/syscon.h @@ -240,8 +240,13 @@ #define U300_SYSCON_PMC1LR_CDI_MASK (0xC000) #define U300_SYSCON_PMC1LR_CDI_CDI (0x0000) #define U300_SYSCON_PMC1LR_CDI_EMIF (0x4000) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_PMC1LR_CDI_CDI2 (0x8000) +#define U300_SYSCON_PMC1LR_CDI_WCDMA_APP_GPIO (0xC000) +#elif CONFIG_MACH_U300_BS365 #define U300_SYSCON_PMC1LR_CDI_GPIO (0x8000) #define U300_SYSCON_PMC1LR_CDI_WCDMA (0xC000) +#endif #define U300_SYSCON_PMC1LR_PDI_MASK (0x3000) #define U300_SYSCON_PMC1LR_PDI_PDI (0x0000) #define U300_SYSCON_PMC1LR_PDI_EGG (0x1000) @@ -345,19 +350,69 @@ #define U300_SYSCON_MMCR_MASK (0x0003) #define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE (0x0002) #define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE (0x0001) - +/* Pull up/down control (R/W) */ +#define U300_SYSCON_PUCR (0x104) +#define U300_SYSCON_PUCR_EMIF_1_WAIT_N_PU_ENABLE (0x0200) +#define U300_SYSCON_PUCR_EMIF_1_NFIF_READY_PU_ENABLE (0x0100) +#define U300_SYSCON_PUCR_EMIF_1_16BIT_PU_ENABLE (0x0080) +#define U300_SYSCON_PUCR_EMIF_1_8BIT_PU_ENABLE (0x0040) +#define U300_SYSCON_PUCR_KEY_IN_PU_EN_MASK (0x003F) +/* Padmux 2 control */ +#define U300_SYSCON_PMC2R (0x100) +#define U300_SYSCON_PMC2R_APP_MISC_0_MASK (0x00C0) +#define U300_SYSCON_PMC2R_APP_MISC_0_APP_GPIO (0x0000) +#define U300_SYSCON_PMC2R_APP_MISC_0_EMIF_SDRAM (0x0040) +#define U300_SYSCON_PMC2R_APP_MISC_0_MMC (0x0080) +#define U300_SYSCON_PMC2R_APP_MISC_0_CDI2 (0x00C0) +#define U300_SYSCON_PMC2R_APP_MISC_1_MASK (0x0300) +#define U300_SYSCON_PMC2R_APP_MISC_1_APP_GPIO (0x0000) +#define U300_SYSCON_PMC2R_APP_MISC_1_EMIF_SDRAM (0x0100) +#define U300_SYSCON_PMC2R_APP_MISC_1_MMC (0x0200) +#define U300_SYSCON_PMC2R_APP_MISC_1_CDI2 (0x0300) +#define U300_SYSCON_PMC2R_APP_MISC_2_MASK (0x0C00) +#define U300_SYSCON_PMC2R_APP_MISC_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC2R_APP_MISC_2_EMIF_SDRAM (0x0400) +#define U300_SYSCON_PMC2R_APP_MISC_2_MMC (0x0800) +#define U300_SYSCON_PMC2R_APP_MISC_2_CDI2 (0x0C00) +#define U300_SYSCON_PMC2R_APP_MISC_3_MASK (0x3000) +#define U300_SYSCON_PMC2R_APP_MISC_3_APP_GPIO (0x0000) +#define U300_SYSCON_PMC2R_APP_MISC_3_EMIF_SDRAM (0x1000) +#define U300_SYSCON_PMC2R_APP_MISC_3_MMC (0x2000) +#define U300_SYSCON_PMC2R_APP_MISC_3_CDI2 (0x3000) +#define U300_SYSCON_PMC2R_APP_MISC_4_MASK (0xC000) +#define U300_SYSCON_PMC2R_APP_MISC_4_APP_GPIO (0x0000) +#define U300_SYSCON_PMC2R_APP_MISC_4_EMIF_SDRAM (0x4000) +#define U300_SYSCON_PMC2R_APP_MISC_4_MMC (0x8000) +#define U300_SYSCON_PMC2R_APP_MISC_4_ACC_GPIO (0xC000) /* TODO: More SYSCON registers missing */ #define U300_SYSCON_PMC3R (0x10c) #define U300_SYSCON_PMC3R_APP_MISC_11_MASK (0xc000) #define U300_SYSCON_PMC3R_APP_MISC_11_SPI (0x4000) #define U300_SYSCON_PMC3R_APP_MISC_10_MASK (0x3000) #define U300_SYSCON_PMC3R_APP_MISC_10_SPI (0x1000) -/* TODO: Missing other configs, I just added the SPI stuff */ - +/* TODO: Missing other configs */ +#define U300_SYSCON_PMC4R (0x168) +#define U300_SYSCON_PMC4R_APP_MISC_12_MASK (0x0003) +#define U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO (0x0000) +#define U300_SYSCON_PMC4R_APP_MISC_13_MASK (0x000C) +#define U300_SYSCON_PMC4R_APP_MISC_13_CDI (0x0000) +#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA (0x0004) +#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA2 (0x0008) +#define U300_SYSCON_PMC4R_APP_MISC_13_APP_GPIO (0x000C) +#define U300_SYSCON_PMC4R_APP_MISC_14_MASK (0x0030) +#define U300_SYSCON_PMC4R_APP_MISC_14_CDI (0x0000) +#define U300_SYSCON_PMC4R_APP_MISC_14_SMIA (0x0010) +#define U300_SYSCON_PMC4R_APP_MISC_14_CDI2 (0x0020) +#define U300_SYSCON_PMC4R_APP_MISC_14_APP_GPIO (0x0030) +#define U300_SYSCON_PMC4R_APP_MISC_16_MASK (0x0300) +#define U300_SYSCON_PMC4R_APP_MISC_16_APP_GPIO_13 (0x0000) +#define U300_SYSCON_PMC4R_APP_MISC_16_APP_UART1_CTS (0x0100) +#define U300_SYSCON_PMC4R_APP_MISC_16_EMIF_1_STATIC_CS5_N (0x0200) /* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */ #define U300_SYSCON_S0CCR (0x120) #define U300_SYSCON_S0CCR_FIELD_MASK (0x43FF) #define U300_SYSCON_S0CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S0CCR_CLOCK_REQ_MONITOR (0x2000) #define U300_SYSCON_S0CCR_CLOCK_INV (0x0200) #define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK (0x01E0) #define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK (0x001E) @@ -375,6 +430,7 @@ #define U300_SYSCON_S1CCR (0x124) #define U300_SYSCON_S1CCR_FIELD_MASK (0x43FF) #define U300_SYSCON_S1CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S1CCR_CLOCK_REQ_MONITOR (0x2000) #define U300_SYSCON_S1CCR_CLOCK_INV (0x0200) #define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK (0x01E0) #define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK (0x001E) @@ -393,6 +449,7 @@ #define U300_SYSCON_S2CCR_FIELD_MASK (0xC3FF) #define U300_SYSCON_S2CCR_CLK_STEAL (0x8000) #define U300_SYSCON_S2CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S2CCR_CLOCK_REQ_MONITOR (0x2000) #define U300_SYSCON_S2CCR_CLOCK_INV (0x0200) #define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK (0x01E0) #define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK (0x001E) @@ -425,6 +482,44 @@ #define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM (0x000C) #define U300_SYSCON_MCR_PM1G_MODE_ENABLE (0x0002) #define U300_SYSCON_MCR_PMTG5_MODE_ENABLE (0x0001) +/* SC_PLL_IRQ_CONTROL 16bit (R/W) */ +#define U300_SYSCON_PICR (0x0130) +#define U300_SYSCON_PICR_MASK (0x00FF) +#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_LOW_ENABLE (0x0080) +#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_HIGH_ENABLE (0x0040) +#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_LOW_ENABLE (0x0020) +#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_HIGH_ENABLE (0x0010) +#define U300_SYSCON_PICR_IRQMASK_PLL13_UNLOCK_ENABLE (0x0008) +#define U300_SYSCON_PICR_IRQMASK_PLL13_LOCK_ENABLE (0x0004) +#define U300_SYSCON_PICR_IRQMASK_PLL208_UNLOCK_ENABLE (0x0002) +#define U300_SYSCON_PICR_IRQMASK_PLL208_LOCK_ENABLE (0x0001) +/* SC_PLL_IRQ_STATUS 16 bit (R/-) */ +#define U300_SYSCON_PISR (0x0134) +#define U300_SYSCON_PISR_MASK (0x000F) +#define U300_SYSCON_PISR_PLL13_UNLOCK_IND (0x0008) +#define U300_SYSCON_PISR_PLL13_LOCK_IND (0x0004) +#define U300_SYSCON_PISR_PLL208_UNLOCK_IND (0x0002) +#define U300_SYSCON_PISR_PLL208_LOCK_IND (0x0001) +/* SC_PLL_IRQ_CLEAR 16 bit (-/W) */ +#define U300_SYSCON_PICLR (0x0138) +#define U300_SYSCON_PICLR_MASK (0x000F) +#define U300_SYSCON_PICLR_RWMASK (0x0000) +#define U300_SYSCON_PICLR_PLL13_UNLOCK_SC (0x0008) +#define U300_SYSCON_PICLR_PLL13_LOCK_SC (0x0004) +#define U300_SYSCON_PICLR_PLL208_UNLOCK_SC (0x0002) +#define U300_SYSCON_PICLR_PLL208_LOCK_SC (0x0001) +/* CAMIF_CONTROL 16 bit (-/W) */ +#define U300_SYSCON_CICR (0x013C) +#define U300_SYSCON_CICR_MASK (0x0FFF) +#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_MASK (0x0F00) +#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT1 (0x0C00) +#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT0 (0x0300) +#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_MASK (0x00F0) +#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT1 (0x00C0) +#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT0 (0x0030) +#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_MASK (0x000F) +#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT1 (0x000C) +#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT0 (0x0003) /* Clock activity observability register 0 */ #define U300_SYSCON_C0OAR (0x140) #define U300_SYSCON_C0OAR_MASK (0xFFFF) @@ -513,7 +608,7 @@ /** * CPU medium frequency in MHz */ -#define SYSCON_CPU_CLOCK_MEDIUM 104 +#define SYSCON_CPU_CLOCK_MEDIUM 52 /** * CPU low frequency in MHz */ @@ -527,7 +622,7 @@ /** * EMIF medium frequency in MHz */ -#define SYSCON_EMIF_CLOCK_MEDIUM 104 +#define SYSCON_EMIF_CLOCK_MEDIUM 52 /** * EMIF low frequency in MHz */ @@ -541,7 +636,7 @@ /** * AHB medium frequency in MHz */ -#define SYSCON_AHB_CLOCK_MEDIUM 52 +#define SYSCON_AHB_CLOCK_MEDIUM 26 /** * AHB low frequency in MHz */ @@ -553,6 +648,15 @@ enum syscon_busmaster { SYSCON_BM_VIDEO_ENC }; +/* Selectr a resistor or a set of resistors */ +enum syscon_pull_up_down { + SYSCON_PU_KEY_IN_EN, + SYSCON_PU_EMIF_1_8_BIT_EN, + SYSCON_PU_EMIF_1_16_BIT_EN, + SYSCON_PU_EMIF_1_NFIF_READY_EN, + SYSCON_PU_EMIF_1_NFIF_WAIT_N_EN, +}; + /* * Note that this array must match the order of the array "clk_reg" * in syscon.c @@ -575,6 +679,7 @@ enum syscon_clk { SYSCON_CLKCONTROL_SPI, SYSCON_CLKCONTROL_I2S0_CORE, SYSCON_CLKCONTROL_I2S1_CORE, + SYSCON_CLKCONTROL_UART1, SYSCON_CLKCONTROL_AAIF, SYSCON_CLKCONTROL_AHB, SYSCON_CLKCONTROL_APEX, @@ -604,7 +709,8 @@ enum syscon_sysclk_mode { enum syscon_sysclk_req { SYSCON_SYSCLKREQ_DISABLED, - SYSCON_SYSCLKREQ_ACTIVE_LOW + SYSCON_SYSCLKREQ_ACTIVE_LOW, + SYSCON_SYSCLKREQ_MONITOR }; enum syscon_clk_mode { diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c index 585cc013639..7b6b016786b 100644 --- a/arch/arm/mach-u300/mmc.c +++ b/arch/arm/mach-u300/mmc.c @@ -19,15 +19,16 @@ #include <linux/regulator/consumer.h> #include <linux/regulator/machine.h> #include <linux/gpio.h> +#include <linux/amba/mmci.h> -#include <asm/mach/mmc.h> #include "mmc.h" +#include "padmux.h" struct mmci_card_event { struct input_dev *mmc_input; int mmc_inserted; struct work_struct workq; - struct mmc_platform_data mmc0_plat_data; + struct mmci_platform_data mmc0_plat_data; }; static unsigned int mmc_status(struct device *dev) @@ -146,6 +147,7 @@ int __devinit mmc_init(struct amba_device *adev) { struct mmci_card_event *mmci_card; struct device *mmcsd_device = &adev->dev; + struct pmx *pmx; int ret = 0; mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL); @@ -158,6 +160,8 @@ int __devinit mmc_init(struct amba_device *adev) mmci_card->mmc0_plat_data.status = mmc_status; mmci_card->mmc0_plat_data.gpio_wp = -1; mmci_card->mmc0_plat_data.gpio_cd = -1; + mmci_card->mmc0_plat_data.capabilities = MMC_CAP_MMC_HIGHSPEED | + MMC_CAP_SD_HIGHSPEED | MMC_CAP_4_BIT_DATA; mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data; @@ -207,6 +211,20 @@ int __devinit mmc_init(struct amba_device *adev) input_set_drvdata(mmci_card->mmc_input, mmci_card); + /* + * Setup padmuxing for MMC. Since this must always be + * compiled into the kernel, pmx is never released. + */ + pmx = pmx_get(mmcsd_device, U300_APP_PMX_MMC_SETTING); + + if (IS_ERR(pmx)) + pr_warning("Could not get padmux handle\n"); + else { + ret = pmx_activate(mmcsd_device, pmx); + if (IS_ERR_VALUE(ret)) + pr_warning("Could not activate padmuxing\n"); + } + ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback, mmci_card); diff --git a/arch/arm/mach-u300/padmux.c b/arch/arm/mach-u300/padmux.c index f3664564f08..4c93c6cefd3 100644 --- a/arch/arm/mach-u300/padmux.c +++ b/arch/arm/mach-u300/padmux.c @@ -6,53 +6,362 @@ * Copyright (C) 2009 ST-Ericsson AB * License terms: GNU General Public License (GPL) version 2 * U300 PADMUX functions - * Author: Linus Walleij <linus.walleij@stericsson.com> - * + * Author: Martin Persson <martin.persson@stericsson.com> */ -#include <linux/io.h> + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> #include <linux/err.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/bug.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include <mach/u300-regs.h> #include <mach/syscon.h> - #include "padmux.h" -/* Set the PAD MUX to route the MMC reader correctly to GPIO0. */ -void pmx_set_mission_mode_mmc(void) -{ - u16 val; - - val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); - val &= ~U300_SYSCON_PMC1LR_MMCSD_MASK; - writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); - val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); - val &= ~U300_SYSCON_PMC1HR_APP_GPIO_1_MASK; - val |= U300_SYSCON_PMC1HR_APP_GPIO_1_MMC; - writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); -} - -void pmx_set_mission_mode_spi(void) -{ - u16 val; - - /* Set up padmuxing so the SPI port and its chipselects are active */ - val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); - /* - * Activate the SPI port (disable the use of these pins for generic - * GPIO, DSP, AAIF - */ - val &= ~U300_SYSCON_PMC1HR_APP_SPI_2_MASK; - val |= U300_SYSCON_PMC1HR_APP_SPI_2_SPI; - /* - * Use GPIO pin SPI CS1 for CS1 actually (it can be used for other - * things also) - */ - val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK; - val |= U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI; - /* - * Use GPIO pin SPI CS2 for CS2 actually (it can be used for other - * things also) - */ - val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK; - val |= U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI; - writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); +static DEFINE_MUTEX(pmx_mutex); + +const u32 pmx_registers[] = { + (U300_SYSCON_VBASE + U300_SYSCON_PMC1LR), + (U300_SYSCON_VBASE + U300_SYSCON_PMC1HR), + (U300_SYSCON_VBASE + U300_SYSCON_PMC2R), + (U300_SYSCON_VBASE + U300_SYSCON_PMC3R), + (U300_SYSCON_VBASE + U300_SYSCON_PMC4R) +}; + +/* High level functionality */ + +/* Lazy dog: + * onmask = { + * {"PMC1LR" mask, "PMC1LR" value}, + * {"PMC1HR" mask, "PMC1HR" value}, + * {"PMC2R" mask, "PMC2R" value}, + * {"PMC3R" mask, "PMC3R" value}, + * {"PMC4R" mask, "PMC4R" value} + * } + */ +static struct pmx mmc_setting = { + .setting = U300_APP_PMX_MMC_SETTING, + .default_on = false, + .activated = false, + .name = "MMC", + .onmask = { + {U300_SYSCON_PMC1LR_MMCSD_MASK, + U300_SYSCON_PMC1LR_MMCSD_MMCSD}, + {0, 0}, + {0, 0}, + {0, 0}, + {U300_SYSCON_PMC4R_APP_MISC_12_MASK, + U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO} + }, +}; + +static struct pmx spi_setting = { + .setting = U300_APP_PMX_SPI_SETTING, + .default_on = false, + .activated = false, + .name = "SPI", + .onmask = {{0, 0}, + {U300_SYSCON_PMC1HR_APP_SPI_2_MASK | + U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK | + U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK, + U300_SYSCON_PMC1HR_APP_SPI_2_SPI | + U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI | + U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI}, + {0, 0}, + {0, 0}, + {0, 0} + }, +}; + +/* Available padmux settings */ +static struct pmx *pmx_settings[] = { + &mmc_setting, + &spi_setting, +}; + +static void update_registers(struct pmx *pmx, bool activate) +{ + u16 regval, val, mask; + int i; + + for (i = 0; i < ARRAY_SIZE(pmx_registers); i++) { + if (activate) + val = pmx->onmask[i].val; + else + val = 0; + + mask = pmx->onmask[i].mask; + if (mask != 0) { + regval = readw(pmx_registers[i]); + regval &= ~mask; + regval |= val; + writew(regval, pmx_registers[i]); + } + } +} + +struct pmx *pmx_get(struct device *dev, enum pmx_settings setting) +{ + int i; + struct pmx *pmx = ERR_PTR(-ENOENT); + + if (dev == NULL) + return ERR_PTR(-EINVAL); + + mutex_lock(&pmx_mutex); + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + + if (setting == pmx_settings[i]->setting) { + + if (pmx_settings[i]->dev != NULL) { + WARN(1, "padmux: required setting " + "in use by another consumer\n"); + } else { + pmx = pmx_settings[i]; + pmx->dev = dev; + dev_dbg(dev, "padmux: setting nr %d is now " + "bound to %s and ready to use\n", + setting, dev_name(dev)); + break; + } + } + } + mutex_unlock(&pmx_mutex); + + return pmx; +} +EXPORT_SYMBOL(pmx_get); + +int pmx_put(struct device *dev, struct pmx *pmx) +{ + int i; + int ret = -ENOENT; + + if (pmx == NULL || dev == NULL) + return -EINVAL; + + mutex_lock(&pmx_mutex); + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + + if (pmx->setting == pmx_settings[i]->setting) { + + if (dev != pmx->dev) { + WARN(1, "padmux: cannot release handle as " + "it is bound to another consumer\n"); + ret = -EINVAL; + break; + } else { + pmx_settings[i]->dev = NULL; + ret = 0; + break; + } + } + } + mutex_unlock(&pmx_mutex); + + return ret; +} +EXPORT_SYMBOL(pmx_put); + +int pmx_activate(struct device *dev, struct pmx *pmx) +{ + int i, j, ret; + ret = 0; + + if (pmx == NULL || dev == NULL) + return -EINVAL; + + mutex_lock(&pmx_mutex); + + /* Make sure the required bits are not used */ + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + + if (pmx_settings[i]->dev == NULL || pmx_settings[i] == pmx) + continue; + + for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) { + + if (pmx_settings[i]->onmask[j].mask & pmx-> + onmask[j].mask) { + /* More than one entry on the same bits */ + WARN(1, "padmux: cannot activate " + "setting. Bit conflict with " + "an active setting\n"); + + ret = -EUSERS; + goto exit; + } + } + } + update_registers(pmx, true); + pmx->activated = true; + dev_dbg(dev, "padmux: setting nr %d is activated\n", + pmx->setting); + +exit: + mutex_unlock(&pmx_mutex); + return ret; +} +EXPORT_SYMBOL(pmx_activate); + +int pmx_deactivate(struct device *dev, struct pmx *pmx) +{ + int i; + int ret = -ENOENT; + + if (pmx == NULL || dev == NULL) + return -EINVAL; + + mutex_lock(&pmx_mutex); + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + + if (pmx_settings[i]->dev == NULL) + continue; + + if (pmx->setting == pmx_settings[i]->setting) { + + if (dev != pmx->dev) { + WARN(1, "padmux: cannot deactivate " + "pmx setting as it was activated " + "by another consumer\n"); + + ret = -EBUSY; + continue; + } else { + update_registers(pmx, false); + pmx_settings[i]->dev = NULL; + pmx->activated = false; + ret = 0; + dev_dbg(dev, "padmux: setting nr %d is deactivated", + pmx->setting); + break; + } + } + } + mutex_unlock(&pmx_mutex); + + return ret; +} +EXPORT_SYMBOL(pmx_deactivate); + +/* + * For internal use only. If it is to be exported, + * it should be reentrant. Notice that pmx_activate + * (i.e. runtime settings) always override default settings. + */ +static int pmx_set_default(void) +{ + /* Used to identify several entries on the same bits */ + u16 modbits[ARRAY_SIZE(pmx_registers)]; + + int i, j; + + memset(modbits, 0, ARRAY_SIZE(pmx_registers) * sizeof(u16)); + + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + + if (!pmx_settings[i]->default_on) + continue; + + for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) { + + /* Make sure there is only one entry on the same bits */ + if (modbits[j] & pmx_settings[i]->onmask[j].mask) { + BUG(); + return -EUSERS; + } + modbits[j] |= pmx_settings[i]->onmask[j].mask; + } + update_registers(pmx_settings[i], true); + } + return 0; } + +#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG)) +static int pmx_show(struct seq_file *s, void *data) +{ + int i; + seq_printf(s, "-------------------------------------------------\n"); + seq_printf(s, "SETTING BOUND TO DEVICE STATE\n"); + seq_printf(s, "-------------------------------------------------\n"); + mutex_lock(&pmx_mutex); + for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) { + /* Format pmx and device name nicely */ + char cdp[33]; + int chars; + + chars = snprintf(&cdp[0], 17, "%s", pmx_settings[i]->name); + while (chars < 16) { + cdp[chars] = ' '; + chars++; + } + chars = snprintf(&cdp[16], 17, "%s", pmx_settings[i]->dev ? + dev_name(pmx_settings[i]->dev) : "N/A"); + while (chars < 16) { + cdp[chars+16] = ' '; + chars++; + } + cdp[32] = '\0'; + + seq_printf(s, + "%s\t%s\n", + &cdp[0], + pmx_settings[i]->activated ? + "ACTIVATED" : "DEACTIVATED" + ); + + } + mutex_unlock(&pmx_mutex); + return 0; +} + +static int pmx_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmx_show, NULL); +} + +static const struct file_operations pmx_operations = { + .owner = THIS_MODULE, + .open = pmx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init init_pmx_read_debugfs(void) +{ + /* Expose a simple debugfs interface to view pmx settings */ + (void) debugfs_create_file("padmux", S_IFREG | S_IRUGO, + NULL, NULL, + &pmx_operations); + return 0; +} + +/* + * This needs to come in after the core_initcall(), + * because debugfs is not available until + * the subsystems come up. + */ +module_init(init_pmx_read_debugfs); +#endif + +static int __init pmx_init(void) +{ + int ret; + + ret = pmx_set_default(); + + if (IS_ERR_VALUE(ret)) + pr_crit("padmux: default settings could not be set\n"); + + return 0; +} + +/* Should be initialized before consumers */ +core_initcall(pmx_init); diff --git a/arch/arm/mach-u300/padmux.h b/arch/arm/mach-u300/padmux.h index 8c2099ac504..6e8b8606409 100644 --- a/arch/arm/mach-u300/padmux.h +++ b/arch/arm/mach-u300/padmux.h @@ -6,14 +6,34 @@ * Copyright (C) 2009 ST-Ericsson AB * License terms: GNU General Public License (GPL) version 2 * U300 PADMUX API - * Author: Linus Walleij <linus.walleij@stericsson.com> - * + * Author: Martin Persson <martin.persson@stericsson.com> */ #ifndef __MACH_U300_PADMUX_H #define __MACH_U300_PADMUX_H -void pmx_set_mission_mode_mmc(void); -void pmx_set_mission_mode_spi(void); +enum pmx_settings { + U300_APP_PMX_MMC_SETTING, + U300_APP_PMX_SPI_SETTING +}; + +struct pmx_onmask { + u16 mask; /* Mask bits */ + u16 val; /* Value when active */ +}; + +struct pmx { + struct device *dev; + enum pmx_settings setting; + char *name; + bool activated; + bool default_on; + struct pmx_onmask onmask[]; +}; + +struct pmx *pmx_get(struct device *dev, enum pmx_settings setting); +int pmx_put(struct device *dev, struct pmx *pmx); +int pmx_activate(struct device *dev, struct pmx *pmx); +int pmx_deactivate(struct device *dev, struct pmx *pmx); #endif diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c new file mode 100644 index 00000000000..f0e887bea30 --- /dev/null +++ b/arch/arm/mach-u300/spi.c @@ -0,0 +1,124 @@ +/* + * arch/arm/mach-u300/spi.c + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + */ +#include <linux/device.h> +#include <linux/amba/bus.h> +#include <linux/spi/spi.h> +#include <linux/amba/pl022.h> +#include <linux/err.h> +#include "padmux.h" + +/* + * The following is for the actual devices on the SSP/SPI bus + */ +#ifdef CONFIG_MACH_U300_SPIDUMMY +static void select_dummy_chip(u32 chipselect) +{ + pr_debug("CORE: %s called with CS=0x%x (%s)\n", + __func__, + chipselect, + chipselect ? "unselect chip" : "select chip"); + /* + * Here you would write the chip select value to the GPIO pins if + * this was a real chip (but this is a loopback dummy). + */ +} + +struct pl022_config_chip dummy_chip_info = { + /* Nominally this is LOOPBACK_DISABLED, but this is our dummy chip! */ + .lbm = LOOPBACK_ENABLED, + /* + * available POLLING_TRANSFER and INTERRUPT_TRANSFER, + * DMA_TRANSFER does not work + */ + .com_mode = INTERRUPT_TRANSFER, + .iface = SSP_INTERFACE_MOTOROLA_SPI, + /* We can only act as master but SSP_SLAVE is possible in theory */ + .hierarchy = SSP_MASTER, + /* 0 = drive TX even as slave, 1 = do not drive TX as slave */ + .slave_tx_disable = 0, + /* LSB first */ + .endian_tx = SSP_TX_LSB, + .endian_rx = SSP_RX_LSB, + .data_size = SSP_DATA_BITS_8, /* used to be 12 in some default */ + .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM, + .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC, + .clk_phase = SSP_CLK_SECOND_EDGE, + .clk_pol = SSP_CLK_POL_IDLE_LOW, + .ctrl_len = SSP_BITS_12, + .wait_state = SSP_MWIRE_WAIT_ZERO, + .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, + /* + * This is where you insert a call to a function to enable CS + * (usually GPIO) for a certain chip. + */ + .cs_control = select_dummy_chip, +}; +#endif + +static struct spi_board_info u300_spi_devices[] = { +#ifdef CONFIG_MACH_U300_SPIDUMMY + { + /* A dummy chip used for loopback tests */ + .modalias = "spi-dummy", + /* Really dummy, pass in additional chip config here */ + .platform_data = NULL, + /* This defines how the controller shall handle the device */ + .controller_data = &dummy_chip_info, + /* .irq - no external IRQ routed from this device */ + .max_speed_hz = 1000000, + .bus_num = 0, /* Only one bus on this chip */ + .chip_select = 0, + /* Means SPI_CS_HIGH, change if e.g low CS */ + .mode = 0, + }, +#endif +}; + +static struct pl022_ssp_controller ssp_platform_data = { + /* If you have several SPI buses this varies, we have only bus 0 */ + .bus_id = 0, + /* Set this to 1 when we think we got DMA working */ + .enable_dma = 0, + /* + * On the APP CPU GPIO 4, 5 and 6 are connected as generic + * chip selects for SPI. (Same on U330, U335 and U365.) + * TODO: make sure the GPIO driver can select these properly + * and do padmuxing accordingly too. + */ + .num_chipselect = 3, +}; + + +void __init u300_spi_init(struct amba_device *adev) +{ + struct pmx *pmx; + + adev->dev.platform_data = &ssp_platform_data; + /* + * Setup padmuxing for SPI. Since this must always be + * compiled into the kernel, pmx is never released. + */ + pmx = pmx_get(&adev->dev, U300_APP_PMX_SPI_SETTING); + + if (IS_ERR(pmx)) + dev_warn(&adev->dev, "Could not get padmux handle\n"); + else { + int ret; + + ret = pmx_activate(&adev->dev, pmx); + if (IS_ERR_VALUE(ret)) + dev_warn(&adev->dev, "Could not activate padmuxing\n"); + } + +} +void __init u300_spi_register_board_devices(void) +{ + /* Register any SPI devices */ + spi_register_board_info(u300_spi_devices, ARRAY_SIZE(u300_spi_devices)); +} diff --git a/arch/arm/mach-u300/spi.h b/arch/arm/mach-u300/spi.h new file mode 100644 index 00000000000..bd3d867e240 --- /dev/null +++ b/arch/arm/mach-u300/spi.h @@ -0,0 +1,26 @@ +/* + * arch/arm/mach-u300/spi.h + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + */ +#ifndef SPI_H +#define SPI_H +#include <linux/amba/bus.h> + +#ifdef CONFIG_SPI_PL022 +void __init u300_spi_init(struct amba_device *adev); +void __init u300_spi_register_board_devices(void); +#else +/* Compile out SPI support if PL022 is not selected */ +static inline void __init u300_spi_init(struct amba_device *adev) +{ +} +static inline void __init u300_spi_register_board_devices(void) +{ +} +#endif + +#endif diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index cce53204880..26d26f5100f 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -346,6 +346,21 @@ static struct clocksource clocksource_u300_1mhz = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +/* + * Override the global weak sched_clock symbol with this + * local implementation which uses the clocksource to get some + * better resolution when scheduling the kernel. We accept that + * this wraps around for now, since it is just a relative time + * stamp. (Inspired by OMAP implementation.) + */ +unsigned long long notrace sched_clock(void) +{ + return clocksource_cyc2ns(clocksource_u300_1mhz.read( + &clocksource_u300_1mhz), + clocksource_u300_1mhz.mult, + clocksource_u300_1mhz.shift); +} + /* * This sets up the system timers, clock source and clock event. diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 975eae41ee6..e13be7c444c 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -27,6 +27,7 @@ #include <linux/amba/bus.h> #include <linux/amba/clcd.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/cnt32_to_63.h> @@ -47,7 +48,6 @@ #include <asm/mach/irq.h> #include <asm/mach/time.h> #include <asm/mach/map.h> -#include <asm/mach/mmc.h> #include "core.h" #include "clock.h" @@ -369,7 +369,7 @@ unsigned int mmc_status(struct device *dev) return readl(VERSATILE_SYSMCI) & mask; } -static struct mmc_platform_data mmc0_plat_data = { +static struct mmci_platform_data mmc0_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, .gpio_wp = -1, diff --git a/arch/arm/mach-versatile/versatile_pb.c b/arch/arm/mach-versatile/versatile_pb.c index 9af8d8154df..239cd30fc4f 100644 --- a/arch/arm/mach-versatile/versatile_pb.c +++ b/arch/arm/mach-versatile/versatile_pb.c @@ -24,6 +24,7 @@ #include <linux/sysdev.h> #include <linux/amba/bus.h> #include <linux/amba/pl061.h> +#include <linux/amba/mmci.h> #include <linux/io.h> #include <mach/hardware.h> @@ -31,7 +32,6 @@ #include <asm/mach-types.h> #include <asm/mach/arch.h> -#include <asm/mach/mmc.h> #include "core.h" @@ -41,7 +41,7 @@ #define IRQ_MMCI1A IRQ_SIC_MMCI1A #endif -static struct mmc_platform_data mmc1_plat_data = { +static struct mmci_platform_data mmc1_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, .gpio_wp = -1, diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 5fe595aeba6..8d43e58f924 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -771,3 +771,8 @@ config CACHE_XSC3L2 select OUTER_CACHE help This option enables the L2 cache on XScale3. + +config ARM_L1_CACHE_SHIFT + int + default 6 if ARCH_OMAP3 + default 5 diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index cc8829d7e11..379f7855605 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -25,6 +25,19 @@ #include "fault.h" +/* + * Fault status register encodings. We steal bit 31 for our own purposes. + */ +#define FSR_LNX_PF (1 << 31) +#define FSR_WRITE (1 << 11) +#define FSR_FS4 (1 << 10) +#define FSR_FS3_0 (15) + +static inline int fsr_fs(unsigned int fsr) +{ + return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; +} + #ifdef CONFIG_MMU #ifdef CONFIG_KPROBES @@ -182,18 +195,35 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -static int +/* + * Check that the permissions on the VMA allow for the fault which occurred. + * If we encountered a write fault, we must have write permission, otherwise + * we allow any permission. + */ +static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) +{ + unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + + if (fsr & FSR_WRITE) + mask = VM_WRITE; + if (fsr & FSR_LNX_PF) + mask = VM_EXEC; + + return vma->vm_flags & mask ? false : true; +} + +static int __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct task_struct *tsk) { struct vm_area_struct *vma; - int fault, mask; + int fault; vma = find_vma(mm, addr); fault = VM_FAULT_BADMAP; - if (!vma) + if (unlikely(!vma)) goto out; - if (vma->vm_start > addr) + if (unlikely(vma->vm_start > addr)) goto check_stack; /* @@ -201,47 +231,24 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, * memory access, so we can handle it. */ good_area: - if (fsr & (1 << 11)) /* write? */ - mask = VM_WRITE; - else - mask = VM_READ|VM_EXEC|VM_WRITE; - - fault = VM_FAULT_BADACCESS; - if (!(vma->vm_flags & mask)) + if (access_error(fsr, vma)) { + fault = VM_FAULT_BADACCESS; goto out; + } /* - * If for any reason at all we couldn't handle - * the fault, make sure we exit gracefully rather - * than endlessly redo the fault. + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the fault. */ -survive: - fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & (1 << 11)) ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - return fault; - BUG(); - } + fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & FSR_WRITE) ? FAULT_FLAG_WRITE : 0); + if (unlikely(fault & VM_FAULT_ERROR)) + return fault; if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; return fault; -out_of_memory: - if (!is_global_init(tsk)) - goto out; - - /* - * If we are out of memory for pid1, sleep for a while and retry - */ - up_read(&mm->mmap_sem); - yield(); - down_read(&mm->mmap_sem); - goto survive; - check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) goto good_area; @@ -278,6 +285,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) goto no_context; down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case, we'll have missed the might_sleep() from + * down_read() + */ + might_sleep(); } fault = __do_page_fault(mm, addr, fsr, tsk); @@ -289,6 +303,16 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to + * userspace (which will retry the fault, or kill us if we + * got oom-killed) + */ + pagefault_out_of_memory(); + return 0; + } + /* * If we are in kernel mode at this point, we * have no context to handle this fault with. @@ -296,16 +320,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!user_mode(regs)) goto no_context; - if (fault & VM_FAULT_OOM) { - /* - * We ran out of memory, or some other thing - * happened to us that made us unable to handle - * the page fault gracefully. - */ - printk("VM: killing process %s\n", tsk->comm); - do_group_exit(SIGKILL); - return 0; - } if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to @@ -489,10 +503,10 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *) asmlinkage void __exception do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); + const struct fsr_info *inf = fsr_info + fsr_fs(fsr); struct siginfo info; - if (!inf->fn(addr, fsr, regs)) + if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", @@ -508,6 +522,6 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) asmlinkage void __exception do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { - do_translation_fault(addr, 0, regs); + do_translation_fault(addr, FSR_LNX_PF, regs); } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index f982606d7bf..877c492f8e1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -613,6 +613,14 @@ void __init mem_init(void) void free_initmem(void) { +#ifdef CONFIG_HAVE_TCM + extern char *__tcm_start, *__tcm_end; + + totalram_pages += free_area(__phys_to_pfn(__pa(__tcm_start)), + __phys_to_pfn(__pa(__tcm_end)), + "TCM link"); +#endif + if (!machine_is_integrator() && !machine_is_cintegrator()) totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), __phys_to_pfn(__pa(__init_end)), diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index 3c127aabe21..1ff6a37e893 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); #else dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); #endif @@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); #else dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); #endif @@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #else dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #endif diff --git a/arch/arm/plat-pxa/dma.c b/arch/arm/plat-pxa/dma.c index 70aeee407f7..2975798d411 100644 --- a/arch/arm/plat-pxa/dma.c +++ b/arch/arm/plat-pxa/dma.c @@ -17,22 +17,266 @@ #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/errno.h> +#include <linux/dma-mapping.h> #include <asm/system.h> #include <asm/irq.h> +#include <asm/memory.h> #include <mach/hardware.h> #include <mach/dma.h> +#define DMA_DEBUG_NAME "pxa_dma" +#define DMA_MAX_REQUESTERS 64 + struct dma_channel { char *name; pxa_dma_prio prio; void (*irq_handler)(int, void *); void *data; + spinlock_t lock; }; static struct dma_channel *dma_channels; static int num_dma_channels; +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan; + +static int dbg_show_requester_chan(struct seq_file *s, void *p) +{ + int pos = 0; + int chan = (int)s->private; + int i; + u32 drcmr; + + pos += seq_printf(s, "DMA channel %d requesters list :\n", chan); + for (i = 0; i < DMA_MAX_REQUESTERS; i++) { + drcmr = DRCMR(i); + if ((drcmr & DRCMR_CHLNUM) == chan) + pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return pos; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define DCSR_STR(flag) (dcsr & DCSR_##flag ? #flag" " : "") +#define DCMD_STR(flag) (dcmd & DCMD_##flag ? #flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + int pos = 0; + int chan = (int)s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc; + struct pxa_dma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&dma_channels[chan].lock, flags); + phys_desc = DDADR(chan); + + pos += seq_printf(s, "DMA channel %d descriptors :\n", chan); + pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d" + " width=%d len=%d)\n", + dcmd, + DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), + DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), + DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), + DCMD_STR(ENDIAN), burst, width, + dcmd & DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + spin_unlock_irqrestore(&dma_channels[chan].lock, flags); + return pos; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + int pos = 0; + int chan = (int)s->private; + u32 dcsr, dcmd; + int burst, width; + static char *str_prio[] = { "high", "normal", "low" }; + + dcsr = DCSR(chan); + dcmd = DCMD(chan); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + pos += seq_printf(s, "DMA channel %d\n", chan); + pos += seq_printf(s, "\tPriority : %s\n", + str_prio[dma_channels[chan].prio]); + pos += seq_printf(s, "\tUnaligned transfer bit: %s\n", + DALGN & (1 << chan) ? "yes" : "no"); + pos += seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, DCSR_STR(RUN), DCSR_STR(NODESC), + DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN), + DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN), + DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST), + DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND), + DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR), + DCSR_STR(STARTINTR), DCSR_STR(BUSERR)); + + pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d" + " len=%d)\n", + dcmd, + DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), + DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), + DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), + DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH); + pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan)); + pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan)); + pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan)); + return pos; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + int pos = 0; + + /* basic device status */ + pos += seq_printf(s, "DMA engine status\n"); + pos += seq_printf(s, "\tChannel number: %d\n", num_dma_channels); + + return pos; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxa_dma_dbg_alloc_chan(int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)ch; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxa_dma_init_debugfs(void) +{ + int i; + struct dentry *chandir; + + dbgfs_root = debugfs_create_dir(DMA_DEBUG_NAME, NULL); + if (IS_ERR(dbgfs_root) || !dbgfs_root) + goto err_root; + + dbgfs_state = debugfs_create_file("state", 0400, dbgfs_root, NULL, + &dbg_fops_state); + if (!dbgfs_state) + goto err_state; + + dbgfs_chan = kmalloc(sizeof(*dbgfs_state) * num_dma_channels, + GFP_KERNEL); + if (!dbgfs_state) + goto err_alloc; + + chandir = debugfs_create_dir("channels", dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < num_dma_channels; i++) { + dbgfs_chan[i] = pxa_dma_dbg_alloc_chan(i, chandir); + if (!dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(dbgfs_root); +err_root: + pr_err("pxa_dma: debugfs is not available\n"); +} + +static void __exit pxa_dma_cleanup_debugfs(void) +{ + debugfs_remove_recursive(dbgfs_root); +} +#else +static inline void pxa_dma_init_debugfs(void) {} +static inline void pxa_dma_cleanup_debugfs(void) {} +#endif + int pxa_request_dma (char *name, pxa_dma_prio prio, void (*irq_handler)(int, void *), void *data) @@ -71,6 +315,7 @@ int pxa_request_dma (char *name, pxa_dma_prio prio, local_irq_restore(flags); return i; } +EXPORT_SYMBOL(pxa_request_dma); void pxa_free_dma (int dma_ch) { @@ -88,24 +333,26 @@ void pxa_free_dma (int dma_ch) dma_channels[dma_ch].name = NULL; local_irq_restore(flags); } +EXPORT_SYMBOL(pxa_free_dma); static irqreturn_t dma_irq_handler(int irq, void *dev_id) { int i, dint = DINT; + struct dma_channel *channel; - for (i = 0; i < num_dma_channels; i++) { - if (dint & (1 << i)) { - struct dma_channel *channel = &dma_channels[i]; - if (channel->name && channel->irq_handler) { - channel->irq_handler(i, channel->data); - } else { - /* - * IRQ for an unregistered DMA channel: - * let's clear the interrupts and disable it. - */ - printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i); - DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; - } + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + channel = &dma_channels[i]; + if (channel->name && channel->irq_handler) { + channel->irq_handler(i, channel->data); + } else { + /* + * IRQ for an unregistered DMA channel: + * let's clear the interrupts and disable it. + */ + printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i); + DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; } } return IRQ_HANDLED; @@ -127,6 +374,7 @@ int __init pxa_init_dma(int irq, int num_ch) for (i = 0; i < num_ch; i++) { DCSR(i) = 0; dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW); + spin_lock_init(&dma_channels[i].lock); } ret = request_irq(irq, dma_irq_handler, IRQF_DISABLED, "DMA", NULL); @@ -135,10 +383,9 @@ int __init pxa_init_dma(int irq, int num_ch) kfree(dma_channels); return ret; } - num_dma_channels = num_ch; + + pxa_dma_init_debugfs(); + return 0; } - -EXPORT_SYMBOL(pxa_request_dma); -EXPORT_SYMBOL(pxa_free_dma); diff --git a/arch/arm/plat-pxa/include/plat/mfp.h b/arch/arm/plat-pxa/include/plat/mfp.h index 64019464c8d..22086e696e8 100644 --- a/arch/arm/plat-pxa/include/plat/mfp.h +++ b/arch/arm/plat-pxa/include/plat/mfp.h @@ -150,6 +150,74 @@ enum { MFP_PIN_GPIO125, MFP_PIN_GPIO126, MFP_PIN_GPIO127, + + MFP_PIN_GPIO128, + MFP_PIN_GPIO129, + MFP_PIN_GPIO130, + MFP_PIN_GPIO131, + MFP_PIN_GPIO132, + MFP_PIN_GPIO133, + MFP_PIN_GPIO134, + MFP_PIN_GPIO135, + MFP_PIN_GPIO136, + MFP_PIN_GPIO137, + MFP_PIN_GPIO138, + MFP_PIN_GPIO139, + MFP_PIN_GPIO140, + MFP_PIN_GPIO141, + MFP_PIN_GPIO142, + MFP_PIN_GPIO143, + MFP_PIN_GPIO144, + MFP_PIN_GPIO145, + MFP_PIN_GPIO146, + MFP_PIN_GPIO147, + MFP_PIN_GPIO148, + MFP_PIN_GPIO149, + MFP_PIN_GPIO150, + MFP_PIN_GPIO151, + MFP_PIN_GPIO152, + MFP_PIN_GPIO153, + MFP_PIN_GPIO154, + MFP_PIN_GPIO155, + MFP_PIN_GPIO156, + MFP_PIN_GPIO157, + MFP_PIN_GPIO158, + MFP_PIN_GPIO159, + MFP_PIN_GPIO160, + MFP_PIN_GPIO161, + MFP_PIN_GPIO162, + MFP_PIN_GPIO163, + MFP_PIN_GPIO164, + MFP_PIN_GPIO165, + MFP_PIN_GPIO166, + MFP_PIN_GPIO167, + MFP_PIN_GPIO168, + MFP_PIN_GPIO169, + MFP_PIN_GPIO170, + MFP_PIN_GPIO171, + MFP_PIN_GPIO172, + MFP_PIN_GPIO173, + MFP_PIN_GPIO174, + MFP_PIN_GPIO175, + MFP_PIN_GPIO176, + MFP_PIN_GPIO177, + MFP_PIN_GPIO178, + MFP_PIN_GPIO179, + MFP_PIN_GPIO180, + MFP_PIN_GPIO181, + MFP_PIN_GPIO182, + MFP_PIN_GPIO183, + MFP_PIN_GPIO184, + MFP_PIN_GPIO185, + MFP_PIN_GPIO186, + MFP_PIN_GPIO187, + MFP_PIN_GPIO188, + MFP_PIN_GPIO189, + MFP_PIN_GPIO190, + MFP_PIN_GPIO191, + + MFP_PIN_GPIO255 = 255, + MFP_PIN_GPIO0_2, MFP_PIN_GPIO1_2, MFP_PIN_GPIO2_2, @@ -325,8 +393,9 @@ typedef unsigned long mfp_cfg_t; #define MFP_PULL_LOW (0x1 << 21) #define MFP_PULL_HIGH (0x2 << 21) #define MFP_PULL_BOTH (0x3 << 21) -#define MFP_PULL_MASK (0x3 << 21) -#define MFP_PULL(x) (((x) >> 21) & 0x3) +#define MFP_PULL_FLOAT (0x4 << 21) +#define MFP_PULL_MASK (0x7 << 21) +#define MFP_PULL(x) (((x) >> 21) & 0x7) #define MFP_CFG_DEFAULT (MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\ MFP_LPM_EDGE_NONE | MFP_PULL_NONE) diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c index e716c622a17..9405d0379c8 100644 --- a/arch/arm/plat-pxa/mfp.c +++ b/arch/arm/plat-pxa/mfp.c @@ -77,11 +77,13 @@ * MFPR_PULL_LOW 1 0 1 * MFPR_PULL_HIGH 1 1 0 * MFPR_PULL_BOTH 1 1 1 + * MFPR_PULL_FLOAT 1 0 0 */ #define MFPR_PULL_NONE (0) #define MFPR_PULL_LOW (MFPR_PULL_SEL | MFPR_PULLDOWN_EN) #define MFPR_PULL_BOTH (MFPR_PULL_LOW | MFPR_PULLUP_EN) #define MFPR_PULL_HIGH (MFPR_PULL_SEL | MFPR_PULLUP_EN) +#define MFPR_PULL_FLOAT (MFPR_PULL_SEL) /* mfp_spin_lock is used to ensure that MFP register configuration * (most likely a read-modify-write operation) is atomic, and that @@ -116,6 +118,7 @@ static const unsigned long mfpr_pull[] = { MFPR_PULL_LOW, MFPR_PULL_HIGH, MFPR_PULL_BOTH, + MFPR_PULL_FLOAT, }; /* mapping of MFP_LPM_EDGE_* definitions to MFPR_EDGE_* register bits */ diff --git a/arch/arm/plat-s3c/gpio.c b/arch/arm/plat-s3c/gpio.c index 260fdc6ad68..5ff24e0f9f8 100644 --- a/arch/arm/plat-s3c/gpio.c +++ b/arch/arm/plat-s3c/gpio.c @@ -28,7 +28,7 @@ static __init void s3c_gpiolib_track(struct s3c_gpio_chip *chip) gpn = chip->chip.base; for (i = 0; i < chip->chip.ngpio; i++, gpn++) { - BUG_ON(gpn > ARRAY_SIZE(s3c_gpios)); + BUG_ON(gpn >= ARRAY_SIZE(s3c_gpios)); s3c_gpios[gpn] = chip; } } diff --git a/arch/arm/plat-s3c64xx/dma.c b/arch/arm/plat-s3c64xx/dma.c index 67aa93dbb69..266a10745a8 100644 --- a/arch/arm/plat-s3c64xx/dma.c +++ b/arch/arm/plat-s3c64xx/dma.c @@ -345,13 +345,13 @@ int s3c2410_dma_enqueue(unsigned int channel, void *id, if (!chan) return -EINVAL; - buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_KERNEL); + buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_ATOMIC); if (!buff) { printk(KERN_ERR "%s: no memory for buffer\n", __func__); return -ENOMEM; } - lli = dma_pool_alloc(dma_pool, GFP_KERNEL, &buff->lli_dma); + lli = dma_pool_alloc(dma_pool, GFP_ATOMIC, &buff->lli_dma); if (!lli) { printk(KERN_ERR "%s: no memory for lli\n", __func__); ret = -ENOMEM; @@ -697,7 +697,7 @@ static int __init s3c64xx_dma_init(void) printk(KERN_INFO "%s: Registering DMA channels\n", __func__); - dma_pool = dma_pool_create("DMA-LLI", NULL, 32, 16, 0); + dma_pool = dma_pool_create("DMA-LLI", NULL, sizeof(struct pl080s_lli), 16, 0); if (!dma_pool) { printk(KERN_ERR "%s: failed to create pool\n", __func__); return -ENOMEM; diff --git a/arch/arm/plat-s3c64xx/include/plat/dma-plat.h b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h index 0c30dd98672..8f76a1e474d 100644 --- a/arch/arm/plat-s3c64xx/include/plat/dma-plat.h +++ b/arch/arm/plat-s3c64xx/include/plat/dma-plat.h @@ -26,7 +26,7 @@ struct s3c64xx_dma_buff { struct s3c64xx_dma_buff *next; void *pw; - struct pl080_lli *lli; + struct pl080s_lli *lli; dma_addr_t lli_dma; }; diff --git a/arch/arm/plat-s3c64xx/include/plat/irqs.h b/arch/arm/plat-s3c64xx/include/plat/irqs.h index 743a70094d0..7956fd3bb19 100644 --- a/arch/arm/plat-s3c64xx/include/plat/irqs.h +++ b/arch/arm/plat-s3c64xx/include/plat/irqs.h @@ -194,9 +194,17 @@ #define IRQ_EINT_GROUP(group, no) (IRQ_EINT_GROUP##group##_BASE + (no)) +/* Define a group of interrupts for board-specific use (eg, for MFD + * interrupt controllers). */ +#define IRQ_BOARD_START (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1) + +#define IRQ_BOARD_NR 16 + +#define IRQ_BOARD_END (IRQ_BOARD_START + IRQ_BOARD_NR) + /* Set the default NR_IRQS */ -#define NR_IRQS (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1) +#define NR_IRQS (IRQ_BOARD_END + 1) #endif /* __ASM_PLAT_S3C64XX_IRQS_H */ diff --git a/arch/arm/plat-s3c64xx/s3c6400-clock.c b/arch/arm/plat-s3c64xx/s3c6400-clock.c index febac1950d8..9745852261e 100644 --- a/arch/arm/plat-s3c64xx/s3c6400-clock.c +++ b/arch/arm/plat-s3c64xx/s3c6400-clock.c @@ -302,8 +302,8 @@ static int s3c64xx_setrate_clksrc(struct clk *clk, unsigned long rate) return -EINVAL; val = __raw_readl(reg); - val &= ~(0xf << sclk->shift); - val |= (div - 1) << sclk->shift; + val &= ~(0xf << sclk->divider_shift); + val |= (div - 1) << sclk->divider_shift; __raw_writel(val, reg); return 0; @@ -328,6 +328,8 @@ static int s3c64xx_setparent_clksrc(struct clk *clk, struct clk *parent) clksrc |= src_nr << sclk->shift; __raw_writel(clksrc, S3C_CLK_SRC); + + clk->parent = parent; return 0; } @@ -343,7 +345,7 @@ static unsigned long s3c64xx_roundrate_clksrc(struct clk *clk, if (rate > parent_rate) rate = parent_rate; else { - div = rate / parent_rate; + div = parent_rate / rate; if (div == 0) div = 1; diff --git a/arch/arm/plat-stmp3xxx/dma.c b/arch/arm/plat-stmp3xxx/dma.c index d2f497764dc..ef88f25fb87 100644 --- a/arch/arm/plat-stmp3xxx/dma.c +++ b/arch/arm/plat-stmp3xxx/dma.c @@ -264,7 +264,7 @@ int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain, stmp3xxx_dma_free_command(ch, &descriptors [i]); - } while (i-- >= 0); + } while (i-- > 0); } return err; } diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index c8c55b46934..94be7bb6cb9 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Sat Sep 12 12:00:16 2009 +# Last update: Fri Sep 18 21:42:00 2009 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -1638,7 +1638,7 @@ mx35evb MACH_MX35EVB MX35EVB 1643 aml_m8050 MACH_AML_M8050 AML_M8050 1644 mx35_3ds MACH_MX35_3DS MX35_3DS 1645 mars MACH_MARS MARS 1646 -ntosd_644xa MACH_NTOSD_644XA NTOSD_644XA 1647 +neuros_osd2 MACH_NEUROS_OSD2 NEUROS_OSD2 1647 badger MACH_BADGER BADGER 1648 trizeps4wl MACH_TRIZEPS4WL TRIZEPS4WL 1649 trizeps5 MACH_TRIZEPS5 TRIZEPS5 1650 @@ -1654,7 +1654,7 @@ vf10xx MACH_VF10XX VF10XX 1659 zoran43xx MACH_ZORAN43XX ZORAN43XX 1660 sonix926 MACH_SONIX926 SONIX926 1661 celestialsemi MACH_CELESTIALSEMI CELESTIALSEMI 1662 -cc9m2443 MACH_CC9M2443 CC9M2443 1663 +cc9m2443js MACH_CC9M2443JS CC9M2443JS 1663 tw5334 MACH_TW5334 TW5334 1664 omap_htcartemis MACH_HTCARTEMIS HTCARTEMIS 1665 nal_hlite MACH_NAL_HLITE NAL_HLITE 1666 @@ -1802,7 +1802,7 @@ ccw9p9215js MACH_CCW9P9215JS CCW9P9215JS 1811 rd88f5181l_ge MACH_RD88F5181L_GE RD88F5181L_GE 1812 sifmain MACH_SIFMAIN SIFMAIN 1813 sam9_l9261 MACH_SAM9_L9261 SAM9_L9261 1814 -cc9m2443js MACH_CC9M2443JS CC9M2443JS 1815 +cc9m2443 MACH_CC9M2443 CC9M2443 1815 xaria300 MACH_XARIA300 XARIA300 1816 it9200 MACH_IT9200 IT9200 1817 rd88f5181l_fxo MACH_RD88F5181L_FXO RD88F5181L_FXO 1818 @@ -2409,3 +2409,15 @@ platypus MACH_PLATYPUS PLATYPUS 2422 pss2 MACH_PSS2 PSS2 2423 davinci_apm150 MACH_DAVINCI_APM150 DAVINCI_APM150 2424 str9100 MACH_STR9100 STR9100 2425 +net5big MACH_NET5BIG NET5BIG 2426 +seabed9263 MACH_SEABED9263 SEABED9263 2427 +mx51_m2id MACH_MX51_M2ID MX51_M2ID 2428 +octvocplus_eb MACH_OCTVOCPLUS_EB OCTVOCPLUS_EB 2429 +klk_firefox MACH_KLK_FIREFOX KLK_FIREFOX 2430 +klk_wirma_module MACH_KLK_WIRMA_MODULE KLK_WIRMA_MODULE 2431 +klk_wirma_mmi MACH_KLK_WIRMA_MMI KLK_WIRMA_MMI 2432 +supersonic MACH_SUPERSONIC SUPERSONIC 2433 +liberty MACH_LIBERTY LIBERTY 2434 +mh355 MACH_MH355 MH355 2435 +pc7802 MACH_PC7802 PC7802 2436 +gnet_sgc MACH_GNET_SGC GNET_SGC 2437 diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 21ac7c26079..ffd90fbbc8f 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -96,8 +96,7 @@ SECTIONS { __sdata = .; /* This gets done first, so the glob doesn't suck it in */ - . = ALIGN(32); - *(.data.cacheline_aligned) + CACHELINE_ALIGNED_DATA(32) #if !L1_DATA_A_LENGTH . = ALIGN(32); @@ -116,12 +115,7 @@ SECTIONS DATA_DATA CONSTRUCTORS - /* make sure the init_task is aligned to the - * kernel thread size so we can locate the kernel - * stack properly and quickly. - */ - . = ALIGN(THREAD_SIZE); - *(.init_task.data) + INIT_TASK_DATA(THREAD_SIZE) __edata = .; } @@ -134,39 +128,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); ___init_begin = .; - .init.text : - { - . = ALIGN(PAGE_SIZE); - __sinittext = .; - INIT_TEXT - __einittext = .; - } - .init.data : - { - . = ALIGN(16); - INIT_DATA - } - .init.setup : - { - . = ALIGN(16); - ___setup_start = .; - *(.init.setup) - ___setup_end = .; - } - .initcall.init : - { - ___initcall_start = .; - INITCALLS - ___initcall_end = .; - } - .con_initcall.init : - { - ___con_initcall_start = .; - *(.con_initcall.init) - ___con_initcall_end = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) + . = ALIGN(16); + INIT_DATA_SECTION(16) PERCPU(4) - SECURITY_INIT /* we have to discard exit text and such at runtime, not link time, to * handle embedded cross-section references (alt instructions, bug @@ -181,18 +146,9 @@ SECTIONS EXIT_DATA } - .init.ramfs : - { - . = ALIGN(4); - ___initramfs_start = .; - *(.init.ramfs) - . = ALIGN(4); - ___initramfs_end = .; - } - __l1_lma_start = .; - .text_l1 L1_CODE_START : AT(LOADADDR(.init.ramfs) + SIZEOF(.init.ramfs)) + .text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data)) { . = ALIGN(4); __stext_l1 = .; diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 2b73c7a5b64..31ca1418d5a 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -28,7 +28,6 @@ extern void update_xtime_from_cmos(void); extern int set_rtc_mmss(unsigned long nowtime); -extern int setup_irq(int, struct irqaction *); extern int have_rtc; unsigned long get_ns_in_jiffie(void) diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index d2a3ff8c4d3..058adddf4e4 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -52,8 +52,6 @@ static struct mm_struct* flush_mm; static struct vm_area_struct* flush_vma; static unsigned long flush_addr; -extern int setup_irq(int, struct irqaction *); - /* Mode registers */ static unsigned long irq_regs[NR_CPUS] = { regi_irq, diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index 65633d0dab8..b1920d8de40 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -46,7 +46,6 @@ unsigned long timer_regs[NR_CPUS] = extern void update_xtime_from_cmos(void); extern int set_rtc_mmss(unsigned long nowtime); -extern int setup_irq(int, struct irqaction *); extern int have_rtc; #ifdef CONFIG_CPU_FREQ diff --git a/arch/cris/arch-v32/mach-a3/io.c b/arch/cris/arch-v32/mach-a3/io.c index c22f67ecd9f..090ceb99ef0 100644 --- a/arch/cris/arch-v32/mach-a3/io.c +++ b/arch/cris/arch-v32/mach-a3/io.c @@ -36,7 +36,7 @@ struct crisv32_ioport crisv32_ioports[] = { }, }; -#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport) +#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports) struct crisv32_iopin crisv32_led_net0_green; struct crisv32_iopin crisv32_led_net0_red; diff --git a/arch/cris/arch-v32/mach-fs/io.c b/arch/cris/arch-v32/mach-fs/io.c index cb6327b1f8f..a6958661fa8 100644 --- a/arch/cris/arch-v32/mach-fs/io.c +++ b/arch/cris/arch-v32/mach-fs/io.c @@ -52,7 +52,7 @@ struct crisv32_ioport crisv32_ioports[] = { } }; -#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport) +#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports) struct crisv32_iopin crisv32_led_net0_green; struct crisv32_iopin crisv32_led_net0_red; diff --git a/arch/cris/include/arch-v10/arch/mmu.h b/arch/cris/include/arch-v10/arch/mmu.h index df84f1716e6..e829e5a37bb 100644 --- a/arch/cris/include/arch-v10/arch/mmu.h +++ b/arch/cris/include/arch-v10/arch/mmu.h @@ -33,10 +33,10 @@ typedef struct /* CRIS PTE bits (see R_TLB_LO in the register description) * - * Bit: 31-13 12-------4 3 2 1 0 - * ________________________________________________ - * | pfn | reserved | global | valid | kernel | we | - * |_____|__________|________|_______|________|_____| + * Bit: 31 30-13 12-------4 3 2 1 0 + * _______________________________________________________ + * | cache |pfn | reserved | global | valid | kernel | we | + * |_______|____|__________|________|_______|________|_____| * * (pfn = physical frame number) */ @@ -53,6 +53,7 @@ typedef struct #define _PAGE_VALID (1<<2) /* page is valid */ #define _PAGE_SILENT_READ (1<<2) /* synonym */ #define _PAGE_GLOBAL (1<<3) /* global page - context is ignored */ +#define _PAGE_NO_CACHE (1<<31) /* part of the uncached memory map */ /* Bits the HW doesn't care about but the kernel uses them in SW */ diff --git a/arch/cris/include/arch-v32/arch/mmu.h b/arch/cris/include/arch-v32/arch/mmu.h index 6bcdc3fdf7d..c1a13e05e96 100644 --- a/arch/cris/include/arch-v32/arch/mmu.h +++ b/arch/cris/include/arch-v32/arch/mmu.h @@ -28,10 +28,10 @@ typedef struct /* * CRISv32 PTE bits: * - * Bit: 31-13 12-5 4 3 2 1 0 - * +-----+------+--------+-------+--------+-------+---------+ - * | pfn | zero | global | valid | kernel | write | execute | - * +-----+------+--------+-------+--------+-------+---------+ + * Bit: 31 30-13 12-5 4 3 2 1 0 + * +-------+-----+------+--------+-------+--------+-------+---------+ + * | cache | pfn | zero | global | valid | kernel | write | execute | + * +-------+-----+------+--------+-------+--------+-------+---------+ */ /* @@ -45,6 +45,8 @@ typedef struct #define _PAGE_VALID (1 << 3) /* Page is valid. */ #define _PAGE_SILENT_READ (1 << 3) /* Same as above. */ #define _PAGE_GLOBAL (1 << 4) /* Global page. */ +#define _PAGE_NO_CACHE (1 << 31) /* part of the uncached memory map */ + /* * The hardware doesn't care about these bits, but the kernel uses them in diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h index 74178adeb1c..17bb12d760b 100644 --- a/arch/cris/include/asm/hardirq.h +++ b/arch/cris/include/asm/hardirq.h @@ -2,16 +2,6 @@ #define __ASM_HARDIRQ_H #include <asm/irq.h> -#include <linux/threads.h> -#include <linux/cache.h> - -typedef struct { - unsigned int __softirq_pending; -} ____cacheline_aligned irq_cpustat_t; - -#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ - -void ack_bad_irq(unsigned int irq); #define HARDIRQ_BITS 8 @@ -24,4 +14,6 @@ void ack_bad_irq(unsigned int irq); # error HARDIRQ_BITS is too low! #endif +#include <asm-generic/hardirq.h> + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 50aa974aa83..1fcce00f01f 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -197,6 +197,8 @@ static inline pte_t __mk_pte(void * page, pgprot_t pgprot) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE)) + /* pte_val refers to a page in the 0x4xxxxxxx physical DRAM interval * __pte_page(pte_val) refers to the "virtual" DRAM interval diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 7f642fcffbf..0ca7d9892cc 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -38,11 +38,6 @@ #include <asm/io.h> -void ack_bad_irq(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); -} - int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, j; diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 6c81836b922..bbfda67d290 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -51,10 +51,7 @@ SECTIONS _etext = . ; /* End of text section. */ __etext = .; - . = ALIGN(4); /* Exception table. */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; + EXCEPTION_TABLE(4) RODATA @@ -67,36 +64,24 @@ SECTIONS __edata = . ; /* End of data section. */ _edata = . ; - . = ALIGN(PAGE_SIZE); /* init_task and stack, must be aligned. */ - .data.init_task : { *(.data.init_task) } + INIT_TASK_DATA_SECTION(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* Init code and data. */ __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) .init.data : { INIT_DATA } - . = ALIGN(16); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; + .init.setup : { INIT_SETUP(16) } #ifdef CONFIG_ETRAX_ARCH_V32 __start___param = .; __param : { *(__param) } __stop___param = .; #endif .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; + INIT_CALLS } .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + CON_INITCALL } SECURITY_INIT @@ -114,9 +99,7 @@ SECTIONS PERCPU(PAGE_SIZE) .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; + INIT_RAM_FS } #endif @@ -130,11 +113,7 @@ SECTIONS __init_end = .; __data_end = . ; /* Move to _edata ? */ - __bss_start = .; /* BSS. */ - .bss : { - *(COMMON) - *(.bss) - } + BSS_SECTION(0, 0, 0) . = ALIGN (0x20); _end = .; diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index be722fc1acf..0d4d3e3a4cf 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len) /* * Send us to sleep. */ -static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp, +static int sysctl_pm_do_suspend(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int retval, mode; @@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode) } -static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, +static int cmode_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cmode; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cmode = user_atoi(buffer, *lenp); @@ -301,13 +301,13 @@ static int try_set_cm(int new_cm) return 0; } -static int p0_procctl(ctl_table *ctl, int write, struct file *filp, +static int p0_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_p0; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_p0 = user_atoi(buffer, *lenp); @@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table, return 1; } -static int cm_procctl(ctl_table *ctl, int write, struct file *filp, +static int cm_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cm; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cm = user_atoi(buffer, *lenp); diff --git a/arch/frv/mb93090-mb00/Makefile b/arch/frv/mb93090-mb00/Makefile index 76595e87073..b73b542f8f4 100644 --- a/arch/frv/mb93090-mb00/Makefile +++ b/arch/frv/mb93090-mb00/Makefile @@ -11,3 +11,5 @@ else obj-y += pci-dma-nommu.o endif endif + +obj-$(CONFIG_MTD) += flash.o diff --git a/arch/frv/mb93090-mb00/flash.c b/arch/frv/mb93090-mb00/flash.c new file mode 100644 index 00000000000..c0e3707c229 --- /dev/null +++ b/arch/frv/mb93090-mb00/flash.c @@ -0,0 +1,90 @@ +/* Flash mappings for the MB93090-MB00 motherboard + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/mtd/partitions.h> +#include <linux/mtd/physmap.h> + +#define MB93090_BOOTROM_ADDR 0xFF000000 /* Boot ROM */ +#define MB93090_BOOTROM_SIZE (2 * 1024 * 1024) +#define MB93090_USERROM_ADDR 0xFF200000 /* User ROM */ +#define MB93090_USERROM_SIZE (2 * 1024 * 1024) + +/* + * default MTD partition table for both main flash devices, expected to be + * overridden by RedBoot + */ +static struct mtd_partition mb93090_partitions[] = { + { + .name = "Filesystem", + .size = MTDPART_SIZ_FULL, + .offset = 0, + } +}; + +/* + * Definition of the MB93090 Boot ROM (on the CPU card) + */ +static struct physmap_flash_data mb93090_bootrom_data = { + .width = 2, + .nr_parts = 2, + .parts = mb93090_partitions, +}; + +static struct resource mb93090_bootrom_resource = { + .start = MB93090_BOOTROM_ADDR, + .end = MB93090_BOOTROM_ADDR + MB93090_BOOTROM_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device mb93090_bootrom = { + .name = "physmap-flash", + .id = 0, + .dev.platform_data = &mb93090_bootrom_data, + .num_resources = 1, + .resource = &mb93090_bootrom_resource, +}; + +/* + * Definition of the MB93090 User ROM definition (on the motherboard) + */ +static struct physmap_flash_data mb93090_userrom_data = { + .width = 2, + .nr_parts = 2, + .parts = mb93090_partitions, +}; + +static struct resource mb93090_userrom_resource = { + .start = MB93090_USERROM_ADDR, + .end = MB93090_USERROM_ADDR + MB93090_USERROM_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device mb93090_userrom = { + .name = "physmap-flash", + .id = 1, + .dev.platform_data = &mb93090_userrom_data, + .num_resources = 1, + .resource = &mb93090_userrom_resource, +}; + +/* + * register the MB93090 flashes + */ +static int __init mb93090_mtd_init(void) +{ + platform_device_register(&mb93090_bootrom); + platform_device_register(&mb93090_userrom); + return 0; +} + +module_init(mb93090_mtd_init); diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 662b02ecb86..b9e24907e6e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #define VMLINUX_SYMBOL(_sym_) _##_sym_ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> /* target memory map */ #ifdef CONFIG_H8300H_GENERIC @@ -79,11 +80,8 @@ SECTIONS SCHED_TEXT LOCK_TEXT __etext = . ; - . = ALIGN(16); /* Exception table */ - ___start___ex_table = .; - *(__ex_table) - ___stop___ex_table = .; } + EXCEPTION_TABLE(16) RODATA #if defined(CONFIG_ROMKERNEL) @@ -100,8 +98,7 @@ SECTIONS __sdata = . ; ___data_start = . ; - . = ALIGN(0x2000) ; - *(.data.init_task) + INIT_TASK_DATA(0x2000) . = ALIGN(0x4) ; DATA_DATA . = ALIGN(0x4) ; @@ -114,24 +111,16 @@ SECTIONS __einittext = .; INIT_DATA . = ALIGN(0x4) ; + INIT_SETUP(0x4) ___setup_start = .; *(.init.setup) . = ALIGN(0x4) ; ___setup_end = .; - ___initcall_start = .; - INITCALLS - ___initcall_end = .; - ___con_initcall_start = .; - *(.con_initcall.init) - ___con_initcall_end = .; + INIT_CALLS + CON_INITCALL EXIT_TEXT EXIT_DATA -#if defined(CONFIG_BLK_DEV_INITRD) - . = ALIGN(4); - ___initramfs_start = .; - *(.init.ramfs) - ___initramfs_end = .; -#endif + INIT_RAM_FS . = ALIGN(0x4) ; ___init_end = .; __edata = . ; diff --git a/arch/m32r/include/asm/page.h b/arch/m32r/include/asm/page.h index 11777f7a562..725ede8f288 100644 --- a/arch/m32r/include/asm/page.h +++ b/arch/m32r/include/asm/page.h @@ -1,9 +1,11 @@ #ifndef _ASM_M32R_PAGE_H #define _ASM_M32R_PAGE_H +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 1a997fc148a..8397c249989 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -140,8 +140,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.lr) #define KSTK_ESP(tsk) ((tsk)->thread.sp) -#define THREAD_SIZE (2*PAGE_SIZE) - #define cpu_relax() barrier() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 71578151a40..ed240b6e8e7 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -55,6 +55,8 @@ struct thread_info { #define PREEMPT_ACTIVE 0x10000000 +#define THREAD_SIZE (PAGE_SIZE << 1) + /* * macros/functions for gaining access to the thread information structure */ @@ -76,8 +78,6 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) -#define THREAD_SIZE (2*PAGE_SIZE) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { @@ -125,17 +125,6 @@ static inline unsigned int get_thread_fault_code(void) return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT; } -#else /* !__ASSEMBLY__ */ - -#define THREAD_SIZE 8192 - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) GET_THREAD_INFO reg - .macro GET_THREAD_INFO reg - ldi \reg, #-THREAD_SIZE - and \reg, sp - .endm - #endif /* diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index 612d35b082a..403869833b9 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -118,6 +118,13 @@ #define resume_kernel restore_all #endif +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) GET_THREAD_INFO reg + .macro GET_THREAD_INFO reg + ldi \reg, #-THREAD_SIZE + and \reg, sp + .endm + ENTRY(ret_from_fork) pop r0 bl schedule_tail diff --git a/arch/m32r/kernel/head.S b/arch/m32r/kernel/head.S index 0a7194439eb..a46652dd83e 100644 --- a/arch/m32r/kernel/head.S +++ b/arch/m32r/kernel/head.S @@ -268,13 +268,13 @@ ENTRY(empty_zero_page) /*------------------------------------------------------------------------ * Stack area */ - .section .spi + .section .init.data, "aw" ALIGN .global spi_stack_top .zero 1024 spi_stack_top: - .section .spu + .section .init.data, "aw" ALIGN .global spu_stack_top .zero 1024 diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index de5e21cca6a..8ceb6181d80 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -4,6 +4,7 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/addrspace.h> #include <asm/page.h> +#include <asm/thread_info.h> OUTPUT_ARCH(m32r) #if defined(__LITTLE_ENDIAN__) @@ -40,83 +41,22 @@ SECTIONS #endif _etext = .; /* End of text section */ - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - + EXCEPTION_TABLE(16) RODATA - - /* writeable */ - .data : { /* Data */ - *(.spu) - *(.spi) - DATA_DATA - CONSTRUCTORS - } - - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - + RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) _edata = .; /* End of data section */ - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : { INIT_DATA } - . = ALIGN(16); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { - INITCALLS - } - __initcall_end = .; - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - SECURITY_INIT - . = ALIGN(4); - __alt_instructions = .; - .altinstructions : { *(.altinstructions) } - __alt_instructions_end = .; - .altinstr_replacement : { *(.altinstr_replacement) } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : { EXIT_TEXT } - .exit.data : { EXIT_DATA } - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; -#endif - - PERCPU(4096) - . = ALIGN(4096); + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + PERCPU(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - __bss_start = .; /* BSS */ - .bss : { *(.bss) } - . = ALIGN(4); - __bss_stop = .; + BSS_SECTION(0, 0, 4) _end = . ; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 2db722d80d4..bbd8327f189 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration" config MICROBLAZE def_bool y select HAVE_LMB + select USB_ARCH_HAS_EHCI select ARCH_WANT_OPTIONAL_GPIOLIB config SWAP diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 8439598d465..34187354304 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) # r31 holds current when in kernel mode -KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) +KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) LDFLAGS := LDFLAGS_vmlinux := -LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name) +LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ @@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/ boot := arch/microblaze/boot +# Are we making a simpleImage.<boardname> target? If so, crack out the boardname +DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) + +ifneq ($(DTB),) + core-y += $(boot)/ +endif + # defines filename extension depending memory management type ifeq ($(CONFIG_MMU),) MMU := -nommu endif -export MMU +export MMU DTB all: linux.bin +BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.% + archclean: $(Q)$(MAKE) $(clean)=$(boot) -linux.bin linux.bin.gz: vmlinux +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ define archhelp - echo '* linux.bin - Create raw binary' - echo ' linux.bin.gz - Create compressed raw binary' + echo '* linux.bin - Create raw binary' + echo ' linux.bin.gz - Create compressed raw binary' + echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in' + echo ' - stripped elf with fdt blob + echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob' + echo ' *_defconfig - Select default config from arch/microblaze/configs' + echo '' + echo ' Targets with <dt> embed a device tree blob inside the image' + echo ' These targets support board with firmware that does not' + echo ' support passing a device tree directly. Replace <dt> with the' + echo ' name of a dts file from the arch/microblaze/boot/dts/ directory' + echo ' (minus the .dts extension).' endef diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index c2bb043a029..21f13322a4c 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -2,10 +2,24 @@ # arch/microblaze/boot/Makefile # -targets := linux.bin linux.bin.gz +obj-y += linked_dtb.o + +targets := linux.bin linux.bin.gz simpleImage.% OBJCOPYFLAGS_linux.bin := -O binary +# Where the DTS files live +dtstree := $(srctree)/$(src)/dts + +# Ensure system.dtb exists +$(obj)/linked_dtb.o: $(obj)/system.dtb + +# Generate system.dtb from $(DTB).dtb +ifneq ($(DTB),system) +$(obj)/system.dtb: $(obj)/$(DTB).dtb + $(call if_changed,cp) +endif + $(obj)/linux.bin: vmlinux FORCE [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \ touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image" @@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE $(call if_changed,gzip) @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' -clean-kernel += linux.bin linux.bin.gz +quiet_cmd_cp = CP $< $@$2 + cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) + +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@ + +$(obj)/simpleImage.%: vmlinux FORCE + $(call if_changed,cp,.unstrip) + $(call if_changed,strip) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +# Rule to build device tree blobs +DTC = $(objtree)/scripts/dtc/dtc + +# Rule to build device tree blobs +quiet_cmd_dtc = DTC $@ + cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts + +$(obj)/%.dtb: $(dtstree)/%.dts FORCE + $(call if_changed,dtc) + +clean-kernel += linux.bin linux.bin.gz simpleImage.* + +clean-files += *.dtb diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts new file mode 120000 index 00000000000..7cb657892f2 --- /dev/null +++ b/arch/microblaze/boot/dts/system.dts @@ -0,0 +1 @@ +../../platform/generic/system.dts
\ No newline at end of file diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S new file mode 100644 index 00000000000..cb2b537aebe --- /dev/null +++ b/arch/microblaze/boot/linked_dtb.S @@ -0,0 +1,3 @@ +.section __fdt_blob,"a" +.incbin "arch/microblaze/boot/system.dtb" + diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 09c32962b66..bb7c374713a 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31-rc6 -# Tue Aug 18 11:00:02 2009 +# Linux kernel version: 2.6.31 +# Thu Sep 24 10:28:50 2009 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y # # RCU Subsystem # -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set # CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 @@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +CONFIG_XILINX_EMACLITE=y CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y - -# -# Wireless LAN -# +CONFIG_WLAN=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set @@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y # CONFIG_DISPLAY_SUPPORT is not set # CONFIG_SOUND is not set # CONFIG_USB_SUPPORT is not set +CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_FILE_LOCKING=y CONFIG_FSNOTIFY=y # CONFIG_DNOTIFY is not set @@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_SYSCTL_SYSCALL_CHECK is not set # CONFIG_PAGE_POISONING is not set # CONFIG_SAMPLES is not set # CONFIG_KMEMCHECK is not set CONFIG_EARLY_PRINTK=y -CONFIG_HEART_BEAT=y +# CONFIG_HEART_BEAT is not set CONFIG_DEBUG_BOOTMEM=y # @@ -697,7 +700,6 @@ CONFIG_CRYPTO=y # # Crypto core or helper # -# CONFIG_CRYPTO_FIPS is not set # CONFIG_CRYPTO_MANAGER is not set # CONFIG_CRYPTO_MANAGER2 is not set # CONFIG_CRYPTO_GF128MUL is not set @@ -729,11 +731,13 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_HMAC is not set # CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set # # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_MD4 is not set # CONFIG_CRYPTO_MD5 is not set # CONFIG_CRYPTO_MICHAEL_MIC is not set diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index 8b638615a97..adb839bab70 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31-rc6 -# Tue Aug 18 10:35:30 2009 +# Linux kernel version: 2.6.31 +# Thu Sep 24 10:29:43 2009 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y # # RCU Subsystem # -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set # CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 @@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_AF_RXRPC is not set CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set +CONFIG_CFG80211_DEFAULT_PS_VALUE=0 CONFIG_WIRELESS_OLD_REGULATORY=y # CONFIG_WIRELESS_EXT is not set # CONFIG_LIB80211 is not set @@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y # # CFG80211 needs to be enabled for MAC80211 # -CONFIG_MAC80211_DEFAULT_PS_VALUE=0 # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y +# CONFIG_MTD_OF_PARTS is not set # CONFIG_MTD_AR7_PARTS is not set # @@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set # CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_PHYSMAP_OF is not set CONFIG_MTD_UCLINUX=y # CONFIG_MTD_PLATRAM is not set @@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +# CONFIG_XILINX_EMACLITE is not set CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y - -# -# Wireless LAN -# +CONFIG_WLAN=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set @@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB_ARCH_HAS_EHCI is not set +CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_USB is not set # CONFIG_USB_OTG_WHITELIST is not set # CONFIG_USB_OTG_BLACKLIST_HUB is not set @@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_FILE_LOCKING=y CONFIG_FSNOTIFY=y # CONFIG_DNOTIFY is not set @@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_PAGE_POISONING is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_EARLY_PRINTK=y -CONFIG_HEART_BEAT=y +# CONFIG_HEART_BEAT is not set # CONFIG_DEBUG_BOOTMEM is not set # @@ -736,7 +740,6 @@ CONFIG_CRYPTO=y # # Crypto core or helper # -# CONFIG_CRYPTO_FIPS is not set # CONFIG_CRYPTO_MANAGER is not set # CONFIG_CRYPTO_MANAGER2 is not set # CONFIG_CRYPTO_GF128MUL is not set @@ -768,11 +771,13 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_HMAC is not set # CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set # # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_MD4 is not set # CONFIG_CRYPTO_MD5 is not set # CONFIG_CRYPTO_MICHAEL_MIC is not set diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h new file mode 100644 index 00000000000..e7bc9dc11b5 --- /dev/null +++ b/arch/microblaze/include/asm/asm-compat.h @@ -0,0 +1,17 @@ +#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H +#define _ASM_MICROBLAZE_ASM_COMPAT_H + +#include <asm/types.h> + +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +# define ASM_CONST(x) x +#else +/* This version of stringify will deal with commas... */ +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +# define __ASM_CONST(x) x##UL +# define ASM_CONST(x) __ASM_CONST(x) +#endif + +#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */ diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 7c3ec13b44d..fc9997b73c0 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size, #define in_be32(a) __raw_readl((const void __iomem __force *)(a)) #define in_be16(a) __raw_readw(a) +#define writel_be(v, a) out_be32((__force unsigned *)a, v) +#define readl_be(a) in_be32((__force unsigned *)a) + /* * Little endian */ diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3..00000000000 --- a/arch/microblaze/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 72aceae8868..880c988c223 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -17,6 +17,7 @@ #include <linux/pfn.h> #include <asm/setup.h> +#include <asm/asm-compat.h> #include <linux/const.h> #ifdef __KERNEL__ @@ -26,6 +27,8 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR)) + #ifndef __ASSEMBLY__ #define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 27f8dafd8c3..ed67c9ed15b 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr); void time_init(void); void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, - unsigned int fdt); + unsigned int fdt, unsigned int msr); void machine_restart(char *cmd); void machine_shutdown(void); diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h new file mode 100644 index 00000000000..048dfcd8d89 --- /dev/null +++ b/arch/microblaze/include/asm/syscall.h @@ -0,0 +1,99 @@ +#ifndef __ASM_MICROBLAZE_SYSCALL_H +#define __ASM_MICROBLAZE_SYSCALL_H + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/ptrace.h> + +/* The system call number is given by the user in R12 */ +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r12; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* TODO. */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r3; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) + regs->r3 = -error; + else + regs->r3 = val; +} + +static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs, + unsigned int n) +{ + switch (n) { + case 5: return regs->r10; + case 4: return regs->r9; + case 3: return regs->r8; + case 2: return regs->r7; + case 1: return regs->r6; + case 0: return regs->r5; + default: + BUG(); + } + return ~0; +} + +static inline void microblaze_set_syscall_arg(struct pt_regs *regs, + unsigned int n, + unsigned long val) +{ + switch (n) { + case 5: + regs->r10 = val; + case 4: + regs->r9 = val; + case 3: + regs->r8 = val; + case 2: + regs->r7 = val; + case 1: + regs->r6 = val; + case 0: + regs->r5 = val; + default: + BUG(); + } +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + while (n--) + *args++ = microblaze_get_syscall_arg(regs, i++); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + while (n--) + microblaze_set_syscall_arg(regs, i++, *args++); +} + +#endif /* __ASM_MICROBLAZE_SYSCALL_H */ diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index c411c6757de..3539babc1c1 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = { {"7.10.d", 0x0b}, {"7.20.a", 0x0c}, {"7.20.b", 0x0d}, + {"7.20.c", 0x0e}, /* FIXME There is no keycode defined in MBV for these versions */ {"2.10.a", 0x10}, {"3.00.a", 0x20}, @@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = { {"spartan3a", 0xa}, {"spartan3an", 0xb}, {"spartan3adsp", 0xc}, + {"spartan6", 0xd}, + {"virtex6", 0xe}, /* FIXME There is no key code defined for spartan2 */ {"spartan2", 0xf0}, {NULL, 0}, diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index c7353e79f4a..acc1f05d1e2 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -308,38 +308,69 @@ C_ENTRY(_user_exception): swi r12, r1, PTO+PT_R0; tovirt(r1,r1) - la r15, r0, ret_from_trap-8 /* where the trap should return need -8 to adjust for rtsd r15, 8*/ /* Jump to the appropriate function for the system call number in r12 * (r12 is not preserved), or return an error if r12 is not valid. The LP * register should point to the location where * the called function should return. [note that MAKE_SYS_CALL uses label 1] */ - /* See if the system call number is valid. */ + + # Step into virtual mode. + set_vms; + addik r11, r0, 3f + rtid r11, 0 + nop +3: + add r11, r0, CURRENT_TASK /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO /* get thread info */ + lwi r11, r11, TI_FLAGS /* get flags in thread info */ + andi r11, r11, _TIF_WORK_SYSCALL_MASK + beqi r11, 4f + + addik r3, r0, -ENOSYS + swi r3, r1, PTO + PT_R3 + brlid r15, do_syscall_trace_enter + addik r5, r1, PTO + PT_R0 + + # do_syscall_trace_enter returns the new syscall nr. + addk r12, r0, r3 + lwi r5, r1, PTO+PT_R5; + lwi r6, r1, PTO+PT_R6; + lwi r7, r1, PTO+PT_R7; + lwi r8, r1, PTO+PT_R8; + lwi r9, r1, PTO+PT_R9; + lwi r10, r1, PTO+PT_R10; +4: +/* Jump to the appropriate function for the system call number in r12 + * (r12 is not preserved), or return an error if r12 is not valid. + * The LP register should point to the location where the called function + * should return. [note that MAKE_SYS_CALL uses label 1] */ + /* See if the system call number is valid */ addi r11, r12, -__NR_syscalls; - bgei r11,1f; + bgei r11,5f; /* Figure out which function to use for this system call. */ /* Note Microblaze barrel shift is optional, so don't rely on it */ add r12, r12, r12; /* convert num -> ptr */ add r12, r12, r12; /* Trac syscalls and stored them to r0_ram */ - lwi r3, r12, 0x400 + TOPHYS(r0_ram) + lwi r3, r12, 0x400 + r0_ram addi r3, r3, 1 - swi r3, r12, 0x400 + TOPHYS(r0_ram) + swi r3, r12, 0x400 + r0_ram + + # Find and jump into the syscall handler. + lwi r12, r12, sys_call_table + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_trap-8 + bra r12 - lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */ - /* Make the system call. to r12*/ - set_vms; - rtid r12, 0; - nop; /* The syscall number is invalid, return an error. */ -1: VM_ON; /* RETURN() expects virtual mode*/ +5: addi r3, r0, -ENOSYS; rtsd r15,8; /* looks like a normal subroutine return */ or r0, r0, r0 -/* Entry point used to return from a syscall/trap. */ +/* Entry point used to return from a syscall/trap */ /* We re-enable BIP bit before state restore */ C_ENTRY(ret_from_trap): set_bip; /* Ints masked for state restore*/ @@ -349,6 +380,23 @@ C_ENTRY(ret_from_trap): /* We're returning to user mode, so check for various conditions that * trigger rescheduling. */ + # FIXME: Restructure all these flag checks. + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_WORK_SYSCALL_MASK + beqi r11, 1f + + swi r3, r1, PTO + PT_R3 + swi r4, r1, PTO + PT_R4 + brlid r15, do_syscall_trace_leave + addik r5, r1, PTO + PT_R0 + lwi r3, r1, PTO + PT_R3 + lwi r4, r1, PTO + PT_R4 +1: + + /* We're returning to user mode, so check for various conditions that + * trigger rescheduling. */ /* Get current task ptr into r11 */ add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ lwi r11, r11, TS_THREAD_INFO; /* get thread info */ diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 0cb64a31e89..d9f70f83097 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, #endif #if 0 - printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n", + printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \ + "ESR=%08x\n", type, user_mode(regs) ? "user" : "kernel", fsr, (unsigned int) regs->pc, (unsigned int) regs->esr); #endif @@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, switch (type & 0x1F) { case MICROBLAZE_ILL_OPCODE_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Illegal opcode exception in user mode.\n"); + pr_debug(KERN_WARNING "Illegal opcode exception " \ + "in user mode.\n"); _exception(SIGILL, regs, ILL_ILLOPC, addr); return; } - printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n"); + printk(KERN_WARNING "Illegal opcode exception " \ + "in kernel mode.\n"); die("opcode exception", regs, SIGBUS); break; case MICROBLAZE_IBUS_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Instruction bus error exception in user mode.\n"); + pr_debug(KERN_WARNING "Instruction bus error " \ + "exception in user mode.\n"); _exception(SIGBUS, regs, BUS_ADRERR, addr); return; } - printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n"); + printk(KERN_WARNING "Instruction bus error exception " \ + "in kernel mode.\n"); die("bus exception", regs, SIGBUS); break; case MICROBLAZE_DBUS_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Data bus error exception in user mode.\n"); + pr_debug(KERN_WARNING "Data bus error exception " \ + "in user mode.\n"); _exception(SIGBUS, regs, BUS_ADRERR, addr); return; } - printk(KERN_WARNING "Data bus error exception in kernel mode.\n"); + printk(KERN_WARNING "Data bus error exception " \ + "in kernel mode.\n"); die("bus exception", regs, SIGBUS); break; case MICROBLAZE_DIV_ZERO_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Divide by zero exception in user mode\n"); - _exception(SIGILL, regs, ILL_ILLOPC, addr); + pr_debug(KERN_WARNING "Divide by zero exception " \ + "in user mode\n"); + _exception(SIGILL, regs, FPE_INTDIV, addr); return; } - printk(KERN_WARNING "Divide by zero exception in kernel mode.\n"); + printk(KERN_WARNING "Divide by zero exception " \ + "in kernel mode.\n"); die("Divide by exception", regs, SIGBUS); break; case MICROBLAZE_FPU_EXCEPTION: - printk(KERN_WARNING "FPU exception\n"); + pr_debug(KERN_WARNING "FPU exception\n"); /* IEEE FP exception */ /* I removed fsr variable and use code var for storing fsr */ if (fsr & FSR_IO) @@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, #ifdef CONFIG_MMU case MICROBLAZE_PRIVILEGED_EXCEPTION: - printk(KERN_WARNING "Privileged exception\n"); + pr_debug(KERN_WARNING "Privileged exception\n"); /* "brk r0,r0" - used as debug breakpoint */ if (get_user(code, (unsigned long *)regs->pc) == 0 && code == 0x980c0000) { diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index e41c6ce2a7b..697ce3007f3 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -54,6 +54,16 @@ ENTRY(_start) mfs r1, rmsr andi r1, r1, ~2 mts rmsr, r1 +/* + * Here is checking mechanism which check if Microblaze has msr instructions + * We load msr and compare it with previous r1 value - if is the same, + * msr instructions works if not - cpu don't have them. + */ + /* r8=0 - I have msr instr, 1 - I don't have them */ + rsubi r0, r0, 1 /* set the carry bit */ + msrclr r0, 0x4 /* try to clear it */ + /* read the carry bit, r8 will be '0' if msrclr exists */ + addik r8, r0, 0 /* r7 may point to an FDT, or there may be one linked in. if it's in r7, we've got to save it away ASAP. @@ -209,8 +219,8 @@ start_here: * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for * the function. */ - la r8, r0, machine_early_init - brald r15, r8 + la r9, r0, machine_early_init + brald r15, r9 nop #ifndef CONFIG_MMU diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index 3288c973767..6b0288ebccd 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -84,9 +84,10 @@ #define NUM_TO_REG(num) r ## num #ifdef CONFIG_MMU -/* FIXME you can't change first load of MSR because there is - * hardcoded jump bri 4 */ #define RESTORE_STATE \ + lwi r5, r1, 0; \ + mts rmsr, r5; \ + nop; \ lwi r3, r1, PT_R3; \ lwi r4, r1, PT_R4; \ lwi r5, r1, PT_R5; \ @@ -309,6 +310,9 @@ _hw_exception_handler: lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */ #endif + mfs r5, rmsr; + nop + swi r5, r1, 0; mfs r3, resr nop mfs r4, rear; @@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */ addk r8, r17, r0; /* Load exception address */ bralid r15, full_exception; /* Branch to the handler */ nop; + mts r0, rfsr; /* Clear sticky fsr */ + nop /* * Trigger execution of the signal handler by enabling diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 00b12c6d532..4201c743cc9 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) regs->pc = pc; regs->r1 = usp; regs->pt_mode = 0; + regs->msr |= MSR_UMS; } #ifdef CONFIG_MMU diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 53ff39af6a5..4b3ac32754d 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -29,6 +29,10 @@ #include <linux/sched.h> #include <linux/ptrace.h> #include <linux/signal.h> +#include <linux/elf.h> +#include <linux/audit.h> +#include <linux/seccomp.h> +#include <linux/tracehook.h> #include <linux/errno.h> #include <asm/processor.h> @@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return rval; } +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) +{ + long ret = 0; + + secure_computing(regs->r12); + + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + /* + * Tracing decided this syscall should not happen. + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in regs->regs[0]. + */ + ret = -1L; + + if (unlikely(current->audit_context)) + audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12, + regs->r5, regs->r6, + regs->r7, regs->r8); + + return ret ?: regs->r12; +} + +asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) +{ + int step; + + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); +} + +#if 0 +static asmlinkage void syscall_trace(void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + /* The 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} +#endif + void ptrace_disable(struct task_struct *child) { /* nothing to do */ diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 2a97bf513b6..8c1e0f4dcf1 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr) #endif /* CONFIG_MTD_UCLINUX_EBSS */ void __init machine_early_init(const char *cmdline, unsigned int ram, - unsigned int fdt) + unsigned int fdt, unsigned int msr) { unsigned long *src, *dst = (unsigned long *)0x0; @@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, early_printk("New klimit: 0x%08x\n", (unsigned)klimit); #endif +#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR + if (msr) + early_printk("!!!Your kernel has setup MSR instruction but " + "CPU don't have it %d\n", msr); +#else + if (!msr) + early_printk("!!!Your kernel not setup MSR instruction but " + "CPU have it %d\n", msr); +#endif + for (src = __ivt_start; src < __ivt_end; src++, dst++) *dst = *src; diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index ec5fa91a48d..e704188d785 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze") OUTPUT_ARCH(microblaze) ENTRY(_start) +#include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> jiffies = jiffies_64 + 4; SECTIONS { . = CONFIG_KERNEL_START; - .text : { + _start = CONFIG_KERNEL_BASE_ADDR; + .text : AT(ADDR(.text) - LOAD_OFFSET) { _text = . ; _stext = . ; *(.text .text.*) @@ -33,24 +36,22 @@ SECTIONS { } . = ALIGN (4) ; - _fdt_start = . ; /* place for fdt blob */ - . = . + 0x4000; - _fdt_end = . ; + __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) { + _fdt_start = . ; /* place for fdt blob */ + *(__fdt_blob) ; /* Any link-placed DTB */ + . = _fdt_start + 0x4000; /* Pad up to 16kbyte */ + _fdt_end = . ; + } . = ALIGN(16); RODATA - . = ALIGN(16); - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(16) /* * sdata2 section can go anywhere, but must be word aligned * and SDA2_BASE must point to the middle of it */ - .sdata2 : { + .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) { _ssrw = .; . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */ *(.sdata2) @@ -61,12 +62,7 @@ SECTIONS { } _sdata = . ; - .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */ - DATA_DATA - CONSTRUCTORS - } - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } + RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) _edata = . ; /* Reserve some low RAM for r0 based memory references */ @@ -74,18 +70,14 @@ SECTIONS { r0_ram = . ; . = . + 4096; /* a page should be enough */ - /* The initial task */ - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } - /* Under the microblaze ABI, .sdata and .sbss must be contiguous */ . = ALIGN(8); - .sdata : { + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { _ssro = .; *(.sdata) } - .sbss : { + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { _ssbss = .; *(.sbss) _esbss = .; @@ -96,47 +88,36 @@ SECTIONS { __init_begin = .; - . = ALIGN(4096); - .init.text : { - _sinittext = . ; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) - .init.data : { + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA } . = ALIGN(4); - .init.ivt : { + .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) { __ivt_start = .; *(.init.ivt) __ivt_end = .; } - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + INIT_SETUP(0) } - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) { + INIT_CALLS } - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + CON_INITCALL } SECURITY_INIT __init_end_before_initramfs = .; - .init.ramfs ALIGN(4096) : { + .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { __initramfs_start = .; *(.init.ramfs) __initramfs_end = .; @@ -152,7 +133,8 @@ SECTIONS { } __init_end = .; - .bss ALIGN (4096) : { /* page aligned when MMU used */ + .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) { + /* page aligned when MMU used */ __bss_start = . ; *(.bss*) *(COMMON) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 1110784eb3f..a44892e7cd5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end) totalram_pages++; pages++; } - printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages); + printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", + (int)(pages * (PAGE_SIZE / 1024))); } #endif diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3f04d4c406b..b3deed8db61 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table, /* And the same for proc */ -int proc_dolasatstring(ctl_table *table, int write, struct file *filp, +int proc_dolasatstring(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp, } /* proc function to write EEPROM after changing int entry */ -int proc_dolasatint(ctl_table *table, int write, struct file *filp, +int proc_dolasatint(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp, static int rtctmp; /* proc function to read/write RealTime Clock */ -int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, +int proc_dolasatrtc(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct timespec ts; @@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, if (rtctmp < 0) rtctmp = 0; } - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r) return r; @@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table, #endif #ifdef CONFIG_INET -int proc_lasat_ip(ctl_table *table, int write, struct file *filp, +int proc_lasat_ip(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int ip; @@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table, return 0; } -int proc_lasat_prid(ctl_table *table, int write, struct file *filp, +int proc_lasat_prid(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r < 0) return r; if (write) { diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 76f41bdb79c..10549dcfb61 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -44,24 +44,8 @@ SECTIONS RO_DATA(PAGE_SIZE) /* writeable */ - .data : { /* Data */ - DATA_DATA - CONSTRUCTORS - } - - .data_nosave : { NOSAVE_DATA; } - - .data.page_aligned : { PAGE_ALIGNED_DATA(PAGE_SIZE); } - .data.cacheline_aligned : { CACHELINE_ALIGNED_DATA(32); } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly)) { - READ_MOSTLY_DATA(32); - _edata = .; /* End of data section */ - } - - .data.init_task : { INIT_TASK_DATA(THREAD_SIZE); } + RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) + _edata = .; /* might get freed after init */ . = ALIGN(PAGE_SIZE); @@ -74,22 +58,8 @@ SECTIONS /* will be freed after init */ . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT; - _einittext = .; - } - .init.data : { INIT_DATA; } - .setup.init : { INIT_SETUP(16); } - - __initcall_start = .; - .initcall.init : { - INITCALLS - } - __initcall_end = .; - .con_initcall.init : { CON_INITCALL; } - - SECURITY_INIT + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) . = ALIGN(4); __alt_instructions = .; .altinstructions : { *(.altinstructions) } @@ -100,8 +70,6 @@ SECTIONS .exit.text : { EXIT_TEXT; } .exit.data : { EXIT_DATA; } - .init.ramfs : { INIT_RAM_FS; } - PERCPU(32) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h index 1e1c824764e..5f39d5597ce 100644 --- a/arch/parisc/include/asm/fcntl.h +++ b/arch/parisc/include/asm/fcntl.h @@ -28,6 +28,8 @@ #define F_SETOWN 12 /* for sockets. */ #define F_SETSIG 13 /* for sockets. */ #define F_GETSIG 14 /* for sockets. */ +#define F_GETOWN_EX 15 +#define F_SETOWN_EX 16 /* for posix fcntl() and lockf() */ #define F_RDLCK 01 diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index aea1784edbd..775be2791bc 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -77,13 +77,7 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); data_start = .; - . = ALIGN(16); - /* Exception table */ - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(16) NOTES @@ -94,23 +88,8 @@ SECTIONS __stop___unwind = .; } - /* rarely changed data like cpu maps */ - . = ALIGN(16); - .data.read_mostly : { - *(.data.read_mostly) - } - - . = ALIGN(L1_CACHE_BYTES); /* Data */ - .data : { - DATA_DATA - CONSTRUCTORS - } - - . = ALIGN(L1_CACHE_BYTES); - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) /* PA-RISC locks requires 16-byte alignment */ . = ALIGN(16); @@ -118,17 +97,6 @@ SECTIONS *(.data.lock_aligned) } - /* nosave data is really only used for software suspend...it's here - * just in case we ever implement it - */ - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - .data_nosave : { - *(.data.nosave) - } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - /* End of data section */ _edata = .; @@ -147,14 +115,6 @@ SECTIONS } __bss_stop = .; - - /* assembler code expects init_task to be 16k aligned */ - . = ALIGN(16384); - /* init_task */ - .data.init_task : { - *(.data.init_task) - } - #ifdef CONFIG_64BIT . = ALIGN(16); /* Linkage tables */ @@ -172,64 +132,8 @@ SECTIONS /* reserve space for interrupt stack by aligning __init* to 16k */ . = ALIGN(16384); __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : { - INIT_DATA - } - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - SECURITY_INIT - - /* alternate instruction replacement. This is a mechanism x86 uses - * to detect the CPU type and replace generic instruction sequences - * with CPU specific ones. We don't currently do this in PA, but - * it seems like a good idea... - */ - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : { - *(.altinstr_replacement) - } - - /* .exit.text is discard at runtime, not link time, to deal with references - * from .altinstructions and .eh_frame - */ - .exit.text : { - EXIT_TEXT - } - .exit.data : { - EXIT_DATA - } -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif + INIT_TEXT_SECTION(16384) + INIT_DATA_SECTION(16) PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4fd479059d6..10a0a5488a4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -385,9 +385,15 @@ config NUMA config NODES_SHIFT int + default "8" if PPC64 default "4" depends on NEED_MULTIPLE_NODES +config MAX_ACTIVE_REGIONS + int + default "256" if PPC64 + default "32" + config ARCH_SELECT_MEMORY_MODEL def_bool y depends on PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index aacf629c1a9..1a54a3b3a3f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -164,6 +164,17 @@ PHONY += $(BOOT_TARGETS) boot := arch/$(ARCH)/boot +ifeq ($(CONFIG_RELOCATABLE),y) +quiet_cmd_relocs_check = CALL $< + cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux" + +PHONY += relocs_check +relocs_check: arch/powerpc/relocs_check.pl vmlinux + $(call cmd,relocs_check) + +zImage: relocs_check +endif + $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 9dade15d1ab..6d94d27ed85 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -15,7 +15,16 @@ struct dev_archdata { /* DMA operations on that device */ struct dma_map_ops *dma_ops; - void *dma_data; + + /* + * When an iommu is in use, dma_data is used as a ptr to the base of the + * iommu_table. Otherwise, it is a simple numerical offset. + */ + union { + dma_addr_t dma_offset; + void *iommu_table_base; + } dma_data; + #ifdef CONFIG_SWIOTLB dma_addr_t max_direct_dma_addr; #endif diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index cb2ca41dd52..e281daebddc 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,6 @@ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size, extern void dma_direct_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -extern unsigned long get_dma_direct_offset(struct device *dev); #ifdef CONFIG_NOT_COHERENT_CACHE /* @@ -90,6 +89,28 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) dev->archdata.dma_ops = ops; } +/* + * get_dma_offset() + * + * Get the dma offset on configurations where the dma address can be determined + * from the physical address by looking at a simple offset. Direct dma and + * swiotlb use this function, but it is typically not used by implementations + * with an iommu. + */ +static inline dma_addr_t get_dma_offset(struct device *dev) +{ + if (dev) + return dev->archdata.dma_data.dma_offset; + + return PCI_DRAM_OFFSET; +} + +static inline void set_dma_offset(struct device *dev, dma_addr_t off) +{ + if (dev) + dev->archdata.dma_data.dma_offset = off; +} + /* this will be removed soon */ #define flush_write_buffers() @@ -181,12 +202,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return paddr + get_dma_direct_offset(dev); + return paddr + get_dma_offset(dev); } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { - return daddr - get_dma_direct_offset(dev); + return daddr - get_dma_offset(dev); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h new file mode 100644 index 00000000000..a67aeed17d4 --- /dev/null +++ b/arch/powerpc/include/asm/fsldma.h @@ -0,0 +1,136 @@ +/* + * Freescale MPC83XX / MPC85XX DMA Controller + * + * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__ +#define __ARCH_POWERPC_ASM_FSLDMA_H__ + +#include <linux/dmaengine.h> + +/* + * Definitions for the Freescale DMA controller's DMA_SLAVE implemention + * + * The Freescale DMA_SLAVE implementation was designed to handle many-to-many + * transfers. An example usage would be an accelerated copy between two + * scatterlists. Another example use would be an accelerated copy from + * multiple non-contiguous device buffers into a single scatterlist. + * + * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This + * structure contains a list of hardware addresses that should be copied + * to/from the scatterlist passed into device_prep_slave_sg(). The structure + * also has some fields to enable hardware-specific features. + */ + +/** + * struct fsl_dma_hw_addr + * @entry: linked list entry + * @address: the hardware address + * @length: length to transfer + * + * Holds a single physical hardware address / length pair for use + * with the DMAEngine DMA_SLAVE API. + */ +struct fsl_dma_hw_addr { + struct list_head entry; + + dma_addr_t address; + size_t length; +}; + +/** + * struct fsl_dma_slave + * @addresses: a linked list of struct fsl_dma_hw_addr structures + * @request_count: value for DMA request count + * @src_loop_size: setup and enable constant source-address DMA transfers + * @dst_loop_size: setup and enable constant destination address DMA transfers + * @external_start: enable externally started DMA transfers + * @external_pause: enable externally paused DMA transfers + * + * Holds a list of address / length pairs for use with the DMAEngine + * DMA_SLAVE API implementation for the Freescale DMA controller. + */ +struct fsl_dma_slave { + + /* List of hardware address/length pairs */ + struct list_head addresses; + + /* Support for extra controller features */ + unsigned int request_count; + unsigned int src_loop_size; + unsigned int dst_loop_size; + bool external_start; + bool external_pause; +}; + +/** + * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave + * @slave: the &struct fsl_dma_slave to add to + * @address: the hardware address to add + * @length: the length of bytes to transfer from @address + * + * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on + * success, -ERRNO otherwise. + */ +static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave, + dma_addr_t address, size_t length) +{ + struct fsl_dma_hw_addr *addr; + + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); + if (!addr) + return -ENOMEM; + + INIT_LIST_HEAD(&addr->entry); + addr->address = address; + addr->length = length; + + list_add_tail(&addr->entry, &slave->addresses); + return 0; +} + +/** + * fsl_dma_slave_free - free a struct fsl_dma_slave + * @slave: the struct fsl_dma_slave to free + * + * Free a struct fsl_dma_slave and all associated address/length pairs + */ +static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave) +{ + struct fsl_dma_hw_addr *addr, *tmp; + + if (slave) { + list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) { + list_del(&addr->entry); + kfree(addr); + } + + kfree(slave); + } +} + +/** + * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave + * @gfp: the flags to pass to kmalloc when allocating this structure + * + * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new + * struct fsl_dma_slave on success, or NULL on failure. + */ +static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp) +{ + struct fsl_dma_slave *slave; + + slave = kzalloc(sizeof(*slave), gfp); + if (!slave) + return NULL; + + INIT_LIST_HEAD(&slave->addresses); + return slave; +} + +#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7464c0daddd..edfc9803ec9 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -70,6 +70,16 @@ struct iommu_table { struct scatterlist; +static inline void set_iommu_table_base(struct device *dev, void *base) +{ + dev->archdata.dma_data.iommu_table_base = base; +} + +static inline void *get_iommu_table_base(struct device *dev) +{ + return dev->archdata.dma_data.iommu_table_base; +} + /* Frees table for an individual device node */ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index ccc68b50d05..5a9ede4962c 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -29,7 +29,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); void ppc_enable_pmcs(void); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include <asm/lppaca.h> static inline void ppc_set_pmu_inuse(int inuse) diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h index 6c3e1f4378d..ec0b0b0d1df 100644 --- a/arch/powerpc/include/asm/pte-40x.h +++ b/arch/powerpc/include/asm/pte-40x.h @@ -43,6 +43,7 @@ #define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ #define _PAGE_USER 0x010 /* matches one of the zone permission bits */ +#define _PAGE_SPECIAL 0x020 /* software: Special page */ #define _PAGE_RW 0x040 /* software: Writes permitted */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ #define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 94e979718dc..dd5ea95fe61 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -32,6 +32,7 @@ #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ +#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ /* These five software bits must be masked out when the entry is loaded * into the TLB. diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index c3b65076a26..f2b370180a0 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -25,9 +25,6 @@ #ifndef _PAGE_WRITETHRU #define _PAGE_WRITETHRU 0 #endif -#ifndef _PAGE_SPECIAL -#define _PAGE_SPECIAL 0 -#endif #ifndef _PAGE_4K_PFN #define _PAGE_4K_PFN 0 #endif @@ -179,7 +176,5 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); #define HAVE_PAGE_AGP /* Advertise support for _PAGE_SPECIAL */ -#ifdef _PAGE_SPECIAL #define __HAVE_ARCH_PTE_SPECIAL -#endif diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 87ddb3fb948..37771a51811 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -18,7 +18,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, + return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, device_to_mask(dev), flag, dev_to_node(dev)); } @@ -26,7 +26,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } /* Creates TCEs for a user provided buffer. The user buffer must be @@ -39,8 +39,8 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size, - device_to_mask(dev), direction, attrs); + return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, + size, device_to_mask(dev), direction, attrs); } @@ -48,7 +48,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction, + iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); } @@ -57,7 +57,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, + return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); } @@ -65,14 +65,14 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, + iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); } /* We support DMA to/from any memory page via the iommu */ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { - struct iommu_table *tbl = dev->archdata.dma_data; + struct iommu_table *tbl = get_iommu_table_base(dev); if (!tbl || tbl->it_offset > mask) { printk(KERN_INFO diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 21b784d7e7d..6215062caf8 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -21,13 +21,6 @@ * default the offset is PCI_DRAM_OFFSET. */ -unsigned long get_dma_direct_offset(struct device *dev) -{ - if (dev) - return (unsigned long)dev->archdata.dma_data; - - return PCI_DRAM_OFFSET; -} void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) @@ -37,7 +30,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; - *dma_handle += get_dma_direct_offset(dev); + *dma_handle += get_dma_offset(dev); return ret; #else struct page *page; @@ -51,7 +44,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, return NULL; ret = page_address(page); memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); + *dma_handle = virt_to_abs(ret) + get_dma_offset(dev); return ret; #endif @@ -75,7 +68,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int i; for_each_sg(sgl, sg, nents, i) { - sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); + sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -110,7 +103,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, { BUG_ON(dir == DMA_NONE); __dma_sync_page(page, offset, size, dir); - return page_to_phys(page) + offset + get_dma_direct_offset(dev); + return page_to_phys(page) + offset + get_dma_offset(dev); } static inline void dma_direct_unmap_page(struct device *dev, diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9048f96237f..24dcc0ecf24 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,7 +17,6 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> -#include <asm/reg.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e9f4840096b..bb8209e3493 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1117,7 +1117,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) /* Hook up default DMA ops */ sd->dma_ops = pci_dma_ops; - sd->dma_data = (void *)PCI_DRAM_OFFSET; + set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ if (ppc_md.pci_dma_dev_setup) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0a321643305..1168c5f440a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1165,7 +1165,22 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long base = mm->brk; + unsigned long ret; + +#ifdef CONFIG_PPC64 + /* + * If we are using 1TB segments and we are allowed to randomise + * the heap, we can put it above 1TB so it is backed by a 1TB + * segment. Otherwise the heap will be in the bottom 1TB + * which always uses 256MB segments and this may result in a + * performance penalty. + */ + if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); +#endif + + ret = PAGE_ALIGN(base + brk_rnd()); if (ret < mm->brk) return mm->brk; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 864334b337a..bafac2e41ae 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -800,7 +800,7 @@ static void __init prom_send_capabilities(void) root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* try calling the ibm,client-architecture-support method */ - prom_printf("Calling ibm,client-architecture..."); + prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, @@ -814,6 +814,7 @@ static void __init prom_send_capabilities(void) return; } call_prom("close", 1, 0, root); + prom_printf(" not implemented\n"); } /* no ibm,client-architecture-support call, try the old way */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 3faaf29bdb2..94e2df3cae0 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -241,6 +241,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + + /* * our vma flags don't have VM_WRITE so by default, the process isn't * allowed to write those pages. * gdb can break that with ptrace interface, and thus trigger COW on @@ -260,11 +267,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, vdso_pagelist); - if (rc) + if (rc) { + current->mm->context.vdso_base = 0; goto fail_mmapsem; - - /* Put vDSO base into mm struct */ - current->mm->context.vdso_base = vdso_base; + } up_write(&mm->mmap_sem); return 0; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index bc7b41edbdf..77f64218abf 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1054,6 +1054,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) return NULL; tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); + if (tbl == NULL) + return NULL; of_parse_dma_window(dev->dev.archdata.of_node, dma_window, &tbl->it_index, &offset, &size); @@ -1233,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) vio_cmo_set_dma_ops(viodev); else viodev->dev.archdata.dma_ops = &dma_iommu_ops; - viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); + set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); /* init generic 'struct device' fields: */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 58da4070723..f56429362a1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> +#include <asm/thread_info.h> ENTRY(_stext) @@ -71,12 +72,7 @@ SECTIONS /* Read-only data */ RODATA - /* Exception & bug tables */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(0) NOTES :kernel :notes @@ -93,12 +89,7 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); __init_begin = .; - - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } :kernel + INIT_TEXT_SECTION(PAGE_SIZE) :kernel /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -122,23 +113,16 @@ SECTIONS #endif } - . = ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; + INIT_SETUP(16) } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } + INIT_CALLS + } .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + CON_INITCALL } SECURITY_INIT @@ -169,14 +153,10 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; + INIT_RAM_FS } -#endif + PERCPU(PAGE_SIZE) . = ALIGN(8); @@ -240,36 +220,24 @@ SECTIONS #endif /* The initial task and kernel stack */ -#ifdef CONFIG_PPC32 - . = ALIGN(8192); -#else - . = ALIGN(16384); -#endif .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) + INIT_TASK_DATA(THREAD_SIZE) } - . = ALIGN(PAGE_SIZE); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) + PAGE_ALIGNED_DATA(PAGE_SIZE) } - . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) } - . = ALIGN(L1_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) + READ_MOSTLY_DATA(L1_CACHE_BYTES) } - . = ALIGN(PAGE_SIZE); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; + NOSAVE_DATA } . = ALIGN(PAGE_SIZE); @@ -280,14 +248,7 @@ SECTIONS * And finally the bss */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } + BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); _end = . ; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 83f1551ec2c..53040931de3 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -30,6 +30,8 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> +#include "mmu_decl.h" + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #ifdef CONFIG_SMP @@ -166,7 +168,7 @@ struct page * maybe_pte_to_page(pte_t pte) * support falls into the same category. */ -static pte_t set_pte_filter(pte_t pte) +static pte_t set_pte_filter(pte_t pte, unsigned long addr) { pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || @@ -175,6 +177,17 @@ static pte_t set_pte_filter(pte_t pte) if (!pg) return pte; if (!test_bit(PG_arch_1, &pg->flags)) { +#ifdef CONFIG_8xx + /* On 8xx, cache control instructions (particularly + * "dcbst" from flush_dcache_icache) fault as write + * operation if there is an unpopulated TLB entry + * for the address in question. To workaround that, + * we invalidate the TLB here, thus avoiding dcbst + * misbehaviour. + */ + /* 8xx doesn't care about PID, size or ind args */ + _tlbil_va(addr, 0, 0, 0); +#endif /* CONFIG_8xx */ flush_dcache_icache_page(pg); set_bit(PG_arch_1, &pg->flags); } @@ -194,7 +207,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static pte_t set_pte_filter(pte_t pte) +static pte_t set_pte_filter(pte_t pte, unsigned long addr) { struct page *pg; @@ -276,7 +289,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, * this context might not have been activated yet when this * is called. */ - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); /* Perform the setting of the PTE */ __set_pte_at(mm, addr, ptep, pte, 0); diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index ef1cccf7117..f288279e679 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -18,7 +18,6 @@ #include <asm/asm-offsets.h> #include <asm/cputable.h> #include <asm/pgtable.h> -#include <asm/reg.h> #include <asm/exception-64e.h> #include <asm/ppc-opcode.h> diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c index 93b0efddd65..39d361c5c6d 100644 --- a/arch/powerpc/platforms/cell/beat_iommu.c +++ b/arch/powerpc/platforms/cell/beat_iommu.c @@ -77,7 +77,7 @@ static void __init celleb_init_direct_mapping(void) static void celleb_dma_dev_setup(struct device *dev) { dev->archdata.dma_ops = get_pci_dma_ops(); - dev->archdata.dma_data = (void *)celleb_dma_direct_offset; + set_dma_offset(dev, celleb_dma_direct_offset); } static void celleb_pci_dma_dev_setup(struct pci_dev *pdev) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 416db17eb18..ca5bfdfe47f 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -657,15 +657,13 @@ static void cell_dma_dev_setup_fixed(struct device *dev); static void cell_dma_dev_setup(struct device *dev) { - struct dev_archdata *archdata = &dev->archdata; - /* Order is important here, these are not mutually exclusive */ if (get_dma_ops(dev) == &dma_iommu_fixed_ops) cell_dma_dev_setup_fixed(dev); else if (get_pci_dma_ops() == &dma_iommu_ops) - archdata->dma_data = cell_get_iommu_table(dev); + set_iommu_table_base(dev, cell_get_iommu_table(dev)); else if (get_pci_dma_ops() == &dma_direct_ops) - archdata->dma_data = (void *)cell_dma_direct_offset; + set_dma_offset(dev, cell_dma_direct_offset); else BUG(); } @@ -973,11 +971,10 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) static void cell_dma_dev_setup_fixed(struct device *dev) { - struct dev_archdata *archdata = &dev->archdata; u64 addr; addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; - archdata->dma_data = (void *)addr; + set_dma_offset(dev, addr); dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); } diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index 6c1e1011959..9d53cb481a7 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -193,7 +193,7 @@ static void pci_dma_dev_setup_iseries(struct pci_dev *pdev) pdn->iommu_table = iommu_init_table(tbl, -1); else kfree(tbl); - pdev->dev.archdata.dma_data = pdn->iommu_table; + set_iommu_table_base(&pdev->dev, pdn->iommu_table); } #else #define pci_dma_dev_setup_iseries NULL diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index a0ff03a3d8d..7b1d608ea3c 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -189,7 +189,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) } #endif - dev->dev.archdata.dma_data = &iommu_table_iobmap; + set_iommu_table_base(&dev->dev, &iommu_table_iobmap); } static void pci_dma_bus_setup_null(struct pci_bus *b) { } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 661c8e02bcb..1a0000a4b6d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -482,7 +482,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) phb->node); iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); return; } @@ -494,7 +494,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) dn = dn->parent; if (dn && PCI_DN(dn)) - dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); @@ -538,7 +538,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) */ if (dma_window == NULL || pdn->parent == NULL) { pr_debug(" no dma window for device, linking to parent\n"); - dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; + set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table); return; } @@ -554,7 +554,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - dev->dev.archdata.dma_data = pci->iommu_table; + set_iommu_table_base(&dev->dev, pci->iommu_table); } #else /* CONFIG_PCI */ #define pci_dma_bus_setup_pSeries NULL diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl new file mode 100755 index 00000000000..d2571096c3e --- /dev/null +++ b/arch/powerpc/relocs_check.pl @@ -0,0 +1,56 @@ +#!/usr/bin/perl + +# Copyright © 2009 IBM Corporation + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks the relcoations of a vmlinux for "suspicious" +# relocations. + +use strict; +use warnings; + +if ($#ARGV != 1) { + die "$0 [path to objdump] [path to vmlinux]\n"; +} + +# Have Kbuild supply the path to objdump so we handle cross compilation. +my $objdump = shift; +my $vmlinux = shift; +my $bad_relocs_count = 0; +my $bad_relocs = ""; +my $old_binutils = 0; + +open(FD, "$objdump -R $vmlinux|") or die; +while (<FD>) { + study $_; + + # Only look at relcoation lines. + next if (!/\s+R_/); + + # These relocations are okay + next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or + /R_PPC64_ADDR64\s+mach_/); + + # If we see this type of relcoation it's an idication that + # we /may/ be using an old version of binutils. + if (/R_PPC64_UADDR64/) { + $old_binutils++; + } + + $bad_relocs_count++; + $bad_relocs .= $_; +} + +if ($bad_relocs_count) { + print "WARNING: $bad_relocs_count bad relocations\n"; + print $bad_relocs; +} + +if ($old_binutils) { + print "WARNING: You need at binutils >= 2.19 to build a ". + "CONFIG_RELCOATABLE kernel\n"; +} diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 89639ecbf38..ae3c4db86fe 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -297,7 +297,7 @@ static void pci_dma_dev_setup_dart(struct pci_dev *dev) /* We only have one iommu table on the mac for now, which makes * things simple. Setup all PCI devices to point to this table */ - dev->dev.archdata.dma_data = &iommu_table_dart; + set_iommu_table_base(&dev->dev, &iommu_table_dart); } static void pci_dma_bus_setup_dart(struct pci_bus *bus) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0e09a45ac79..c6f0a71b405 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -335,6 +335,16 @@ int cpus_are_in_xmon(void) } #endif +static inline int unrecoverable_excp(struct pt_regs *regs) +{ +#ifdef CONFIG_4xx + /* We have no MSR_RI bit on 4xx, so we simply return false */ + return 0; +#else + return ((regs->msr & MSR_RI) == 0); +#endif +} + static int xmon_core(struct pt_regs *regs, int fromipi) { int cmd = 0; @@ -388,7 +398,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) bp = NULL; if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) bp = at_breakpoint(regs->nip); - if (bp || (regs->msr & MSR_RI) == 0) + if (bp || unrecoverable_excp(regs)) fromipi = 0; if (!fromipi) { @@ -399,7 +409,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) cpu, BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if ((regs->msr & MSR_RI) == 0) + if (unrecoverable_excp(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); release_output_lock(); @@ -490,7 +500,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) printf("Stopped at breakpoint %x (", BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if ((regs->msr & MSR_RI) == 0) + if (unrecoverable_excp(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); remove_bpts(); diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 264528e4f58..b55fd7ed1c3 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev; * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +static int appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len; @@ -289,7 +288,7 @@ out: * current timer interval. */ static int -appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, +appldata_interval_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len, interval; @@ -335,7 +334,7 @@ out: * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, +appldata_generic_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4c512561687..20f282c911c 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -881,11 +881,11 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, struct file *filp, +s390dbf_procactive(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); else return 0; } diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 413c240cbca..b201135cc18 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp) static struct ctl_table cmm_table[]; static int -cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, +cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; @@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, } static int -cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, +cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index b91fa8dbf04..4d58eb0973d 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig @@ -1,12 +1,9 @@ menu "DMA support" -config SH_DMA_API - bool config SH_DMA bool "SuperH on-chip DMA controller (DMAC) support" depends on CPU_SH3 || CPU_SH4 - select SH_DMA_API default n config SH_DMA_IRQ_MULTI @@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ CPU_SUBTYPE_SH7760 +config SH_DMA_API + depends on SH_DMA + bool "SuperH DMA API support" + default n + help + SH_DMA_API always enabled DMA API of used SuperH. + If you want to use DMA ENGINE, you must not enable this. + Please enable DMA_ENGINE and SH_DMAE. + config NR_ONCHIP_DMA_CHANNELS int depends on SH_DMA diff --git a/arch/sh/drivers/dma/Makefile b/arch/sh/drivers/dma/Makefile index c6068137b46..d88c9484762 100644 --- a/arch/sh/drivers/dma/Makefile +++ b/arch/sh/drivers/dma/Makefile @@ -2,8 +2,7 @@ # Makefile for the SuperH DMA specific kernel interface routines under Linux. # -obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o -obj-$(CONFIG_SH_DMA) += dma-sh.o +obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o obj-$(CONFIG_G2_DMA) += dma-g2.o obj-$(CONFIG_SH_DMABRG) += dmabrg.o diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h index 68a5f4cb034..78eed3e0bdf 100644 --- a/arch/sh/include/asm/dma-sh.h +++ b/arch/sh/include/asm/dma-sh.h @@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = { #define CHCR 0x0C #define DMAOR 0x40 +/* + * for dma engine + * + * SuperH DMA mode + */ +#define SHDMA_MIX_IRQ (1 << 1) +#define SHDMA_DMAOR1 (1 << 2) +#define SHDMA_DMAE1 (1 << 3) + +struct sh_dmae_pdata { + unsigned int mode; +}; + #endif /* __DMA_SH_H */ diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 37ecc5577a9..ac55b9efa1c 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -16,11 +16,7 @@ . = ALIGN(4096); .note : { *(.note.*) } - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(0) BUG_TABLE @@ -43,28 +39,17 @@ } .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; + INIT_SETUP(0) } - . = ALIGN(32); - .data.percpu : { - __per_cpu_start = . ; - *(.data.percpu) - __per_cpu_end = . ; - } + PERCPU(32) .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; + INIT_CALLS } .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + CON_INITCALL } .uml.initcall.init : { @@ -118,8 +103,6 @@ . = ALIGN(4096); .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; + INIT_RAM_FS } diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715a188c047..7fcad58e216 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -16,11 +16,7 @@ SECTIONS _text = .; _stext = .; __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) . = ALIGN(PAGE_SIZE); @@ -96,8 +92,7 @@ SECTIONS .init_array : { *(.init_array) } .fini_array : { *(.fini_array) } .data : { - . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ - *(.data.init_task) + INIT_TASK_DATA(KERNEL_STACK_SIZE) . = ALIGN(KERNEL_STACK_SIZE); *(.data.init_irqstack) DATA_DATA diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 2ebd39765db..e7a6cca667a 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -22,11 +22,7 @@ SECTIONS _text = .; _stext = .; __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) . = ALIGN(PAGE_SIZE); .text : @@ -52,8 +48,7 @@ SECTIONS init.data : { INIT_DATA } .data : { - . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ - *(.data.init_task) + INIT_TASK_DATA(KERNEL_STACK_SIZE) . = ALIGN(KERNEL_STACK_SIZE); *(.data.init_irqstack) DATA_DATA @@ -81,19 +76,10 @@ SECTIONS _edata = .; PROVIDE (edata = .); . = ALIGN(PAGE_SIZE); - .sbss : - { - __bss_start = .; - PROVIDE(_bss_start = .); - *(.sbss) - *(.scommon) - } - .bss : - { - *(.dynbss) - *(.bss) - *(COMMON) - } + __bss_start = .; + PROVIDE(_bss_start = .); + SBSS(0) + BSS(0) _end = .; PROVIDE (end = .); diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e..139d4c1a33a 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; #define NMI_INVALID 3 struct ctl_table; -struct file; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, +extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f76a162c082..ada8c201d51 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus) static inline const struct cpumask * cpumask_of_pcibus(const struct pci_bus *bus) { - return cpumask_of_node(__pcibus_to_node(bus)); + int node; + + node = __pcibus_to_node(bus); + return (node == -1) ? cpu_online_mask : + cpumask_of_node(node); } #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index cb66a22d98a..7ff61d6a188 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) /* * proc handler for /proc/sys/kernel/nmi */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, +int proc_nmi_enabled(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int old_state; nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (!!old_state == !!nmi_watchdog_enabled) return 0; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cf53a78e2dc..8cb4974ff59 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) } #ifdef CONFIG_SYSCTL - -static int -vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); -} - static ctl_table kernel_table2[] = { { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = vsyscall_sysctl_change }, + .proc_handler = proc_dointvec }, {} }; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 82728f2c6d5..f4cee9028cf 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; + info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; force_sig_info(si_signo, &info, tsk); } @@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, } static void -do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, + unsigned int fault) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + int code = BUS_ADRERR; up_read(&mm->mmap_sem); @@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +#ifdef CONFIG_MEMORY_FAILURE + if (fault & VM_FAULT_HWPOISON) { + printk(KERN_ERR + "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + tsk->comm, tsk->pid, address); + code = BUS_MCEERR_AR; + } +#endif + force_sig_info_fault(SIGBUS, code, address, tsk); } static noinline void @@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); } else { - if (fault & VM_FAULT_SIGBUS) - do_sigbus(regs, error_code, address); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) + do_sigbus(regs, error_code, address, fault); else BUG(); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 24952fdc7e4..dd38bfbefd1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); } +EXPORT_SYMBOL_GPL(clflush_cache_range); static void __cpa_flush_all(void *arg) { diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 5db96d4304d..1331fcf2614 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum) #else /* CONFIG_X86_32 */ -static unsigned char mp_bus_to_node[BUS_NR] = { +static int mp_bus_to_node[BUS_NR] = { [0 ... BUS_NR - 1] = -1 }; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 921b6ff3b64..9b526154c9b 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -15,6 +15,8 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> +#include <asm/thread_info.h> #include <variant/core.h> #include <platform/hardware.h> @@ -107,41 +109,18 @@ SECTIONS .fixup : { *(.fixup) } - . = ALIGN(16); - - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - + EXCEPTION_TABLE(16) /* Data section */ - . = ALIGN(XCHAL_ICACHE_LINESIZE); _fdata = .; - .data : - { - DATA_DATA - CONSTRUCTORS - . = ALIGN(XCHAL_ICACHE_LINESIZE); - *(.data.cacheline_aligned) - } - + RW_DATA_SECTION(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE) _edata = .; - /* The initial task */ - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } - /* Initialization code and data: */ - . = ALIGN(1 << 12); + . = ALIGN(PAGE_SIZE); __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) .init.data : { @@ -168,36 +147,15 @@ SECTIONS .DebugInterruptVector.text); __boot_reloc_table_end = ABSOLUTE(.) ; - } - . = ALIGN(XCHAL_ICACHE_LINESIZE); - - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - - __initcall_start = .; - .initcall.init : { - INITCALLS + INIT_SETUP(XCHAL_ICACHE_LINESIZE) + INIT_CALLS + CON_INITCALL + SECURITY_INITCALL + INIT_RAM_FS } - __initcall_end = .; - - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - - SECURITY_INIT - - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(4096); - __initramfs_start =.; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; -#endif - - PERCPU(4096) + PERCPU(PAGE_SIZE) /* We need this dummy segment here */ @@ -252,16 +210,11 @@ SECTIONS .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; - . = ALIGN(1 << 12); + . = ALIGN(PAGE_SIZE); __init_end = .; - . = ALIGN(8192); - - /* BSS section */ - _bss_start = .; - .bss : { *(.bss.page_aligned) *(.bss) } - _bss_end = .; + BSS_SECTION(0, 8192, 0) _end = .; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb3914598..e5aeb2b79e6 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,12 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fb..d1e0e6f72bc 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o +obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c41..0ec1fb69d4e 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,31 @@ * async_memcpy - attempt to copy memory with a dma engine. * @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags = 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; } EXPORT_SYMBOL_GPL(async_memcpy); -static int __init async_memcpy_init(void) -{ - return 0; -} - -static void __exit async_memcpy_exit(void) -{ - do { } while (0); -} - -module_init(async_memcpy_init); -module_exit(async_memcpy_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memcpy api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb24..58e4a8752ae 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,26 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_fill_aligned(device, offset, 0, len)) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags = 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; } EXPORT_SYMBOL_GPL(async_memset); -static int __init async_memset_init(void) -{ - return 0; -} - -static void __exit async_memset_exit(void) -{ - do { } while (0); -} - -module_init(async_memset_init); -module_exit(async_memset_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memset api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 00000000000..b88db6d1dc6 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,395 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com> + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/raid/pq.h> +#include <linux/async_tx.h> + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && + is_dma_pq_aligned(device, offset, 0, len)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0) && + is_dma_pq_aligned(device, offset, 0, len)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 00000000000..6d73dde4786 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,468 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/raid/pq.h> +#include <linux/async_tx.h> + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fe..f9cdf04fe7c 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -42,16 +42,21 @@ static void __exit async_tx_exit(void) async_dmaengine_put(); } +module_init(async_tx_init); +module_exit(async_tx_exit); + /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, return async_dma_find_channel(tx_type); } EXPORT_SYMBOL_GPL(__async_tx_find_channel); -#else -static int __init async_tx_init(void) -{ - printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); - return 0; -} - -static void __exit async_tx_exit(void) -{ - do { } while (0); -} #endif @@ -83,10 +77,14 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; + #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH + BUG(); + #endif + /* first check to see if we can still append to depend_tx */ spin_lock_bh(&depend_tx->lock); if (depend_tx->parent && depend_tx->chan == tx->chan) { @@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", @@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. */ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +160,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; @@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) } EXPORT_SYMBOL_GPL(async_tx_quiesce); -module_init(async_tx_init); -module_exit(async_tx_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 90dd3f8bd28..b459a9034aa 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,19 +33,16 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; - dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; - + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward progress * in case they can not provide a descriptor @@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; int src_off = 0; void *dest_buf; - void **srcs = (void **) src_list; + void **srcs; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) src_list; - /* reuse the 'src_list' array to convert to buffer pointers */ + /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) srcs[i] = page_address(src_list[i]) + offset; /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); + dma_addr_t *dma_src = NULL; + BUG_ON(src_cnt <= 1); - if (chan) { + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + dma_src, submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); + WARN_ONCE(chan, "%s: no space for dma address conversion\n", + __func__); /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) } /** - * async_xor_zero_sum - attempt a xor parity check with a dma engine. + * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t *dma_src = NULL; BUG_ON(src_cnt <= 1); - if (device && src_cnt <= device->max_xor) { - dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && device && src_cnt <= device->max_xor && + is_dma_xor_aligned(device, offset, 0, len)) { + unsigned long dma_prep_flags = 0; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, - len, result, - dma_prep_flags); + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + len, result, + dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); - tx = device->device_prep_dma_zero_sum(chan, + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, len, result, dma_prep_flags); } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); + WARN_ONCE(device && src_cnt <= device->max_xor, + "%s: no space for dma address conversion\n", + __func__); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; } -EXPORT_SYMBOL_GPL(async_xor_zero_sum); - -static int __init async_xor_init(void) -{ - #ifdef CONFIG_ASYNC_TX_DMA - /* To conserve stack space the input src_list (array of page pointers) - * is reused to hold the array of dma addresses passed to the driver. - * This conversion is only possible when dma_addr_t is less than the - * the size of a pointer. HIGHMEM64G is known to violate this - * assumption. - */ - BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); - #endif - - return 0; -} - -static void __exit async_xor_exit(void) -{ - do { } while (0); -} - -module_init(async_xor_init); -module_exit(async_xor_exit); +EXPORT_SYMBOL_GPL(async_xor_val); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 00000000000..3ec27c7e62e --- /dev/null +++ b/crypto/async_tx/raid6test.c @@ -0,0 +1,240 @@ +/* + * asynchronous raid6 recovery self test + * Copyright (c) 2009, Intel Corporation. + * + * based on drivers/md/raid6test/test.c: + * Copyright 2002-2007 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include <linux/async_tx.h> +#include <linux/random.h> + +#undef pr +#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) + +#define NDISKS 16 /* Including P and Q */ + +static struct page *dataptrs[NDISKS]; +static addr_conv_t addr_conv[NDISKS]; +static struct page *data[NDISKS+3]; +static struct page *spare; +static struct page *recovi; +static struct page *recovj; + +static void callback(void *param) +{ + struct completion *cmp = param; + + complete(cmp); +} + +static void makedata(int disks) +{ + int i, j; + + for (i = 0; i < disks; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { + u32 *p = page_address(data[i]) + j; + + *p = random32(); + } + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d, int disks) +{ + if (d == disks - 2) + return 'P'; + else if (d == disks - 1) + return 'Q'; + else + return 'D'; +} + +/* Recover two failed blocks. */ +static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) +{ + struct async_submit_ctl submit; + struct completion cmp; + struct dma_async_tx_descriptor *tx = NULL; + enum sum_check_flags result = ~0; + + if (faila > failb) + swap(faila, failb); + + if (failb == disks-1) { + if (faila == disks-2) { + /* P+Q failure. Just rebuild the syndrome. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } else { + struct page *blocks[disks]; + struct page *dest; + int count = 0; + int i; + + /* data+Q failure. Reconstruct data from P, + * then rebuild syndrome + */ + for (i = disks; i-- ; ) { + if (i == faila || i == failb) + continue; + blocks[count++] = ptrs[i]; + } + dest = ptrs[faila]; + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, addr_conv); + tx = async_xor(dest, blocks, 0, count, bytes, &submit); + + init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } + } else { + if (failb == disks-2) { + /* data+P failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); + } else { + /* data+data failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); + } + } + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); + tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) + pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", + __func__, faila, failb, disks); + + if (result != 0) + pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", + __func__, faila, failb, result); +} + +static int test_disks(int i, int j, int disks) +{ + int erra, errb; + + memset(page_address(recovi), 0xf0, PAGE_SIZE); + memset(page_address(recovj), 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); + + erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); + errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); + + pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", + __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), + (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +static int test(int disks, int *tests) +{ + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct completion cmp; + int err = 0; + int i, j; + + recovi = data[disks]; + recovj = data[disks+1]; + spare = data[disks+2]; + + makedata(disks); + + /* Nuke syndromes */ + memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); + memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); + + /* Generate assumed good syndrome */ + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); + tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { + pr("error: initial gen_syndrome(%d) timed out\n", disks); + return 1; + } + + pr("testing the %d-disk case...\n", disks); + for (i = 0; i < disks-1; i++) + for (j = i+1; j < disks; j++) { + (*tests)++; + err += test_disks(i, j, disks); + } + + return err; +} + + +static int raid6_test(void) +{ + int err = 0; + int tests = 0; + int i; + + for (i = 0; i < NDISKS+3; i++) { + data[i] = alloc_page(GFP_KERNEL); + if (!data[i]) { + while (i--) + put_page(data[i]); + return -ENOMEM; + } + } + + /* the 4-disk and 5-disk cases are special for the recovery code */ + if (NDISKS > 4) + err += test(4, &tests); + if (NDISKS > 5) + err += test(5, &tests); + err += test(NDISKS, &tests); + + pr("\n"); + pr("complete (%d tests, %d failure%s)\n", + tests, err, err == 1 ? "" : "s"); + + for (i = 0; i < NDISKS+3; i++) + put_page(data[i]); + + return 0; +} + +static void raid6_test_exit(void) +{ +} + +/* when compiled-in wait for drivers to load first (assumes dma drivers + * are also compliled-in) + */ +late_initcall(raid6_test); +module_exit(raid6_test_exit); +MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index d295bdccc09..9335b87c517 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = { .release = single_release, }; +static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); +static struct acpi_device *lid_device; + /* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ @@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device) /* -------------------------------------------------------------------------- Driver Interface -------------------------------------------------------------------------- */ +int acpi_lid_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_lid_notifier, nb); +} +EXPORT_SYMBOL(acpi_lid_notifier_register); + +int acpi_lid_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb); +} +EXPORT_SYMBOL(acpi_lid_notifier_unregister); + +int acpi_lid_open(void) +{ + acpi_status status; + unsigned long long state; + + status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL, + &state); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return !!state; +} +EXPORT_SYMBOL(acpi_lid_open); + static int acpi_lid_send_state(struct acpi_device *device) { struct acpi_button *button = acpi_driver_data(device); unsigned long long state; acpi_status status; + int ret; status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state); if (ACPI_FAILURE(status)) @@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device) /* input layer checks if event is redundant */ input_report_switch(button->input, SW_LID, !state); input_sync(button->input); - return 0; + + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); + if (ret == NOTIFY_DONE) + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, + device); + return ret; } static void acpi_button_notify(struct acpi_device *device, u32 event) @@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device) error = input_register_device(input); if (error) goto err_remove_fs; - if (button->type == ACPI_BUTTON_TYPE_LID) + if (button->type == ACPI_BUTTON_TYPE_LID) { acpi_lid_send_state(device); + /* + * This assumes there's only one lid device, or if there are + * more we only care about the last one... + */ + lid_device = device; + } if (device->wakeup.flags.valid) { /* Button's GPE is run-wake GPE */ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 71d1b9bab70..614da5b8613 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info, return 0; } -static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_info(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int pos; @@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, goto done; doit: mutex_unlock(&cdrom_mutex); - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); done: printk(KERN_INFO "cdrom: info buffer too small\n"); goto doit; @@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void) mutex_unlock(&cdrom_mutex); } -static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 6a06913b01d..08a6f50ae79 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -1087,6 +1087,14 @@ config MMTIMER The mmtimer device allows direct userspace access to the Altix system timer. +config UV_MMTIMER + tristate "UV_MMTIMER Memory mapped RTC for SGI UV" + depends on X86_UV + default m + help + The uv_mmtimer device allows direct userspace access to the + UV system timer. + source "drivers/char/tpm/Kconfig" config TELCLOCK diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 66f779ad4f4..19a79dd79ee 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER) += raw.o obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o obj-$(CONFIG_MSPEC) += mspec.o obj-$(CONFIG_MMTIMER) += mmtimer.o +obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o obj-$(CONFIG_VIOTAPE) += viotape.o obj-$(CONFIG_HVCS) += hvcs.o obj-$(CONFIG_IBM_BSR) += bsr.o diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 1540e693d91..4068467ce7b 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -46,6 +46,8 @@ #define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2 #define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0 #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 +#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40 +#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 #define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00 @@ -91,6 +93,7 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB) @@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void) if (!intel_private.i8xx_page) return; - /* make page uncached */ - map_page_into_agp(intel_private.i8xx_page); - intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page); if (!intel_private.i8xx_flush_page) intel_i830_fini_flush(); } +static void +do_wbinvd(void *null) +{ + wbinvd(); +} + +/* The chipset_flush interface needs to get data that has already been + * flushed out of the CPU all the way out to main memory, because the GPU + * doesn't snoop those buffers. + * + * The 8xx series doesn't have the same lovely interface for flushing the + * chipset write buffers that the later chips do. According to the 865 + * specs, it's 64 octwords, or 1KB. So, to get those previous things in + * that buffer out, we just fill 1KB and clflush it out, on the assumption + * that it'll push whatever was in there out. It appears to work. + */ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge) { unsigned int *pg = intel_private.i8xx_flush_page; - int i; - for (i = 0; i < 256; i += 2) - *(pg + i) = i; + memset(pg, 0, 1024); - wmb(); + if (cpu_has_clflush) { + clflush_cache_range(pg, 1024); + } else { + if (on_each_cpu(do_wbinvd, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + } } /* The intel i830 automatically initializes the agp aperture during POST. @@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size) case PCI_DEVICE_ID_INTEL_Q45_HB: case PCI_DEVICE_ID_INTEL_G45_HB: case PCI_DEVICE_ID_INTEL_G41_HB: + case PCI_DEVICE_ID_INTEL_B43_HB: case PCI_DEVICE_ID_INTEL_IGDNG_D_HB: case PCI_DEVICE_ID_INTEL_IGDNG_M_HB: case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB: @@ -2335,6 +2355,8 @@ static const struct intel_driver_description { "Q45/Q43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0, "G45/G43", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0, + "B43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0, "G41", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0, @@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_Q45_HB), ID(PCI_DEVICE_ID_INTEL_G45_HB), ID(PCI_DEVICE_ID_INTEL_G41_HB), + ID(PCI_DEVICE_ID_INTEL_B43_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB), diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c index 0a01329451e..e3dd24bff51 100644 --- a/drivers/char/bfin-otp.c +++ b/drivers/char/bfin-otp.c @@ -1,8 +1,7 @@ /* * Blackfin On-Chip OTP Memory Interface - * Supports BF52x/BF54x * - * Copyright 2007-2008 Analog Devices Inc. + * Copyright 2007-2009 Analog Devices Inc. * * Enter bugs at http://blackfin.uclinux.org/ * @@ -17,8 +16,10 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/types.h> +#include <mtd/mtd-abi.h> #include <asm/blackfin.h> +#include <asm/bfrom.h> #include <asm/uaccess.h> #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args) @@ -30,39 +31,6 @@ static DEFINE_MUTEX(bfin_otp_lock); -/* OTP Boot ROM functions */ -#define _BOOTROM_OTP_COMMAND 0xEF000018 -#define _BOOTROM_OTP_READ 0xEF00001A -#define _BOOTROM_OTP_WRITE 0xEF00001C - -static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND; -static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ; -static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE; - -/* otp_command(): defines for "command" */ -#define OTP_INIT 0x00000001 -#define OTP_CLOSE 0x00000002 - -/* otp_{read,write}(): defines for "flags" */ -#define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */ -#define OTP_UPPER_HALF 0x00000001 -#define OTP_NO_ECC 0x00000010 /* do not use ECC */ -#define OTP_LOCK 0x00000020 /* sets page protection bit for page */ -#define OTP_ACCESS_READ 0x00001000 -#define OTP_ACCESS_READWRITE 0x00002000 - -/* Return values for all functions */ -#define OTP_SUCCESS 0x00000000 -#define OTP_MASTER_ERROR 0x001 -#define OTP_WRITE_ERROR 0x003 -#define OTP_READ_ERROR 0x005 -#define OTP_ACC_VIO_ERROR 0x009 -#define OTP_DATA_MULT_ERROR 0x011 -#define OTP_ECC_MULT_ERROR 0x021 -#define OTP_PREV_WR_ERROR 0x041 -#define OTP_DATA_SB_WARN 0x100 -#define OTP_ECC_SB_WARN 0x200 - /** * bfin_otp_read - Read OTP pages * @@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, page = *pos / (sizeof(u64) * 2); while (bytes_done < count) { flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); - stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower")); - ret = otp_read(page, flags, &content); + stamp("processing page %i (0x%x:%s)", page, flags, + (flags & OTP_UPPER_HALF ? "upper" : "lower")); + ret = bfrom_OtpRead(page, flags, &content); if (ret & OTP_MASTER_ERROR) { + stamp("error from otp: 0x%x", ret); bytes_done = -EIO; break; } @@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, bytes_done = -EFAULT; break; } - if (flags == OTP_UPPER_HALF) + if (flags & OTP_UPPER_HALF) ++page; bytes_done += sizeof(content); *pos += sizeof(content); @@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, } #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE +static bool allow_writes; + +/** + * bfin_otp_init_timing - setup OTP timing parameters + * + * Required before doing any write operation. Algorithms from HRM. + */ +static u32 bfin_otp_init_timing(void) +{ + u32 tp1, tp2, tp3, timing; + + tp1 = get_sclk() / 1000000; + tp2 = (2 * get_sclk() / 10000000) << 8; + tp3 = (0x1401) << 15; + timing = tp1 | tp2 | tp3; + if (bfrom_OtpCommand(OTP_INIT, timing)) + return 0; + + return timing; +} + +/** + * bfin_otp_deinit_timing - set timings to only allow reads + * + * Should be called after all writes are done. + */ +static void bfin_otp_deinit_timing(u32 timing) +{ + /* mask bits [31:15] so that any attempts to write fail */ + bfrom_OtpCommand(OTP_CLOSE, 0); + bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15)); + bfrom_OtpCommand(OTP_CLOSE, 0); +} + /** - * bfin_otp_write - Write OTP pages + * bfin_otp_write - write OTP pages * * All writes must be in half page chunks (half page == 64 bits). */ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos) { - stampit(); + ssize_t bytes_done; + u32 timing, page, base_flags, flags, ret; + u64 content; + + if (!allow_writes) + return -EACCES; if (count % sizeof(u64)) return -EMSGSIZE; @@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t if (mutex_lock_interruptible(&bfin_otp_lock)) return -ERESTARTSYS; - /* need otp_init() documentation before this can be implemented */ + stampit(); + + timing = bfin_otp_init_timing(); + if (timing == 0) { + mutex_unlock(&bfin_otp_lock); + return -EIO; + } + + base_flags = OTP_CHECK_FOR_PREV_WRITE; + + bytes_done = 0; + page = *pos / (sizeof(u64) * 2); + while (bytes_done < count) { + flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); + stamp("processing page %i (0x%x:%s) from %p", page, flags, + (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done); + if (copy_from_user(&content, buff + bytes_done, sizeof(content))) { + bytes_done = -EFAULT; + break; + } + ret = bfrom_OtpWrite(page, flags, &content); + if (ret & OTP_MASTER_ERROR) { + stamp("error from otp: 0x%x", ret); + bytes_done = -EIO; + break; + } + if (flags & OTP_UPPER_HALF) + ++page; + bytes_done += sizeof(content); + *pos += sizeof(content); + } + + bfin_otp_deinit_timing(timing); mutex_unlock(&bfin_otp_lock); + return bytes_done; +} + +static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg) +{ + stampit(); + + switch (cmd) { + case OTPLOCK: { + u32 timing; + int ret = -EIO; + + if (!allow_writes) + return -EACCES; + + if (mutex_lock_interruptible(&bfin_otp_lock)) + return -ERESTARTSYS; + + timing = bfin_otp_init_timing(); + if (timing) { + u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL); + stamp("locking page %lu resulted in 0x%x", arg, otp_result); + if (!(otp_result & OTP_MASTER_ERROR)) + ret = 0; + + bfin_otp_deinit_timing(timing); + } + + mutex_unlock(&bfin_otp_lock); + + return ret; + } + + case MEMLOCK: + allow_writes = false; + return 0; + + case MEMUNLOCK: + allow_writes = true; + return 0; + } + return -EINVAL; } #else # define bfin_otp_write NULL +# define bfin_otp_ioctl NULL #endif static struct file_operations bfin_otp_fops = { - .owner = THIS_MODULE, - .read = bfin_otp_read, - .write = bfin_otp_write, + .owner = THIS_MODULE, + .unlocked_ioctl = bfin_otp_ioctl, + .read = bfin_otp_read, + .write = bfin_otp_write, }; static struct miscdevice bfin_otp_misc_device = { diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4a9f3492b92..70a770ac013 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data) unsigned long m, t; t = devp->hd_ireqfreq; - m = read_counter(&devp->hd_hpet->hpet_mc); - write_counter(t + m + devp->hd_hpets->hp_delta, - &devp->hd_timer->hpet_compare); + m = read_counter(&devp->hd_timer->hpet_compare); + write_counter(t + m, &devp->hd_timer->hpet_compare); } if (devp->hd_flags & HPET_SHARED_IRQ) @@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp) g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK; if (devp->hd_flags & HPET_PERIODIC) { - write_counter(t, &timer->hpet_compare); g |= Tn_TYPE_CNF_MASK; - v |= Tn_TYPE_CNF_MASK; - writeq(v, &timer->hpet_config); - v |= Tn_VAL_SET_CNF_MASK; + v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK; writeq(v, &timer->hpet_config); local_irq_save(flags); - /* NOTE: what we modify here is a hidden accumulator + /* + * NOTE: First we modify the hidden accumulator * register supported by periodic-capable comparators. * We never want to modify the (single) counter; that - * would affect all the comparators. + * would affect all the comparators. The value written + * is the counter value when the first interrupt is due. */ m = read_counter(&hpet->hpet_mc); write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare); + /* + * Then we modify the comparator, indicating the period + * for subsequent interrupt. + */ + write_counter(t, &timer->hpet_compare); } else { local_irq_save(flags); m = read_counter(&hpet->hpet_mc); diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 25ce15bb1c0..a632f25f144 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -678,7 +678,7 @@ int hvc_poll(struct hvc_struct *hp) EXPORT_SYMBOL_GPL(hvc_poll); /** - * hvc_resize() - Update terminal window size information. + * __hvc_resize() - Update terminal window size information. * @hp: HVC console pointer * @ws: Terminal window size structure * @@ -687,12 +687,12 @@ EXPORT_SYMBOL_GPL(hvc_poll); * * Locking: Locking free; the function MUST be called holding hp->lock */ -void hvc_resize(struct hvc_struct *hp, struct winsize ws) +void __hvc_resize(struct hvc_struct *hp, struct winsize ws) { hp->ws = ws; schedule_work(&hp->tty_resize); } -EXPORT_SYMBOL_GPL(hvc_resize); +EXPORT_SYMBOL_GPL(__hvc_resize); /* * This kthread is either polling or interrupt driven. This is determined by diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 3c85d78c975..10950ca706d 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -28,6 +28,7 @@ #define HVC_CONSOLE_H #include <linux/kref.h> #include <linux/tty.h> +#include <linux/spinlock.h> /* * This is the max number of console adapters that can/will be found as @@ -88,7 +89,16 @@ int hvc_poll(struct hvc_struct *hp); void hvc_kick(void); /* Resize hvc tty terminal window */ -extern void hvc_resize(struct hvc_struct *hp, struct winsize ws); +extern void __hvc_resize(struct hvc_struct *hp, struct winsize ws); + +static inline void hvc_resize(struct hvc_struct *hp, struct winsize ws) +{ + unsigned long flags; + + spin_lock_irqsave(&hp->lock, flags); + __hvc_resize(hp, ws); + spin_unlock_irqrestore(&hp->lock, flags); +} /* default notifier for irq based notification */ extern int notifier_add_irq(struct hvc_struct *hp, int data); diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index 0ecac7e532f..b8a5d654d3d 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -273,7 +273,9 @@ static int hvc_iucv_write(struct hvc_iucv_private *priv, case MSG_TYPE_WINSIZE: if (rb->mbuf->datalen != sizeof(struct winsize)) break; - hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data)); + /* The caller must ensure that the hvc is locked, which + * is the case when called from hvc_iucv_get_chars() */ + __hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data)); break; case MSG_TYPE_ERROR: /* ignored ... */ diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 0aede1d6a9e..6c8b65d069e 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf, if (chunk > PAGE_SIZE) chunk = PAGE_SIZE; /* Just for latency reasons */ - unwritten = clear_user(buf, chunk); + unwritten = __clear_user(buf, chunk); written += chunk - unwritten; if (unwritten) break; diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index 94ad2c3bfc4..a4ec50c9507 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, case IOCTL_MW_REGISTER_IPC: { unsigned int ipcnum = (unsigned int) ioarg; - PRINTK_3(TRACE_MWAVE, - "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" - " ipcnum %x entry usIntCount %x\n", - ipcnum, - pDrvData->IPCs[ipcnum].usIntCount); - if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:" @@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + PRINTK_3(TRACE_MWAVE, + "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" + " ipcnum %x entry usIntCount %x\n", + ipcnum, + pDrvData->IPCs[ipcnum].usIntCount); + lock_kernel(); pDrvData->IPCs[ipcnum].bIsHere = FALSE; pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; @@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, case IOCTL_MW_GET_IPC: { unsigned int ipcnum = (unsigned int) ioarg; - PRINTK_3(TRACE_MWAVE, - "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" - " ipcnum %x, usIntCount %x\n", - ipcnum, - pDrvData->IPCs[ipcnum].usIntCount); if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:" @@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + PRINTK_3(TRACE_MWAVE, + "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" + " ipcnum %x, usIntCount %x\n", + ipcnum, + pDrvData->IPCs[ipcnum].usIntCount); lock_kernel(); if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { diff --git a/drivers/char/random.c b/drivers/char/random.c index d8a9255e1a3..04b505e5a5e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1231,7 +1231,7 @@ static char sysctl_bootid[16]; * as an ASCII string in the standard UUID format. If accesses via the * sysctl system call, it is returned as 16 bytes of binary data. */ -static int proc_do_uuid(ctl_table *table, int write, struct file *filp, +static int proc_do_uuid(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { ctl_table fake_table; @@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, fake_table.data = buf; fake_table.maxlen = sizeof(buf); - return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos); + return proc_dostring(&fake_table, write, buffer, lenp, ppos); } static int uuid_strategy(ctl_table *table, diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c index eecee0f576d..74339559f0b 100644 --- a/drivers/char/rio/rioctrl.c +++ b/drivers/char/rio/rioctrl.c @@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su /* ** It is important that the product code is an unsigned object! */ - if (DownLoad.ProductCode > MAX_PRODUCT) { + if (DownLoad.ProductCode >= MAX_PRODUCT) { rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode); p->RIOError.Error = NO_SUCH_PRODUCT; return -ENXIO; diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c new file mode 100644 index 00000000000..867b67be9f0 --- /dev/null +++ b/drivers/char/uv_mmtimer.c @@ -0,0 +1,216 @@ +/* + * Timer device implementation for SGI UV platform. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2009 Silicon Graphics, Inc. All rights reserved. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/mmtimer.h> +#include <linux/miscdevice.h> +#include <linux/posix-timers.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/smp_lock.h> + +#include <asm/genapic.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/bios.h> +#include <asm/uv/uv.h> + +MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>"); +MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer"); +MODULE_LICENSE("GPL"); + +/* name of the device, usually in /dev */ +#define UV_MMTIMER_NAME "mmtimer" +#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer" +#define UV_MMTIMER_VERSION "1.0" + +static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma); + +/* + * Period in femtoseconds (10^-15 s) + */ +static unsigned long uv_mmtimer_femtoperiod; + +static const struct file_operations uv_mmtimer_fops = { + .owner = THIS_MODULE, + .mmap = uv_mmtimer_mmap, + .unlocked_ioctl = uv_mmtimer_ioctl, +}; + +/** + * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer + * @file: file structure for the device + * @cmd: command to execute + * @arg: optional argument to command + * + * Executes the command specified by @cmd. Returns 0 for success, < 0 for + * failure. + * + * Valid commands: + * + * %MMTIMER_GETOFFSET - Should return the offset (relative to the start + * of the page where the registers are mapped) for the counter in question. + * + * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15) + * seconds + * + * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address + * specified by @arg + * + * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter + * + * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace + * + * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it + * in the address specified by @arg. + */ +static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case MMTIMER_GETOFFSET: /* offset of the counter */ + /* + * UV RTC register is on its own page + */ + if (PAGE_SIZE <= (1 << 16)) + ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) + / 8; + else + ret = -ENOSYS; + break; + + case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ + if (copy_to_user((unsigned long __user *)arg, + &uv_mmtimer_femtoperiod, sizeof(unsigned long))) + ret = -EFAULT; + break; + + case MMTIMER_GETFREQ: /* frequency in Hz */ + if (copy_to_user((unsigned long __user *)arg, + &sn_rtc_cycles_per_second, + sizeof(unsigned long))) + ret = -EFAULT; + break; + + case MMTIMER_GETBITS: /* number of bits in the clock */ + ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); + break; + + case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ + ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; + break; + + case MMTIMER_GETCOUNTER: + if (copy_to_user((unsigned long __user *)arg, + (unsigned long *)uv_local_mmr_address(UVH_RTC), + sizeof(unsigned long))) + ret = -EFAULT; + break; + default: + ret = -ENOTTY; + break; + } + return ret; +} + +/** + * uv_mmtimer_mmap - maps the clock's registers into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + * + * Calls remap_pfn_range() to map the clock's registers into + * the calling process' address space. + */ +static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long uv_mmtimer_addr; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + if (PAGE_SIZE > (1 << 16)) + return -ENOSYS; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC; + uv_mmtimer_addr &= ~(PAGE_SIZE - 1); + uv_mmtimer_addr &= 0xfffffffffffffffUL; + + if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n"); + return -EAGAIN; + } + + return 0; +} + +static struct miscdevice uv_mmtimer_miscdev = { + MISC_DYNAMIC_MINOR, + UV_MMTIMER_NAME, + &uv_mmtimer_fops +}; + + +/** + * uv_mmtimer_init - device initialization routine + * + * Does initial setup for the uv_mmtimer device. + */ +static int __init uv_mmtimer_init(void) +{ + if (!is_uv_system()) { + printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME); + return -1; + } + + /* + * Sanity check the cycles/sec variable + */ + if (sn_rtc_cycles_per_second < 100000) { + printk(KERN_ERR "%s: unable to determine clock frequency\n", + UV_MMTIMER_NAME); + return -1; + } + + uv_mmtimer_femtoperiod = ((unsigned long)1E15 + + sn_rtc_cycles_per_second / 2) / + sn_rtc_cycles_per_second; + + if (misc_register(&uv_mmtimer_miscdev)) { + printk(KERN_ERR "%s: failed to register device\n", + UV_MMTIMER_NAME); + return -1; + } + + printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC, + UV_MMTIMER_VERSION, + sn_rtc_cycles_per_second/(unsigned long)1E6); + + return 0; +} + +module_init(uv_mmtimer_init); diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 25b743abfb5..52e6bb70a49 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -28,7 +28,7 @@ #include <linux/device.h> #include <linux/dca.h> -#define DCA_VERSION "1.8" +#define DCA_VERSION "1.12.1" MODULE_VERSION(DCA_VERSION); MODULE_LICENSE("GPL"); @@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation"); static DEFINE_SPINLOCK(dca_lock); -static LIST_HEAD(dca_providers); +static LIST_HEAD(dca_domains); -static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +static struct pci_bus *dca_pci_rc_from_dev(struct device *dev) { - struct dca_provider *dca, *ret = NULL; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; - list_for_each_entry(dca, &dca_providers, node) { - if ((!dev) || (dca->ops->dev_managed(dca, dev))) { - ret = dca; - break; - } + while (bus->parent) + bus = bus->parent; + + return bus; +} + +static struct dca_domain *dca_allocate_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_NOWAIT); + if (!domain) + return NULL; + + INIT_LIST_HEAD(&domain->dca_providers); + domain->pci_rc = rc; + + return domain; +} + +static void dca_free_domain(struct dca_domain *domain) +{ + list_del(&domain->node); + kfree(domain); +} + +static struct dca_domain *dca_find_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + list_for_each_entry(domain, &dca_domains, node) + if (domain->pci_rc == rc) + return domain; + + return NULL; +} + +static struct dca_domain *dca_get_domain(struct device *dev) +{ + struct pci_bus *rc; + struct dca_domain *domain; + + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + + if (!domain) { + domain = dca_allocate_domain(rc); + if (domain) + list_add(&domain->node, &dca_domains); + } + + return domain; +} + +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +{ + struct dca_provider *dca; + struct pci_bus *rc; + struct dca_domain *domain; + + if (dev) { + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + if (!domain) + return NULL; + } else { + if (!list_empty(&dca_domains)) + domain = list_first_entry(&dca_domains, + struct dca_domain, + node); + else + return NULL; } - return ret; + list_for_each_entry(dca, &domain->dca_providers, node) + if ((!dev) || (dca->ops->dev_managed(dca, dev))) + return dca; + + return NULL; } /** @@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev) struct dca_provider *dca; int err, slot = -ENODEV; unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; if (!dev) return -EFAULT; @@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev) return -EEXIST; } - list_for_each_entry(dca, &dca_providers, node) { + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + + list_for_each_entry(dca, &domain->dca_providers, node) { slot = dca->ops->add_requester(dca, dev); if (slot >= 0) break; @@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) { int err; unsigned long flags; + struct dca_domain *domain; err = dca_sysfs_add_provider(dca, dev); if (err) return err; spin_lock_irqsave(&dca_lock, flags); - list_add(&dca->node, &dca_providers); + domain = dca_get_domain(dev); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + list_add(&dca->node, &domain->dca_providers); spin_unlock_irqrestore(&dca_lock, flags); blocking_notifier_call_chain(&dca_provider_chain, @@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider); * unregister_dca_provider - remove a dca provider * @dca - struct created by alloc_dca_provider() */ -void unregister_dca_provider(struct dca_provider *dca) +void unregister_dca_provider(struct dca_provider *dca, struct device *dev) { unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_REMOVE, NULL); spin_lock_irqsave(&dca_lock, flags); + list_del(&dca->node); + + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (list_empty(&domain->dca_providers)) + dca_free_domain(domain); + spin_unlock_irqrestore(&dca_lock, flags); dca_sysfs_remove_provider(dca); @@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); static int __init dca_init(void) { - printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); + pr_info("dca service started, version %s\n", DCA_VERSION); return dca_sysfs_init(); } diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81e1020fb51..5903a88351b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -17,11 +17,15 @@ if DMADEVICES comment "DMA Devices" +config ASYNC_TX_DISABLE_CHANNEL_SWITCH + bool + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE select DCA + select ASYNC_TX_DISABLE_CHANNEL_SWITCH help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. @@ -97,6 +101,14 @@ config TXX9_DMAC Support the TXx9 SoC internal DMA controller. This can be integrated in chips such as the Toshiba TX4927/38/39. +config SH_DMAE + tristate "Renesas SuperH DMAC support" + depends on SUPERH && SH_DMA + depends on !SH_DMA_API + select DMA_ENGINE + help + Enable support for the Renesas SuperH DMA controllers. + config DMA_ENGINE bool @@ -116,7 +128,7 @@ config NET_DMA config ASYNC_TX_DMA bool "Async_tx: Offload support for the async_tx api" - depends on DMA_ENGINE && !HIGHMEM64G + depends on DMA_ENGINE help This allows the async_tx api to take advantage of offload engines for memcpy, memset, xor, and raid6 p+q operations. If your platform has diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 40e1e008357..eca71ba78ae 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,8 +1,7 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_MV_XOR) += mv_xor.o @@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_MX3_IPU) += ipu/ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_SH_DMAE) += shdma.o diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c8522e6f1ad..7585c4164bd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); if (desc) { memset(desc, 0, sizeof(struct at_desc)); + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); /* txd.flags will be overwritten in prep functions */ desc->txd.flags = DMA_CTRL_ACK; @@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) struct at_desc *child; spin_lock_bh(&atchan->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&atchan->chan_common), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); dev_vdbg(chan2dev(&atchan->chan_common), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &atchan->free_list); @@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) param = txd->callback_param; /* move children to free_list */ - list_splice_init(&txd->tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ list_move(&desc->desc_node, &atchan->free_list); /* unmap dma addresses */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); + if (!atchan->chan_common.private) { + struct device *parent = chan2parent(&atchan->chan_common); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + } } /* @@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (!(child->lli.ctrla & ATC_DONE)) /* Currently in progress */ return; @@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan) dev_crit(chan2dev(&atchan->chan_common), " cookie: %d\n", bad_desc->txd.cookie); atc_dump_lli(atchan, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) atc_dump_lli(atchan, &child->lli); /* Pretend the descriptor completed successfully */ @@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = atslave->reg_width; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; @@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 4c972afc49e..495457e3dc4 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -165,6 +165,7 @@ struct at_desc { struct at_lli lli; /* THEN values for driver housekeeping */ + struct list_head tx_list; struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 5a87384ea4f..bd0b248de2c 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -608,6 +608,40 @@ void dmaengine_put(void) } EXPORT_SYMBOL(dmaengine_put); +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + #endif + + return true; +} + static int get_dma_id(struct dma_device *device) { int rc; @@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_memcpy); BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor); - BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && - !device->device_prep_dma_zero_sum); + BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && + !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && @@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); + /* note: this only matters in the + * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); if (!idr_ref) return -ENOMEM; @@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, { tx->chan = chan; spin_lock_init(&tx->lock); - INIT_LIST_HEAD(&tx->tx_list); } EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. */ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d93017fc787..a32a4cf7b1e 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO); MODULE_PARM_DESC(xor_sources, "Number of xor source buffers (default: 3)"); +static unsigned int pq_sources = 3; +module_param(pq_sources, uint, S_IRUGO); +MODULE_PARM_DESC(pq_sources, + "Number of p+q source buffers (default: 3)"); + /* * Initialization patterns. All bytes in the source buffer has bit 7 * set, all bytes in the destination buffer has bit 7 cleared. @@ -232,6 +237,7 @@ static int dmatest_func(void *data) dma_cookie_t cookie; enum dma_status status; enum dma_ctrl_flags flags; + u8 pq_coefs[pq_sources]; int ret; int src_cnt; int dst_cnt; @@ -248,6 +254,11 @@ static int dmatest_func(void *data) else if (thread->type == DMA_XOR) { src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ dst_cnt = 1; + } else if (thread->type == DMA_PQ) { + src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ + dst_cnt = 2; + for (i = 0; i < pq_sources; i++) + pq_coefs[i] = 1; } else goto err_srcs; @@ -283,6 +294,7 @@ static int dmatest_func(void *data) dma_addr_t dma_dsts[dst_cnt]; struct completion cmp; unsigned long tmo = msecs_to_jiffies(3000); + u8 align = 0; total_tests++; @@ -290,6 +302,18 @@ static int dmatest_func(void *data) src_off = dmatest_random() % (test_buf_size - len + 1); dst_off = dmatest_random() % (test_buf_size - len + 1); + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + len = (len >> align) << align; + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + dmatest_init_srcs(thread->srcs, src_off, len); dmatest_init_dsts(thread->dsts, dst_off, len); @@ -306,6 +330,7 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); } + if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dma_dsts[0] + dst_off, @@ -316,6 +341,15 @@ static int dmatest_func(void *data) dma_dsts[0] + dst_off, dma_srcs, xor_sources, len, flags); + else if (thread->type == DMA_PQ) { + dma_addr_t dma_pq[dst_cnt]; + + for (i = 0; i < dst_cnt; i++) + dma_pq[i] = dma_dsts[i] + dst_off; + tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, + pq_sources, pq_coefs, + len, flags); + } if (!tx) { for (i = 0; i < src_cnt; i++) @@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty op = "copy"; else if (type == DMA_XOR) op = "xor"; + else if (type == DMA_PQ) + op = "pq"; else return -EINVAL; @@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan) cnt = dmatest_add_threads(dtc, DMA_XOR); thread_count += cnt > 0 ? cnt : 0; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_PQ); + thread_count += cnt > 0 ?: 0; + } pr_info("dmatest: Started %u threads using %s\n", thread_count, dma_chan_name(chan)); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 933c143b6a7..2eea823516a 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) { struct dw_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dwc->chan), child->txd.phys, sizeof(child->lli), DMA_TO_DEVICE); @@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) dwc_sync_desc_for_cpu(dwc, desc); spin_lock_bh(&dwc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dwc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dwc->free_list); spin_unlock_bh(&dwc->lock); @@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) param = txd->callback_param; dwc_sync_desc_for_cpu(dwc, desc); - list_splice_init(&txd->tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); - /* - * We use dma_unmap_page() regardless of how the buffers were - * mapped before they were submitted... - */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar, - desc->len, DMA_FROM_DEVICE); - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar, - desc->len, DMA_TO_DEVICE); + if (!dwc->chan.private) { + struct device *parent = chan2parent(&dwc->chan); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + } + } /* * The API requires that no submissions are done from a @@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (child->lli.llp == llp) /* Currently in progress */ return; @@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_printk(KERN_CRIT, chan2dev(&dwc->chan), " cookie: %d\n", bad_desc->txd.cookie); dwc_dump_lli(dwc, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) dwc_dump_lli(dwc, &child->lli); /* Pretend the descriptor completed successfully */ @@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->txd.phys, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = dws->reg_width; prev = first = NULL; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - switch (direction) { case DMA_TO_DEVICE: ctllo = (DWC_DEFAULT_CTLLO @@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) break; } + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = dwc_tx_submit; desc->txd.flags = DMA_CTRL_ACK; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 13a58076703..d9a939f67f4 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -217,6 +217,7 @@ struct dw_desc { /* THEN values for driver housekeeping */ struct list_head desc_node; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index ef87a898414..296f9e747fa 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -34,6 +34,7 @@ #include <linux/dmapool.h> #include <linux/of_platform.h> +#include <asm/fsldma.h> #include "fsldma.h" static void dma_init(struct fsl_dma_chan *fsl_chan) @@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size) } /** - * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * fsl_chan_set_request_count - Set DMA Request Count for external control * @fsl_chan : Freescale DMA channel - * @size : Pause control size, 0 for disable external pause control. - * The maximum is 1024. + * @size : Number of bytes to transfer in a single request + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA request count is how many bytes are allowed to transfer before + * pausing the channel, after which a new assertion of DREQ# resumes channel + * operation. * - * The Freescale DMA channel can be controlled by the external - * signal DREQ#. The pause control size is how many bytes are allowed - * to transfer before pausing the channel, after which a new assertion - * of DREQ# resumes channel operation. + * A size of 0 disables external pause control. The maximum size is 1024. */ -static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) +static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size) { - if (size > 1024) - return; + BUG_ON(size > 1024); + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + | ((__ilog2(size) << 24) & 0x0f000000), + 32); +} - if (size) { - DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, - DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) - | ((__ilog2(size) << 24) & 0x0f000000), - 32); +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @fsl_chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA Request Count feature should be used in addition to this feature + * to set the number of bytes to transfer before pausing the channel. + */ +static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable) +{ + if (enable) fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; - } else + else fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; } @@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) { struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); - struct fsl_desc_sw *desc; + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_desc_sw *child; unsigned long flags; dma_cookie_t cookie; @@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&fsl_chan->desc_lock, flags); cookie = fsl_chan->common.cookie; - list_for_each_entry(desc, &tx->tx_list, node) { + list_for_each_entry(child, &desc->tx_list, node) { cookie++; if (cookie < 0) cookie = 1; @@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) } fsl_chan->common.cookie = cookie; - append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); - list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); + append_ld_queue(fsl_chan, desc); + list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev); spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); @@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); if (desc_sw) { memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); + INIT_LIST_HEAD(&desc_sw->tx_list); dma_async_tx_descriptor_init(&desc_sw->async_tx, &fsl_chan->common); desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; @@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags) new->async_tx.flags = flags; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &new->async_tx.tx_list); + list_add_tail(&new->node, &new->tx_list); /* Set End-of-link to the last link descriptor of new list*/ set_ld_eol(fsl_chan, new); @@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( dma_dest += copy; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->async_tx.tx_list); + list_add_tail(&new->node, &first->tx_list); } while (len); new->async_tx.flags = flags; /* client is in control of this ack */ @@ -528,7 +543,7 @@ fail: if (!first) return NULL; - list = &first->async_tx.tx_list; + list = &first->tx_list; list_for_each_entry_safe_reverse(new, prev, list, node) { list_del(&new->node); dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); @@ -538,6 +553,229 @@ fail: } /** + * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: DMAEngine flags + * + * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the + * DMA_SLAVE API, this gets the device-specific information from the + * chan->private variable. + */ +static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; + struct fsl_dma_slave *slave; + struct list_head *tx_list; + size_t copy; + + int i; + struct scatterlist *sg; + size_t sg_used; + size_t hw_used; + struct fsl_dma_hw_addr *hw; + dma_addr_t dma_dst, dma_src; + + if (!chan) + return NULL; + + if (!chan->private) + return NULL; + + fsl_chan = to_fsl_chan(chan); + slave = chan->private; + + if (list_empty(&slave->addresses)) + return NULL; + + hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry); + hw_used = 0; + + /* + * Build the hardware transaction to copy from the scatterlist to + * the hardware, or from the hardware to the scatterlist + * + * If you are copying from the hardware to the scatterlist and it + * takes two hardware entries to fill an entire page, then both + * hardware entries will be coalesced into the same page + * + * If you are copying from the scatterlist to the hardware and a + * single page can fill two hardware entries, then the data will + * be read out of the page into the first hardware entry, and so on + */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + + /* + * If we've used up the current hardware address/length + * pair, we need to load a new one + * + * This is done in a while loop so that descriptors with + * length == 0 will be skipped + */ + while (hw_used >= hw->length) { + + /* + * If the current hardware entry is the last + * entry in the list, we're finished + */ + if (list_is_last(&hw->entry, &slave->addresses)) + goto finished; + + /* Get the next hardware address/length pair */ + hw = list_entry(hw->entry.next, + struct fsl_dma_hw_addr, entry); + hw_used = 0; + } + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(fsl_chan); + if (!new) { + dev_err(fsl_chan->dev, "No free memory for " + "link descriptor\n"); + goto fail; + } +#ifdef FSL_DMA_LD_DEBUG + dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); +#endif + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the DMA controller limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + hw->length - hw_used); + copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT); + + /* + * DMA_FROM_DEVICE + * from the hardware to the scatterlist + * + * DMA_TO_DEVICE + * from the scatterlist to the hardware + */ + if (direction == DMA_FROM_DEVICE) { + dma_src = hw->address + hw_used; + dma_dst = sg_dma_address(sg) + sg_used; + } else { + dma_src = sg_dma_address(sg) + sg_used; + dma_dst = hw->address + hw_used; + } + + /* Fill in the descriptor */ + set_desc_cnt(fsl_chan, &new->hw, copy); + set_desc_src(fsl_chan, &new->hw, dma_src); + set_desc_dest(fsl_chan, &new->hw, dma_dst); + + /* + * If this is not the first descriptor, chain the + * current descriptor after the previous descriptor + */ + if (!first) { + first = new; + } else { + set_desc_next(fsl_chan, &prev->hw, + new->async_tx.phys); + } + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + sg_used += copy; + hw_used += copy; + + /* Insert the link descriptor into the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } + } + +finished: + + /* All of the hardware address/length pairs had length == 0 */ + if (!first || !new) + return NULL; + + new->async_tx.flags = flags; + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(fsl_chan, new); + + /* Enable extra controller features */ + if (fsl_chan->set_src_loop_size) + fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size); + + if (fsl_chan->set_dest_loop_size) + fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size); + + if (fsl_chan->toggle_ext_start) + fsl_chan->toggle_ext_start(fsl_chan, slave->external_start); + + if (fsl_chan->toggle_ext_pause) + fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause); + + if (fsl_chan->set_request_count) + fsl_chan->set_request_count(fsl_chan, slave->request_count); + + return &first->async_tx; + +fail: + /* If first was not set, then we failed to allocate the very first + * descriptor, and we're done */ + if (!first) + return NULL; + + /* + * First is set, so all of the descriptors we allocated have been added + * to first->tx_list, INCLUDING "first" itself. Therefore we + * must traverse the list backwards freeing each descriptor in turn + * + * We're re-using variables for the loop, oh well + */ + tx_list = &first->tx_list; + list_for_each_entry_safe_reverse(new, prev, tx_list, node) { + list_del_init(&new->node); + dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); + } + + return NULL; +} + +static void fsl_dma_device_terminate_all(struct dma_chan *chan) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *desc, *tmp; + unsigned long flags; + + if (!chan) + return; + + fsl_chan = to_fsl_chan(chan); + + /* Halt the DMA engine */ + dma_halt(fsl_chan); + + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + /* Remove and free all of the descriptors in the LD queue */ + list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) { + list_del(&desc->node); + dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); + } + + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); +} + +/** * fsl_dma_update_completed_cookie - Update the completed cookie. * @fsl_chan : Freescale DMA channel */ @@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; + new_fsl_chan->set_request_count = fsl_chan_set_request_count; } spin_lock_init(&new_fsl_chan->desc_lock); @@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; fdev->common.device_is_tx_complete = fsl_dma_is_complete; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; + fdev->common.device_terminate_all = fsl_dma_device_terminate_all; fdev->common.dev = &dev->dev; fdev->irq = irq_of_parse_and_map(dev->node, 0); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index dc7f2686579..0df14cbb8ca 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -90,6 +90,7 @@ struct fsl_dma_ld_hw { struct fsl_desc_sw { struct fsl_dma_ld_hw hw; struct list_head node; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; struct list_head *ld; void *priv; @@ -143,10 +144,11 @@ struct fsl_dma_chan { struct tasklet_struct tasklet; u32 feature; - void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); + void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable); void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); + void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size); }; #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c deleted file mode 100644 index 2225bb6ba3d..00000000000 --- a/drivers/dma/ioat.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - { 0, } -}; - -struct ioat_device { - struct pci_dev *pdev; - void __iomem *iobase; - struct ioatdma_device *dma; - struct dca_provider *dca; -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id); -static void __devexit ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -static struct pci_driver ioat_pci_driver = { - .name = "ioatdma", - .id_table = ioat_pci_tbl, - .probe = ioat_probe, - .remove = __devexit_p(ioat_remove), -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - void __iomem *iobase; - struct ioat_device *device; - unsigned long mmio_start, mmio_len; - int err; - - err = pci_enable_device(pdev); - if (err) - goto err_enable_device; - - err = pci_request_regions(pdev, ioat_pci_driver.name); - if (err) - goto err_request_regions; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - iobase = ioremap(mmio_start, mmio_len); - if (!iobase) { - err = -ENOMEM; - goto err_ioremap; - } - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - pci_set_drvdata(pdev, device); - device->iobase = iobase; - - pci_set_master(pdev); - - switch (readb(iobase + IOAT_VER_OFFSET)) { - case IOAT_VER_1_2: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat_dca_init(pdev, iobase); - break; - case IOAT_VER_2_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat2_dca_init(pdev, iobase); - break; - case IOAT_VER_3_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat3_dca_init(pdev, iobase); - break; - default: - err = -ENODEV; - break; - } - if (!device->dma) - err = -ENODEV; - - if (err) - goto err_version; - - return 0; - -err_version: - kfree(device); -err_kzalloc: - iounmap(iobase); -err_ioremap: -err_set_dma_mask: - pci_release_regions(pdev); - pci_disable_device(pdev); -err_request_regions: -err_enable_device: - return err; -} - -static void __devexit ioat_remove(struct pci_dev *pdev) -{ - struct ioat_device *device = pci_get_drvdata(pdev); - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca); - free_dca_provider(device->dca); - device->dca = NULL; - } - - if (device->dma) { - ioat_dma_remove(device->dma); - device->dma = NULL; - } - - kfree(device); -} - -static int __init ioat_init_module(void) -{ - return pci_register_driver(&ioat_pci_driver); -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); -} -module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 00000000000..8997d3fb905 --- /dev/null +++ b/drivers/dma/ioat/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c index c012a1e1504..69d02615c4d 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat/dca.c @@ -33,8 +33,8 @@ #define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) #endif -#include "ioatdma.h" -#include "ioatdma_registers.h" +#include "dma.h" +#include "registers.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = { }; -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 00000000000..c524d36d3c2 --- /dev/null +++ b/drivers/dma/ioat/dma.c @@ -0,0 +1,1238 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + +/* internal functions */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat); +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioat_chan_common *chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_bit(bit, &attnstatus, BITS_PER_LONG) { + chan = ioat_chan_by_index(instance, bit); + tasklet_schedule(&chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioat_chan_common *chan = data; + + tasklet_schedule(&chan->cleanup_task); + + return IRQ_HANDLED; +} + +static void ioat1_cleanup_tasklet(unsigned long data); + +/* common channel initialization */ +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat) +{ + struct dma_device *dma = &device->common; + + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&chan->cleanup_lock); + chan->common.device = dma; + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[idx] = chan; + init_timer(&chan->timer); + chan->timer.function = timer_fn; + chan->timer.data = ioat; + tasklet_init(&chan->cleanup_task, tasklet, ioat); + tasklet_disable(&chan->cleanup_task); +} + +static void ioat1_timer_event(unsigned long data); + +/** + * ioat1_dma_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat1_enumerate_channels(struct ioatdma_device *device) +{ + u8 xfercap_scale; + u32 xfercap; + int i; + struct ioat_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_scale &= 0x1f; /* bits [4:0] valid */ + xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); + +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + ioat1_timer_event, + ioat1_cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap = xfercap; + spin_lock_init(&ioat->desc_lock); + INIT_LIST_HEAD(&ioat->free_desc); + INIT_LIST_HEAD(&ioat->used_desc); + } + dma->chancnt = i; + return i; +} + +/** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended + * descriptors to hw + * @chan: DMA channel handle + */ +static inline void +__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +{ + void __iomem *reg_base = ioat->base.reg_base; + + dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", + __func__, ioat->pending); + ioat->pending = 0; + writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(chan); + + if (ioat->pending > 0) { + spin_lock_bh(&ioat->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + } +} + +/** + * ioat1_reset_channel - restart a channel + * @ioat: IOAT DMA channel handle + */ +static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + void __iomem *reg_base = chan->reg_base; + u32 chansts, chanerr; + + dev_warn(to_dev(chan), "reset\n"); + chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); + chansts = *chan->completion & IOAT_CHANSTS_STATUS; + if (chanerr) { + dev_err(to_dev(chan), + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(chan), chansts, chanerr); + writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + ioat->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + set_bit(IOAT_RESET_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + RESET_DELAY); +} + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *first; + struct ioat_desc_sw *chain_tail; + dma_cookie_t cookie; + + spin_lock_bh(&ioat->desc_lock); + /* cookie incr and addition to used_list must be atomic */ + cookie = c->cookie; + cookie++; + if (cookie < 0) + cookie = 1; + c->cookie = cookie; + tx->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + /* write address into NextDescriptor field of last desc in chain */ + first = to_ioat_desc(desc->tx_list.next); + chain_tail = to_ioat_desc(ioat->used_desc.prev); + /* make descriptor updates globally visible before chaining */ + wmb(); + chain_tail->hw->next = first->txd.phys; + list_splice_tail_init(&desc->tx_list, &ioat->used_desc); + dump_desc_dbg(ioat, chain_tail); + dump_desc_dbg(ioat, first); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + ioat->active += desc->hw->tx_cnt; + ioat->pending += desc->hw->tx_cnt; + if (ioat->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + + return cookie; +} + +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ +static struct ioat_desc_sw * +ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +{ + struct ioat_dma_descriptor *desc; + struct ioat_desc_sw *desc_sw; + struct ioatdma_device *ioatdma_device; + dma_addr_t phys; + + ioatdma_device = ioat->base.device; + desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); + if (unlikely(!desc)) + return NULL; + + desc_sw = kzalloc(sizeof(*desc_sw), flags); + if (unlikely(!desc_sw)) { + pci_pool_free(ioatdma_device->dma_pool, desc, phys); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + + INIT_LIST_HEAD(&desc_sw->tx_list); + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + desc_sw->txd.tx_submit = ioat1_tx_submit; + desc_sw->hw = desc; + desc_sw->txd.phys = phys; + set_desc_id(desc_sw, -1); + + return desc_sw; +} + +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "ioat1: initial descriptors per channel (default: 256)"); +/** + * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ +static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + u32 chanerr; + int i; + LIST_HEAD(tmp_list); + + /* have we already been set up? */ + if (!list_empty(&ioat->free_desc)) + return ioat->desccount; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* Allocate descriptors */ + for (i = 0; i < ioat_initial_desc_count; i++) { + desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); + if (!desc) { + dev_err(to_dev(chan), "Only %d initial descriptors\n", i); + break; + } + set_desc_id(desc, i); + list_add_tail(&desc->node, &tmp_list); + } + spin_lock_bh(&ioat->desc_lock); + ioat->desccount = i; + list_splice(&tmp_list, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&chan->cleanup_task); + ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, ioat->desccount); + return ioat->desccount; +} + +/** + * ioat1_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat1_dma_free_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; + struct ioat_desc_sw *desc, *_desc; + int in_use_descs = 0; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (ioat->desccount == 0) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + ioat1_cleanup(ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->desc_lock); + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", + __func__, desc_id(desc)); + dump_desc_dbg(ioat, desc); + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + spin_unlock_bh(&ioat->desc_lock); + + pci_pool_free(ioatdma_device->completion_pool, + chan->completion, + chan->completion_dma); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + in_use_descs - 1); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->pending = 0; + ioat->desccount = 0; +} + +/** + * ioat1_dma_get_next_descriptor - return the next available descriptor + * @ioat: IOAT DMA channel handle + * + * Gets the next descriptor from the chain, and must be called with the + * channel's desc_lock held. Allocates more descriptors if the channel + * has run out. + */ +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +{ + struct ioat_desc_sw *new; + + if (!list_empty(&ioat->free_desc)) { + new = to_ioat_desc(ioat->free_desc.next); + list_del(&new->node); + } else { + /* try to get another desc */ + new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); + if (!new) { + dev_err(to_dev(&ioat->base), "alloc failed\n"); + return NULL; + } + } + dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", + __func__, desc_id(new)); + prefetch(new->hw); + return new; +} + +static struct dma_async_tx_descriptor * +ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc; + size_t copy; + LIST_HEAD(chain); + dma_addr_t src = dma_src; + dma_addr_t dest = dma_dest; + size_t total_len = len; + struct ioat_dma_descriptor *hw = NULL; + int tx_cnt = 0; + + spin_lock_bh(&ioat->desc_lock); + desc = ioat1_dma_get_next_descriptor(ioat); + do { + if (!desc) + break; + + tx_cnt++; + copy = min_t(size_t, len, ioat->xfercap); + + hw = desc->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dest; + + list_add_tail(&desc->node, &chain); + + len -= copy; + dest += copy; + src += copy; + if (len) { + struct ioat_desc_sw *next; + + async_tx_ack(&desc->txd); + next = ioat1_dma_get_next_descriptor(ioat); + hw->next = next ? next->txd.phys : 0; + dump_desc_dbg(ioat, desc); + desc = next; + } else + hw->next = 0; + } while (len); + + if (!desc) { + struct ioat_chan_common *chan = &ioat->base; + + dev_err(to_dev(chan), + "chan%d - get_next_desc failed\n", chan_num(chan)); + list_splice(&chain, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + return NULL; + } + spin_unlock_bh(&ioat->desc_lock); + + desc->txd.flags = flags; + desc->len = total_len; + list_splice(&chain, &desc->tx_list); + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->tx_cnt = tx_cnt; + dump_desc_dbg(ioat, desc); + + return &desc->txd; +} + +static void ioat1_cleanup_tasklet(unsigned long data) +{ + struct ioat_dma_chan *chan = (void *)data; + + ioat1_cleanup(chan); + writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw) +{ + struct pci_dev *pdev = chan->device->pdev; + size_t offset = len - hw->size; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) + ioat_unmap(pdev, hw->src_addr - offset, len, + PCI_DMA_TODEVICE, flags, 0); +} + +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) +{ + unsigned long phys_complete; + u64 completion; + + completion = *chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + if (is_ioat_halted(completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", + chanerr); + + /* TODO do something to salvage the situation */ + } + + return phys_complete; +} + +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete) +{ + *phys_complete = ioat_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + +static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct list_head *_desc, *n; + struct dma_async_tx_descriptor *tx; + + dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n", + __func__, phys_complete); + list_for_each_safe(_desc, n, &ioat->used_desc) { + struct ioat_desc_sw *desc; + + prefetch(n); + desc = list_entry(_desc, typeof(*desc), node); + tx = &desc->txd; + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + ioat->active -= desc->hw->tx_cnt; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(tx)) + list_move_tail(&desc->node, &ioat->free_desc); + } else { + /* + * last used desc. Do not remove, so we can + * append from it. + */ + + /* if nothing else is pending, cancel the + * completion timeout + */ + if (n == &ioat->used_desc) { + dev_dbg(to_dev(chan), + "%s cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + } + + /* TODO check status bits? */ + break; + } + } + + chan->last_completion = phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + * + * To prevent lock contention we defer cleanup when the locks are + * contended with a terminal timeout that forces cleanup and catches + * completion notification errors. + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->desc_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat1_timer_event(unsigned long data) +{ + struct ioat_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + + spin_lock_bh(&chan->cleanup_lock); + if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat->desc_lock); + + /* restart active descriptors */ + desc = to_ioat_desc(ioat->used_desc.prev); + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + + ioat->pending = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&ioat->desc_lock); + } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + + spin_lock_bh(&ioat->desc_lock); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat1_reset_channel(ioat); + else { + u64 status = ioat_chansts(chan); + + /* manually update the last completion address */ + if (ioat_chansts_to_addr(status) != 0) + *chan->completion = status; + + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->desc_lock); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat1_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + struct ioat_dma_descriptor *hw; + + spin_lock_bh(&ioat->desc_lock); + + desc = ioat1_dma_get_next_descriptor(ioat); + + if (!desc) { + dev_err(to_dev(chan), + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat->desc_lock); + return; + } + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + hw->next = 0; + list_add_tail(&desc->node, &ioat->used_desc); + dump_desc_dbg(ioat, desc); + + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + spin_unlock_bh(&ioat->desc_lock); +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void __devinit ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @device: device to be tested + */ +int __devinit ioat_dma_self_test(struct ioatdma_device *device) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &device->common; + struct device *dev = &device->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | + DMA_PREP_INTERRUPT; + tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, + IOAT_TEST_SIZE, flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), " + "msix-single-vector, msi, intx)"); + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @device: ioat device + */ +static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +{ + struct ioat_chan_common *chan; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msix-single-vector")) + goto msix_single_vector; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = device->common.chancnt; + for (i = 0; i < msixcnt; i++) + device->msix_entries[i].entry = i; + + err = pci_enable_msix(pdev, device->msix_entries, msixcnt); + if (err < 0) + goto msi; + if (err > 0) + goto msix_single_vector; + + for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; + chan = ioat_chan_by_index(device, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &device->msix_entries[j]; + chan = ioat_chan_by_index(device, j); + devm_free_irq(dev, msix->vector, chan); + } + goto msix_single_vector; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + goto done; + +msix_single_vector: + msix = &device->msix_entries[0]; + msix->entry = 0; + err = pci_enable_msix(pdev, device->msix_entries, 1); + if (err) + goto msi; + + err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, + "ioat-msix", device); + if (err) { + pci_disable_msix(pdev); + goto msi; + } + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", device); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); + if (err) + goto err_no_irq; + +done: + if (device->intr_quirk) + device->intr_quirk(device); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *device) +{ + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); +} + +int __devinit ioat_probe(struct ioatdma_device *device) +{ + int err = -ENODEV; + struct dma_device *dma = &device->common; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + device->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!device->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + device->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!device->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + device->enumerate_channels(device); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "zero channels detected\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(device); + if (err) + goto err_setup_interrupts; + + err = device->self_test(device); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(device); +err_setup_interrupts: + pci_pool_destroy(device->completion_pool); +err_completion_pool: + pci_pool_destroy(device->dma_pool); +err_dma_pool: + return err; +} + +int __devinit ioat_register(struct ioatdma_device *device) +{ + int err = dma_async_device_register(&device->common); + + if (err) { + ioat_disable_interrupts(device); + pci_pool_destroy(device->completion_pool); + pci_pool_destroy(device->dma_pool); + } + + return err; +} + +/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ +static void ioat1_intr_quirk(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + u32 dmactrl; + + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + if (pdev->msi_enabled) + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + else + dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->desccount); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->active); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *device = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + device->version >> 4, device->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static struct attribute *ioat1_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioat_chan_common *chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + chan = container_of(kobj, struct ioat_chan_common, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&chan->common, page); +} + +struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +static struct kobj_type ioat1_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat1_attrs, +}; + +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (err) { + dev_warn(to_dev(chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { + kobject_del(&chan->kobj); + kobject_put(&chan->kobj); + } + } +} + +int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + int err; + + device->intr_quirk = ioat1_intr_quirk; + device->enumerate_channels = ioat1_enumerate_channels; + device->self_test = ioat_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat1_dma_free_chan_resources; + dma->device_is_tx_complete = ioat1_dma_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(4096); + err = ioat_register(device); + if (err) + return err; + ioat_kobject_add(device, &ioat1_ktype); + + if (dca) + device->dca = ioat_dca_init(pdev, device->reg_base); + + return err; +} + +void __devexit ioat_dma_remove(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + + ioat_disable_interrupts(device); + + ioat_kobject_del(device); + + dma_async_device_unregister(dma); + + pci_pool_destroy(device->dma_pool); + pci_pool_destroy(device->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 00000000000..c14fdfeb7f3 --- /dev/null +++ b/drivers/dma/ioat/dma.h @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include <linux/dmaengine.h> +#include "hw.h" +#include "registers.h" +#include <linux/init.h> +#include <linux/dmapool.h> +#include <linux/cache.h> +#include <linux/pci_ids.h> +#include <net/tcp.h> + +#define IOAT_DMA_VERSION "4.00" + +#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 + +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) +#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @version: version of ioatdma device + * @msix_entries: irq handlers + * @idx: per channel data + * @dca: direct cache access context + * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) + * @enumerate_channels: hw version specific channel enumeration + * @cleanup_tasklet: select between the v2 and v3 cleanup routines + * @timer_fn: select between the v2 and v3 timer watchdog routines + * @self_test: hardware version specific self test for each supported op type + * + * Note: the v3 cleanup routine supports raid operations + */ +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct pci_pool *dma_pool; + struct pci_pool *completion_pool; + struct dma_device common; + u8 version; + struct msix_entry msix_entries[4]; + struct ioat_chan_common *idx[4]; + struct dca_provider *dca; + void (*intr_quirk)(struct ioatdma_device *device); + int (*enumerate_channels)(struct ioatdma_device *device); + void (*cleanup_tasklet)(unsigned long data); + void (*timer_fn)(unsigned long data); + int (*self_test)(struct ioatdma_device *device); +}; + +struct ioat_chan_common { + struct dma_chan common; + void __iomem *reg_base; + unsigned long last_completion; + spinlock_t cleanup_lock; + dma_cookie_t completed_cookie; + unsigned long state; + #define IOAT_COMPLETION_PENDING 0 + #define IOAT_COMPLETION_ACK 1 + #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 + struct timer_list timer; + #define COMPLETION_TIMEOUT msecs_to_jiffies(100) + #define IDLE_TIMEOUT msecs_to_jiffies(2000) + #define RESET_DELAY msecs_to_jiffies(100) + struct ioatdma_device *device; + dma_addr_t completion_dma; + u64 *completion; + struct tasklet_struct cleanup_task; + struct kobject kobj; +}; + +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); +}; + +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + struct ioat_chan_common base; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + + int pending; + u16 desccount; + u16 active; +}; + +static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) +{ + return container_of(c, struct ioat_chan_common, common); +} + +static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat_dma_chan, base); +} + +/** + * ioat_is_complete - poll the status of an ioat transaction + * @c: channel handle + * @cookie: transaction identifier + * @done: if set, updated with last completed transaction + * @used: if set, updated with last used transaction + */ +static inline enum dma_status +ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_chan_common *chan = to_chan_common(c); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + last_used = c->cookie; + last_complete = chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +/* wrapper around hardware descriptor format + additional software fields */ + +/** + * struct ioat_desc_sw - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @node: this descriptor will either be on the free list, + * or attached to a transaction list (tx_list) + * @txd: the generic software descriptor for all engines + * @id: identifier for debug + */ +struct ioat_desc_sw { + struct ioat_dma_descriptor *hw; + struct list_head node; + size_t len; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + #ifdef DEBUG + int id; + #endif +}; + +#ifdef DEBUG +#define set_desc_id(desc, i) ((desc)->id = (i)) +#define desc_id(desc) ((desc)->id) +#else +#define set_desc_id(desc, i) +#define desc_id(desc) (0) +#endif + +static inline void +__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, + struct dma_async_tx_descriptor *tx, int id) +{ + struct device *dev = to_dev(chan); + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" + " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + (unsigned long long) tx->phys, + (unsigned long long) hw->next, tx->cookie, tx->flags, + hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); +} + +#define dump_desc_dbg(c, d) \ + ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + +static inline void ioat_set_tcp_copy_break(unsigned long copybreak) +{ + #ifdef CONFIG_NET_DMA + sysctl_tcp_dma_copybreak = copybreak; + #endif +} + +static inline struct ioat_chan_common * +ioat_chan_by_index(struct ioatdma_device *device, int index) +{ + return device->idx[index]; +} + +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + u32 status_lo; + + /* We need to read the low address first as this causes the + * chipset to latch the upper bits for the subsequent read + */ + status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status <<= 32; + status |= status_lo; + + return status; +} + +static inline void ioat_start(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline u64 ioat_chansts_to_addr(u64 status) +{ + return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +} + +static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +{ + return readl(chan->reg_base + IOAT_CHANERR_OFFSET); +} + +static inline void ioat_suspend(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); +} + +static inline bool is_ioat_active(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); +} + +static inline bool is_ioat_idle(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); +} + +static inline bool is_ioat_halted(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); +} + +static inline bool is_ioat_suspended(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); +} + +/* channel was fatally programmed */ +static inline bool is_ioat_bug(unsigned long err) +{ + return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR| + IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR| + IOAT_CHANERR_LENGTH_ERR)); +} + +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) +{ + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); +} + +int __devinit ioat_probe(struct ioatdma_device *device); +int __devinit ioat_register(struct ioatdma_device *device); +int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat_dma_self_test(struct ioatdma_device *device); +void __devexit ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, + void __iomem *iobase); +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat); +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw); +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete); +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *device); +extern struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 00000000000..96ffab7d37a --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c @@ -0,0 +1,871 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine (versions >= 2), which + * does asynchronous data movement and checksumming operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat2+: allocate 2^n descriptors per channel" + " (default: 8 max: 16)"); +static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat2+: upper limit for ring size (default: 16)"); + +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +{ + void * __iomem reg_base = ioat->base.reg_base; + + ioat->pending = 0; + ioat->dmacount += ioat2_ring_pending(ioat); + ioat->issued = ioat->head; + /* make descriptor updates globally visible before notifying channel */ + wmb(); + writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(&ioat->base), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); +} + +void ioat2_issue_pending(struct dma_chan *chan) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); + + spin_lock_bh(&ioat->ring_lock); + if (ioat->pending == 1) + __ioat2_issue_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +/** + * ioat2_update_pending - log pending descriptors + * @ioat: ioat2+ channel + * + * set pending to '1' unless pending is already set to '2', pending == 2 + * indicates that submission is temporarily blocked due to an in-flight + * reset. If we are already above the ioat_pending_level threshold then + * just issue pending. + * + * called with ring_lock held + */ +static void ioat2_update_pending(struct ioat2_dma_chan *ioat) +{ + if (unlikely(ioat->pending == 2)) + return; + else if (ioat2_ring_pending(ioat) > ioat_pending_level) + __ioat2_issue_pending(ioat); + else + ioat->pending = 1; +} + +static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + int idx; + + if (ioat2_ring_space(ioat) < 1) { + dev_err(to_dev(&ioat->base), + "Unable to start null desc - ring full\n"); + return; + } + + dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + idx = ioat2_desc_alloc(ioat, 1); + desc = ioat2_get_ring_ent(ioat, idx); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat2_set_chainaddr(ioat, desc->txd.phys); + dump_desc_dbg(ioat, desc); + __ioat2_issue_pending(ioat); +} + +static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + spin_lock_bh(&ioat->ring_lock); + __ioat2_start_null_desc(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct dma_async_tx_descriptor *tx; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + tx = &desc->txd; + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +void ioat2_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + ioat2_set_chainaddr(ioat, desc->txd.phys); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat2_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +/** + * ioat2_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +int ioat2_enumerate_channels(struct ioatdma_device *device) +{ + struct ioat2_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + /* FIXME which i/oat version is i7300? */ +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + device->timer_fn, + device->cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap_log = xfercap_log; + spin_lock_init(&ioat->ring_lock); + } + dma->chancnt = i; + return i; +} + +static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + dma_cookie_t cookie = c->cookie; + + cookie++; + if (cookie < 0) + cookie = 1; + tx->cookie = cookie; + c->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat2_update_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); + + return cookie; +} + +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *dma; + dma_addr_t phys; + + dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(dma->dma_pool, flags, &phys); + if (!hw) + return NULL; + memset(hw, 0, sizeof(*hw)); + + desc = kmem_cache_alloc(ioat2_cache, flags); + if (!desc) { + pci_pool_free(dma->dma_pool, hw, phys); + return NULL; + } + memset(desc, 0, sizeof(*desc)); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat2_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *dma; + + dma = to_ioatdma_device(chan->device); + pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat2_cache, desc); +} + +static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioat_ring_ent **ring; + int descs = 1 << order; + int i; + + if (order > ioat_get_max_alloc_order()) + return NULL; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; +} + +/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring + * @chan: channel to be initialized + */ +int ioat2_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent **ring; + u32 chanerr; + int order; + + /* have we already been set up? */ + if (ioat->ring) + return 1 << ioat->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + if (!chan->completion) + return -ENOMEM; + + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat2_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat->ring_lock); + ioat->ring = ring; + ioat->head = 0; + ioat->issued = 0; + ioat->tail = 0; + ioat->pending = 0; + ioat->alloc_order = order; + spin_unlock_bh(&ioat->ring_lock); + + tasklet_enable(&chan->cleanup_task); + ioat2_start_null_desc(ioat); + + return 1 << ioat->alloc_order; +} + +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +{ + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct ioat_chan_common *chan = &ioat->base; + struct dma_chan *c = &chan->common; + const u16 curr_size = ioat2_ring_mask(ioat) + 1; + const u16 active = ioat2_ring_active(ioat); + const u16 new_size = 1 << order; + struct ioat_ring_ent **ring; + u16 i; + + if (order > ioat_get_max_alloc_order()) + return false; + + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring + */ + if (active >= new_size) + return false; + + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; + + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ioat2_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } + + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; + + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat2_get_ring_ent(ioat, ioat->tail+i); + ioat2_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; + } + + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, new_size); + + kfree(ioat->ring); + ioat->ring = ring; + ioat->alloc_order = order; + + return true; +} + +/** + * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops + * @idx: gets starting descriptor index on successful allocation + * @ioat: ioat2,3 channel (ring) to operate on + * @num_descs: allocation length + */ +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +{ + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&ioat->ring_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { + if (reshape_ring(ioat, ioat->alloc_order + 1) && + ioat2_ring_space(ioat) > num_descs) + break; + + if (printk_ratelimit()) + dev_dbg(to_dev(chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, + ioat->issued); + spin_unlock_bh(&ioat->ring_lock); + + /* progress reclaim in the allocation failure case we + * may be called under bh_disabled so we need to trigger + * the timer event directly + */ + spin_lock_bh(&chan->cleanup_lock); + if (jiffies > chan->timer.expires && + timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&chan->cleanup_lock); + device->timer_fn((unsigned long) ioat); + } else + spin_unlock_bh(&chan->cleanup_lock); + return -ENOMEM; + } + + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + *idx = ioat2_desc_alloc(ioat, num_descs); + return 0; /* with ioat->ring_lock held */ +} + +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs; + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + +/** + * ioat2_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +void ioat2_free_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + const u16 total_descs = 1 << ioat->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat->ring) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + device->cleanup_tasklet((unsigned long) ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->ring_lock); + descs = ioat2_ring_space(ioat); + dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->head + i); + ioat2_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + ioat2_free_ring_ent(desc, c); + } + + kfree(ioat->ring); + ioat->ring = NULL; + ioat->alloc_order = 0; + pci_pool_free(device->completion_pool, chan->completion, + chan->completion_dma); + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->pending = 0; + ioat->dmacount = 0; +} + +enum dma_status +ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioatdma_device *device = ioat->base.device; + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + device->cleanup_tasklet((unsigned long) ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + + device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + device->self_test = ioat_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + return err; +} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 00000000000..1d849ef74d5 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h @@ -0,0 +1,190 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_V2_H +#define IOATDMA_V2_H + +#include <linux/dmaengine.h> +#include "dma.h" +#include "hw.h" + + +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes + * @base: common ioat channel parameters + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @pending: lock free indicator for issued != head + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @ring: software ring buffer implementation of hardware ring + * @ring_lock: protects ring attributes + */ +struct ioat2_dma_chan { + struct ioat_chan_common base; + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + int pending; + struct ioat_ring_ent **ring; + spinlock_t ring_lock; +}; + +static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat2_dma_chan, base); +} + +static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) +{ + return (1 << ioat->alloc_order) - 1; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); +} + +static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) +{ + u16 num_descs = ioat2_ring_mask(ioat) + 1; + u16 active = ioat2_ring_active(ioat); + + BUG_ON(active > num_descs); + + return num_descs - active; +} + +/* assumes caller already checked space */ +static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) +{ + ioat->head += len; + return ioat->head - len; +} + +static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) +{ + u16 num_descs = len >> ioat->xfercap_log; + + num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); + return num_descs; +} + +/** + * struct ioat_ring_ent - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @fill: hardware fill descriptor + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor + * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations + * @id: identifier for debug + */ + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_fill_descriptor *fill; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; + size_t len; + struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; + #ifdef DEBUG + int id; + #endif +}; + +static inline struct ioat_ring_ent * +ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) +{ + return ioat->ring[idx & ioat2_ring_mask(ioat)]; +} + +static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_enumerate_channels(struct ioatdma_device *device); +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +void ioat2_issue_pending(struct dma_chan *chan); +int ioat2_alloc_chan_resources(struct dma_chan *c); +void ioat2_free_chan_resources(struct dma_chan *c); +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); +bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); +void ioat2_cleanup_tasklet(unsigned long data); +void ioat2_timer_event(unsigned long data); +extern struct kobj_type ioat2_ktype; +extern struct kmem_cache *ioat2_cache; +#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 00000000000..35d1e33afd5 --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c @@ -0,0 +1,1223 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * BSD LICENSE + * + * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Support routines for v3+ hardware + */ + +#include <linux/pci.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include "registers.h" +#include "hw.h" +#include "dma.h" +#include "dma_v2.h" + +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc __read_mostly = 0xd0; +static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc __read_mostly = 0xf8; +static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; + +static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + return raw->field[xor_idx_to_field[idx]]; +} + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc, int idx) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = chan->device->pdev; + size_t len = desc->len; + size_t offset = len - desc->hw->size; + struct dma_async_tx_descriptor *tx = &desc->txd; + enum dma_ctrl_flags flags = tx->flags; + + switch (desc->hw->ctl_f.op) { + case IOAT_OP_COPY: + if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ + ioat_dma_unmap(chan, flags, len, desc->hw); + break; + case IOAT_OP_FILL: { + struct ioat_fill_descriptor *hw = desc->fill; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_XOR_VAL: + case IOAT_OP_XOR: { + struct ioat_xor_descriptor *xor = desc->xor; + struct ioat_ring_ent *ext; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 5) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + xor_ex = ext->xor_ex; + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = xor_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* dest is a source in xor validate operations */ + if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_TODEVICE, flags, 1); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ: { + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_ring_ent *ext; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 3) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + pq_ex = ext->pq_ex; + } + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } + default: + dev_err(&pdev->dev, "%s: unknown op type: %#x\n", + __func__, desc->hw->ctl_f.op); + } +} + +static bool desc_has_ext(struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; + + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; + + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; + } + + return false; +} + +/** + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean + * + * The difference from the dma_v2.c __cleanup() is that this routine + * handles extended descriptors and dma-unmapping raid operations. + */ +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + tx = &desc->txd; + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + ioat3_dma_unmap(ioat, desc, ioat->tail + i); + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat3_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat3_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN, + ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat3_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat3_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_fill_descriptor *fill; + int num_descs; + u64 src_data = (0x0101010101010101ULL) * (value & 0xff); + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + fill = desc->fill; + + fill->size = xfer_size; + fill->src_data = src_data; + fill->dst_addr = dest; + fill->ctl = 0; + fill->ctl_f.op = IOAT_OP_FILL; + + len -= xfer_size; + dest += xfer_size; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + fill->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i; + u16 idx; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + int s; + + desc = ioat2_get_ring_ent(ioat, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i, s; + u16 idx; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + + dev_dbg(to_dev(chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources + */ + if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat, desc, ext); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* handle the single source multiply case from the raid6 + * recovery path + */ + if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + } else + return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, + len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = dst; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = src[0]; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, + len, flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + u16 idx; + + if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) + desc = ioat2_get_ring_ent(ioat, idx); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static void __devinit ioat3_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + /* skip memset if the capability is not present */ + if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) + goto free_resources; + + /* test memset */ + dma_addr = dma_map_page(dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test memset prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test memset setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test memset timed out\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_err(dev, "Self-test memset failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) +{ + int rc = ioat_dma_self_test(device); + + if (rc) + return rc; + + rc = ioat_xor_val_self_test(device); + if (rc) + return rc; + + return 0; +} + +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + bool is_raid_device = false; + int err; + u16 dev_id; + u32 cap; + + device->enumerate_channels = ioat2_enumerate_channels; + device->self_test = ioat3_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; + + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + if (cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_xor_val; + } + if (cap & IOAT_CAP_PQ) { + is_raid_device = true; + dma_set_maxpq(dma, 8, 0); + dma->pq_align = 2; + + dma_cap_set(DMA_PQ, dma->cap_mask); + dma->device_prep_dma_pq = ioat3_prep_pq; + + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + + if (!(cap & IOAT_CAP_XOR)) { + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_pqxor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + } + } + if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + + if (is_raid_device) { + dma->device_is_tx_complete = ioat3_is_complete; + device->cleanup_tasklet = ioat3_cleanup_tasklet; + device->timer_fn = ioat3_timer_event; + } else { + dma->device_is_tx_complete = ioat2_is_complete; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + } + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return 0; +} diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 00000000000..99afb12bd40 --- /dev/null +++ b/drivers/dma/ioat/hw.h @@ -0,0 +1,215 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_PCI_VID 0x8086 +#define IOAT_MMIO_BAR 0 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ + +struct ioat_dma_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int null:1; + unsigned int src_brk:1; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd2:13; + #define IOAT_OP_COPY 0x00 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + /* store some driver data in an unused portion of the descriptor */ + union { + uint64_t user1; + uint64_t tx_cnt; + }; + uint64_t user2; +}; + +struct ioat_fill_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int rsvd:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int rsvd2:2; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int rsvd4:15; + #define IOAT_OP_FILL 0x01 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_data; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t next_dst_addr; + uint64_t user1; + uint64_t user2; +}; + +struct ioat_xor_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd:13; + #define IOAT_OP_XOR 0x87 + #define IOAT_OP_XOR_VAL 0x88 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct ioat_xor_ext_descriptor { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t rsvd[4]; +}; + +struct ioat_pq_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:11; + #define IOAT_OP_PQ 0x89 + #define IOAT_OP_PQ_VAL 0x8a + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct ioat_pq_ext_descriptor { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t rsvd[2]; +}; + +struct ioat_pq_update_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:3; + unsigned int coef:8; + #define IOAT_OP_PQ_UP 0x8b + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct ioat_raw_descriptor { + uint64_t field[8]; +}; +#endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 00000000000..d545fae30f3 --- /dev/null +++ b/drivers/dma/ioat/pci.c @@ -0,0 +1,210 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dca.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +struct kmem_cache *ioat2_cache; + +#define DRV_NAME "ioatdma" + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = __devexit_p(ioat_remove), +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + pci_set_master(pdev); + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version == IOAT_VER_1_2) + err = ioat1_dma_probe(device, ioat_dca_enabled); + else if (device->version == IOAT_VER_2_0) + err = ioat2_dma_probe(device, ioat_dca_enabled); + else if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat2_cache) + return -ENOMEM; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + kmem_cache_destroy(ioat2_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat2_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h index 49bc277424f..63038e18ab0 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioat/registers.h @@ -64,18 +64,37 @@ #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 +#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 +#define IOAT_DEVICE_MEMORY_BYPASS 0x0004 +#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 + +#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ +#define IOAT_CAP_PAGE_BREAK 0x00000001 +#define IOAT_CAP_CRC 0x00000002 +#define IOAT_CAP_SKIP_MARKER 0x00000004 +#define IOAT_CAP_DCA 0x00000010 +#define IOAT_CAP_CRC_MOVE 0x00000020 +#define IOAT_CAP_FILL_BLOCK 0x00000040 +#define IOAT_CAP_APIC 0x00000080 +#define IOAT_CAP_XOR 0x00000100 +#define IOAT_CAP_PQ 0x00000200 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ /* DMA Channel Registers */ #define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 #define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 -#define IOAT_CHANCTRL_INT_DISABLE 0x0001 +#define IOAT_CHANCTRL_INT_REARM 0x0001 +#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ + IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\ + IOAT_CHANCTRL_ERR_INT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ @@ -94,14 +113,14 @@ #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F -#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 -#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) +#define IOAT_CHANSTS_SOFT_ERR 0x10ULL +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL +#define IOAT_CHANSTS_STATUS 0x7ULL +#define IOAT_CHANSTS_ACTIVE 0x0 +#define IOAT_CHANSTS_DONE 0x1 +#define IOAT_CHANSTS_SUSPENDED 0x2 +#define IOAT_CHANSTS_HALTED 0x3 @@ -204,22 +223,27 @@ #define IOAT_CDAR_OFFSET_HIGH 0x24 #define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ -#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 -#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 +#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 #define IOAT_CHANERR_CHANCMD_ERR 0x0020 #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 #define IOAT_CHANERR_READ_DATA_ERR 0x0100 #define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 -#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 -#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_LENGTH_ERR 0x0800 #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 #define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 +#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 +#define IOAT_CHANERR_XOR_Q_ERR 0x20000 +#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 + +#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c deleted file mode 100644 index a600fc0f796..00000000000 --- a/drivers/dma/ioat_dma.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/i7300_idle.h> -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) - -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) -static int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -#define RESET_DELAY msecs_to_jiffies(100) -#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) -static void ioat_dma_chan_reset_part2(struct work_struct *work); -static void ioat_dma_chan_watchdog(struct work_struct *work); - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -/* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); - -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); - -static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( - struct ioatdma_device *device, - int index) -{ - return device->idx[index]; -} - -/** - * ioat_dma_do_interrupt - handler used for single vector interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) -{ - struct ioatdma_device *instance = data; - struct ioat_dma_chan *ioat_chan; - unsigned long attnstatus; - int bit; - u8 intrctrl; - - intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); - - if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) - return IRQ_NONE; - - if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_NONE; - } - - attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); - for_each_bit(bit, &attnstatus, BITS_PER_LONG) { - ioat_chan = ioat_lookup_chan_by_index(instance, bit); - tasklet_schedule(&ioat_chan->cleanup_task); - } - - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_HANDLED; -} - -/** - * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) -{ - struct ioat_dma_chan *ioat_chan = data; - - tasklet_schedule(&ioat_chan->cleanup_task); - - return IRQ_HANDLED; -} - -static void ioat_dma_cleanup_tasklet(unsigned long data); - -/** - * ioat_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat_dma_enumerate_channels(struct ioatdma_device *device) -{ - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat_chan; - - /* - * IOAT ver.3 workarounds - */ - if (device->version == IOAT_VER_3_0) { - u32 chan_err_mask; - u16 dev_id; - u32 dmauncerrsts; - - /* - * Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - chan_err_mask = 0x3E07; - pci_write_config_dword(device->pdev, - IOAT_PCI_CHANERRMASK_INT_OFFSET, - chan_err_mask); - - /* - * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(device->pdev, - IOAT_PCI_DEVICE_ID_OFFSET, - &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - dmauncerrsts = 0x10; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - dmauncerrsts); - } - } - - device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { - device->common.chancnt--; - } -#endif - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); - if (!ioat_chan) { - device->common.chancnt = i; - break; - } - - ioat_chan->device = device; - ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); - ioat_chan->xfercap = xfercap; - ioat_chan->desccount = 0; - INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - if (ioat_chan->device->version == IOAT_VER_2_0) - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | - IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - else if (ioat_chan->device->version == IOAT_VER_3_0) - writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - spin_lock_init(&ioat_chan->cleanup_lock); - spin_lock_init(&ioat_chan->desc_lock); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - /* This should be made common somewhere in dmaengine.c */ - ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, - &device->common.channels); - device->idx[i] = ioat_chan; - tasklet_init(&ioat_chan->cleanup_task, - ioat_dma_cleanup_tasklet, - (unsigned long) ioat_chan); - tasklet_disable(&ioat_chan->cleanup_task); - } - return device->common.chancnt; -} - -/** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle - */ -static inline void __ioat1_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); -} - -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - -static inline void __ioat2_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); -} - -static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - - -/** - * ioat_dma_chan_reset_part2 - reinit the channel after a reset - */ -static void ioat_dma_chan_reset_part2(struct work_struct *work) -{ - struct ioat_dma_chan *ioat_chan = - container_of(work, struct ioat_dma_chan, work.work); - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->cleanup_lock); - spin_lock_bh(&ioat_chan->desc_lock); - - ioat_chan->completion_virt->low = 0; - ioat_chan->completion_virt->high = 0; - ioat_chan->pending = 0; - - /* - * count the descriptors waiting, and be sure to do it - * right for both the CB1 line and the CB2 ring - */ - ioat_chan->dmacount = 0; - if (ioat_chan->used_desc.prev) { - desc = to_ioat_desc(ioat_chan->used_desc.prev); - do { - ioat_chan->dmacount++; - desc = to_ioat_desc(desc->node.next); - } while (&desc->node != ioat_chan->used_desc.next); - } - - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat_chan->used_desc.prev); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - /* tell the engine to go with what's left to be done */ - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - - break; - } - dev_err(&ioat_chan->device->pdev->dev, - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - - spin_unlock_bh(&ioat_chan->desc_lock); - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_reset_channel - restart a channel - * @ioat_chan: IOAT DMA channel handle - */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) -{ - u32 chansts, chanerr; - - if (!ioat_chan->used_desc.prev) - return; - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = (ioat_chan->completion_virt->low - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(ioat_chan), chansts, chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ - - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* schedule the 2nd half instead of sleeping a long time */ - schedule_delayed_work(&ioat_chan->work, RESET_DELAY); -} - -/** - * ioat_dma_chan_watchdog - watch for stuck channels - */ -static void ioat_dma_chan_watchdog(struct work_struct *work) -{ - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat_dma_chan *ioat_chan; - int i; - - union { - u64 full; - struct { - u32 low; - u32 high; - }; - } completion_hw; - unsigned long compl_desc_addr_hw; - - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - - if (ioat_chan->device->version == IOAT_VER_1_2 - /* have we started processing anything yet */ - && ioat_chan->last_completion - /* have we completed any since last watchdog cycle? */ - && (ioat_chan->last_completion == - ioat_chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ - && (ioat_chan->watchdog_tcp_cookie == - ioat_chan->watchdog_last_tcp_cookie) - && (ioat_chan->watchdog_tcp_cookie != - ioat_chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ - && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) - && ioat_chan->pending == 0) { - - /* - * check CHANSTS register for completed - * descriptor address. - * if it is different than completion writeback, - * it is not zero - * and it has changed since the last watchdog - * we can assume that channel - * is still working correctly - * and the problem is in completion writeback. - * update completion writeback - * with actual CHANSTS value - * else - * try resetting the channel - */ - - completion_hw.low = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); - completion_hw.high = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); -#if (BITS_PER_LONG == 64) - compl_desc_addr_hw = - completion_hw.full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - compl_desc_addr_hw = - completion_hw.low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((compl_desc_addr_hw != 0) - && (compl_desc_addr_hw != ioat_chan->watchdog_completion) - && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { - ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - ioat_chan->completion_virt->low = completion_hw.low; - ioat_chan->completion_virt->high = completion_hw.high; - } else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - } - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - } else if (ioat_chan->device->version == IOAT_VER_2_0 - && ioat_chan->used_desc.prev - && ioat_chan->last_completion - && ioat_chan->last_completion == ioat_chan->watchdog_completion) { - - if (ioat_chan->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&ioat_chan->common); - else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - } - } else { - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_completion - = ioat_chan->last_completion; - } - - ioat_chan->watchdog_last_tcp_cookie = - ioat_chan->watchdog_tcp_cookie; - } - - schedule_delayed_work(&device->work, WATCHDOG_DELAY); -} - -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *prev, *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - LIST_HEAD(new_chain); - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - spin_lock_bh(&ioat_chan->desc_lock); - prev = to_ioat_desc(ioat_chan->used_desc.prev); - prefetch(prev->hw); - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - hw->next = 0; - - /* chain together the physical address list for the HW */ - wmb(); - prev->hw->next = (u64) new->async_tx.phys; - - len -= copy; - dst += copy; - src += copy; - - list_add_tail(&new->node, &new_chain); - desc_count++; - prev = new; - } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - /* write address into NextDescriptor field of last desc in chain */ - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = - first->async_tx.phys; - list_splice_tail(&new_chain, &ioat_chan->used_desc); - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - /* - * ioat_chan->desc_lock is still in force in version 2 path - * it gets unlocked at end of this function - */ - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat_chan: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw *ioat_dma_alloc_descriptor( - struct ioat_dma_chan *ioat_chan, - gfp_t flags) -{ - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; - dma_addr_t phys; - - ioatdma_device = to_ioatdma_device(ioat_chan->common.device); - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) - return NULL; - - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); - return NULL; - } - - memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc_sw->async_tx.tx_submit = ioat1_tx_submit; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - desc_sw->async_tx.tx_submit = ioat2_tx_submit; - break; - } - - desc_sw->hw = desc; - desc_sw->async_tx.phys = phys; - - return desc_sw; -} - -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "initial descriptors per channel (default: 256)"); - -/** - * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat_chan: the channel to be massaged - */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc, *_desc; - - /* setup used_desc */ - ioat_chan->used_desc.next = ioat_chan->free_desc.next; - ioat_chan->used_desc.prev = NULL; - - /* pull free_desc out of the circle so that every node is a hw - * descriptor, but leave it pointing to the list - */ - ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; - ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; - - /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat_chan->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - } -} - -/** - * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *desc; - u16 chanctrl; - u32 chanerr; - int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? */ - if (!list_empty(&ioat_chan->free_desc)) - return ioat_chan->desccount; - - /* Setup register to interrupt and write completion status on error */ - chanctrl = IOAT_CHANCTRL_ERR_INT_EN | - IOAT_CHANCTRL_ANY_ERR_ABORT_EN | - IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Only %d initial descriptors\n", i); - break; - } - list_add_tail(&desc->node, &tmp_list); - } - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->desccount = i; - list_splice(&tmp_list, &ioat_chan->free_desc); - if (ioat_chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - ioat_chan->completion_virt = - pci_pool_alloc(ioat_chan->device->completion_pool, - GFP_KERNEL, - &ioat_chan->completion_addr); - memset(ioat_chan->completion_virt, 0, - sizeof(*ioat_chan->completion_virt)); - writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) ioat_chan->completion_addr) >> 32, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ - return ioat_chan->desccount; -} - -/** - * ioat_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat_dma_free_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat_chan->desccount == 0) - return; - - tasklet_disable(&ioat_chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat_chan); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat_chan->desc_lock); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat_chan->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - list_for_each_entry_safe(desc, _desc, - ioat_chan->free_desc.next, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - desc = to_ioat_desc(ioat_chan->free_desc.next); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); - - pci_pool_free(ioatdma_device->completion_pool, - ioat_chan->completion_virt, - ioat_chan->completion_addr); - - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(&ioat_chan->device->pdev->dev, - "Freeing %d in use descriptors!\n", - in_use_descs - 1); - - ioat_chan->last_completion = ioat_chan->completion_addr = 0; - ioat_chan->pending = 0; - ioat_chan->dmacount = 0; - ioat_chan->desccount = 0; - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_tcp_cookie = - ioat_chan->watchdog_last_tcp_cookie = 0; -} - -/** - * ioat_dma_get_next_descriptor - return the next available descriptor - * @ioat_chan: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. - */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - if (!list_empty(&ioat_chan->free_desc)) { - new = to_ioat_desc(ioat_chan->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - return NULL; - } - } - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - /* - * used.prev points to where to start processing - * used.next points to next free descriptor - * if used.prev == NULL, there are none waiting to be processed - * if used.next == used.prev.prev, there is only one free descriptor, - * and we need to use it to as a noop descriptor before - * linking in a new set of descriptors, since the device - * has probably already read the pointer to it - */ - if (ioat_chan->used_desc.prev && - ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { - - struct ioat_desc_sw *desc; - struct ioat_desc_sw *noop_desc; - int i; - - /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat_chan->used_desc.next); - /* set size to non-zero value (channel returns error when size is 0) */ - noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; - noop_desc->hw->src_addr = 0; - noop_desc->hw->dst_addr = 0; - - ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; - ioat_chan->pending++; - ioat_chan->dmacount++; - - /* try to get a few more descriptors */ - for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - break; - } - list_add_tail(&desc->node, ioat_chan->used_desc.next); - - desc->hw->next - = to_ioat_desc(desc->node.next)->async_tx.phys; - to_ioat_desc(desc->node.prev)->hw->next - = desc->async_tx.phys; - ioat_chan->desccount++; - } - - ioat_chan->used_desc.next = noop_desc->node.next; - } - new = to_ioat_desc(ioat_chan->used_desc.next); - prefetch(new); - ioat_chan->used_desc.next = new->node.next; - - if (ioat_chan->used_desc.prev == NULL) - ioat_chan->used_desc.prev = &new->node; - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw *ioat_dma_get_next_descriptor( - struct ioat_dma_chan *ioat_chan) -{ - if (!ioat_chan) - return NULL; - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat_chan); - case IOAT_VER_2_0: - case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat_chan); - } - return NULL; -} - -static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat_dma_get_next_descriptor(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat_chan); - - /* - * leave ioat_chan->desc_lock set in ioat 2 path - * it will get unlocked at end of tx_submit - */ - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - spin_unlock_bh(&ioat_chan->desc_lock); - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static void ioat_dma_cleanup_tasklet(unsigned long data) -{ - struct ioat_dma_chan *chan = (void *)data; - ioat_dma_memcpy_cleanup(chan); - writew(IOAT_CHANCTRL_INT_DISABLE, - chan->reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void -ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) -{ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - } - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - } -} - -/** - * ioat_dma_memcpy_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) -{ - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - unsigned long desc_phys; - struct ioat_desc_sw *latest_desc; - - prefetch(ioat_chan->completion_virt); - - if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) - return; - - /* The completion writeback can happen at any time, - so reads by the driver need to be atomic operations - The descriptor physical addresses are limited to 32-bits - when the CPU can only do a 32-bit mov */ - -#if (BITS_PER_LONG == 64) - phys_complete = - ioat_chan->completion_virt->full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - phys_complete = - ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((ioat_chan->completion_virt->full - & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == - IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { - dev_err(&ioat_chan->device->pdev->dev, - "Channel halted, chanerr = %x\n", - readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); - - /* TODO do something to salvage the situation */ - } - - if (phys_complete == ioat_chan->last_completion) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after 2 seconds - */ - if (ioat_chan->device->version != IOAT_VER_3_0) { - if (time_after(jiffies, - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); - ioat_chan->last_completion_time = jiffies; - } - } - return; - } - ioat_chan->last_completion_time = jiffies; - - cookie = 0; - if (!spin_trylock_bh(&ioat_chan->desc_lock)) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - return; - } - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - - if (desc->async_tx.phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(&desc->async_tx)) { - list_move_tail(&desc->node, - &ioat_chan->free_desc); - } else - desc->async_tx.cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either - */ - desc->async_tx.cookie = 0; - - /* TODO check status bits? */ - break; - } - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - /* has some other thread has already cleaned up? */ - if (ioat_chan->used_desc.prev == NULL) - break; - - /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat_chan->used_desc.next); - latest_desc = NULL; - do { - desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)desc->async_tx.phys - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - if (desc_phys == phys_complete) { - latest_desc = desc; - break; - } - } while (&desc->node != ioat_chan->used_desc.prev); - - if (latest_desc != NULL) { - - /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat_chan->used_desc.prev); - &desc->node != latest_desc->node.next && - &desc->node != ioat_chan->used_desc.next; - desc = to_ioat_desc(desc->node.next)) { - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - } - - /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat_chan->used_desc.next) - ioat_chan->used_desc.prev = NULL; - else - ioat_chan->used_desc.prev = &desc->node; - } - break; - } - - spin_unlock_bh(&ioat_chan->desc_lock); - - ioat_chan->last_completion = phys_complete; - if (cookie != 0) - ioat_chan->completed_cookie = cookie; - - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_is_complete - poll the status of a IOAT DMA transaction - * @chan: IOAT DMA channel handle - * @cookie: DMA transaction identifier - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction - */ -static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - dma_cookie_t last_used; - dma_cookie_t last_complete; - enum dma_status ret; - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - ioat_chan->watchdog_tcp_cookie = cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - ret = dma_async_is_complete(cookie, last_complete, last_used); - if (ret == DMA_SUCCESS) - return ret; - - ioat_dma_memcpy_cleanup(ioat_chan); - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - return dma_async_is_complete(cookie, last_complete, last_used); -} - -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->desc_lock); - - desc = ioat_dma_get_next_descriptor(ioat_chan); - - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return; - } - - desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL - | IOAT_DMA_DESCRIPTOR_CTL_INT_GN - | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - /* set size to non-zero value (channel returns error when size is 0) */ - desc->hw->size = NULL_DESC_BUFFER_SIZE; - desc->hw->src_addr = 0; - desc->hw->dst_addr = 0; - async_tx_ack(&desc->async_tx); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc->hw->next = 0; - list_add_tail(&desc->node, &ioat_chan->used_desc); - - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - ioat_chan->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat_chan); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); -} - -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 - -static void ioat_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. - * @device: device to be tested - */ -static int ioat_dma_self_test(struct ioatdma_device *device) -{ - int i; - u8 *src; - u8 *dest; - struct dma_chan *dma_chan; - struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } - - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (device->common.device_alloc_chan_resources(dma_chan) < 1) { - dev_err(&device->pdev->dev, - "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } - - dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(&device->pdev->dev, - "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(&device->pdev->dev, - "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - device->common.device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) - != DMA_SUCCESS) { - dev_err(&device->pdev->dev, - "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(&device->pdev->dev, - "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - device->common.device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), " - "msix-single-vector, msi, intx)"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -static int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int err, i, j, msixcnt; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msix-single-vector")) - goto msix_single_vector; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", - ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); - if (err < 0) - goto msi; - if (err > 0) - goto msix_single_vector; - - for (i = 0; i < msixcnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - err = request_irq(device->msix_entries[i].vector, - ioat_dma_do_interrupt_msix, - 0, "ioat-msix", ioat_chan); - if (err) { - for (j = 0; j < i; j++) { - ioat_chan = - ioat_lookup_chan_by_index(device, j); - free_irq(device->msix_entries[j].vector, - ioat_chan); - } - goto msix_single_vector; - } - } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = msix_multi_vector; - goto done; - -msix_single_vector: - device->msix_entries[0].entry = 0; - err = pci_enable_msix(device->pdev, device->msix_entries, 1); - if (err) - goto msi; - - err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, - 0, "ioat-msix", device); - if (err) { - pci_disable_msix(device->pdev); - goto msi; - } - device->irq_mode = msix_single_vector; - goto done; - -msi: - err = pci_enable_msi(device->pdev); - if (err) - goto intx; - - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - 0, "ioat-msi", device); - if (err) { - pci_disable_msi(device->pdev); - goto intx; - } - /* - * CB 1.2 devices need a bit set in configuration space to enable MSI - */ - if (device->version == IOAT_VER_1_2) { - u32 dmactrl; - pci_read_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, dmactrl); - } - device->irq_mode = msi; - goto done; - -intx: - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - device->irq_mode = intx; - -done: - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - dev_err(&device->pdev->dev, "no usable interrupts\n"); - device->irq_mode = none; - return -1; -} - -/** - * ioat_dma_remove_interrupts - remove whatever interrupts were set - * @device: ioat device - */ -static void ioat_dma_remove_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int i; - - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - - switch (device->irq_mode) { - case msix_multi_vector: - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - free_irq(device->msix_entries[i].vector, ioat_chan); - } - pci_disable_msix(device->pdev); - break; - case msix_single_vector: - free_irq(device->msix_entries[0].vector, device); - pci_disable_msix(device->pdev); - break; - case msi: - free_irq(device->pdev->irq, device); - pci_disable_msi(device->pdev); - break; - case intx: - free_irq(device->pdev->irq, device); - break; - case none: - dev_warn(&device->pdev->dev, - "call to %s without interrupts setup\n", __func__); - } - device->irq_mode = none; -} - -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase) -{ - int err; - struct ioatdma_device *device; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - device->reg_base = iobase; - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; - } - - INIT_LIST_HEAD(&device->common.channels); - ioat_dma_enumerate_channels(device); - - device->common.device_alloc_chan_resources = - ioat_dma_alloc_chan_resources; - device->common.device_free_chan_resources = - ioat_dma_free_chan_resources; - device->common.dev = &pdev->dev; - - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); - device->common.device_is_tx_complete = ioat_dma_is_complete; - switch (device->version) { - case IOAT_VER_1_2: - device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - device->common.device_issue_pending = - ioat1_dma_memcpy_issue_pending; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - device->common.device_issue_pending = - ioat2_dma_memcpy_issue_pending; - break; - } - - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine found," - " %d channels, device version 0x%02x, driver version %s\n", - device->common.chancnt, device->version, IOAT_DMA_VERSION); - - if (!device->common.chancnt) { - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine problem found: " - "zero channels detected\n"); - goto err_setup_interrupts; - } - - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; - - err = ioat_dma_self_test(device); - if (err) - goto err_self_test; - - ioat_set_tcp_copy_break(device); - - dma_async_device_register(&device->common); - - if (device->version != IOAT_VER_3_0) { - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); - } - - return device; - -err_self_test: - ioat_dma_remove_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - kfree(device); -err_kzalloc: - dev_err(&pdev->dev, - "Intel(R) I/OAT DMA Engine initialization failed\n"); - return NULL; -} - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_chan *chan, *_chan; - struct ioat_dma_chan *ioat_chan; - - if (device->version != IOAT_VER_3_0) - cancel_delayed_work(&device->work); - - ioat_dma_remove_interrupts(device); - - dma_async_device_unregister(&device->common); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - iounmap(device->reg_base); - pci_release_regions(device->pdev); - pci_disable_device(device->pdev); - - list_for_each_entry_safe(chan, _chan, - &device->common.channels, device_node) { - ioat_chan = to_ioat_chan(chan); - list_del(&chan->device_node); - kfree(ioat_chan); - } - kfree(device); -} - diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h deleted file mode 100644 index a52ff4bd460..00000000000 --- a/drivers/dma/ioatdma.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_H -#define IOATDMA_H - -#include <linux/dmaengine.h> -#include "ioatdma_hw.h" -#include <linux/init.h> -#include <linux/dmapool.h> -#include <linux/cache.h> -#include <linux/pci_ids.h> -#include <net/tcp.h> - -#define IOAT_DMA_VERSION "3.64" - -enum ioat_interrupt { - none = 0, - msix_multi_vector = 1, - msix_single_vector = 2, - msi = 3, - intx = 4, -}; - -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 -#define IOAT_DMA_DCA_ANY_CPU ~0 -#define IOAT_WATCHDOG_PERIOD (2 * HZ) - - -/** - * struct ioatdma_device - internal representation of a IOAT device - * @pdev: PCI-Express device - * @reg_base: MMIO register space base address - * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device - * @version: version of ioatdma device - * @irq_mode: which style irq to use - * @msix_entries: irq handlers - * @idx: per channel data - */ - -struct ioatdma_device { - struct pci_dev *pdev; - void __iomem *reg_base; - struct pci_pool *dma_pool; - struct pci_pool *completion_pool; - struct dma_device common; - u8 version; - enum ioat_interrupt irq_mode; - struct delayed_work work; - struct msix_entry msix_entries[4]; - struct ioat_dma_chan *idx[4]; -}; - -/** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - - void __iomem *reg_base; - - dma_cookie_t completed_cookie; - unsigned long last_completion; - unsigned long last_completion_time; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t cleanup_lock; - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - unsigned long watchdog_completion; - int watchdog_tcp_cookie; - u32 watchdog_last_tcp_cookie; - struct delayed_work work; - - int pending; - int dmacount; - int desccount; - - struct ioatdma_device *device; - struct dma_chan common; - - dma_addr_t completion_addr; - union { - u64 full; /* HW completion writeback */ - struct { - u32 low; - u32 high; - }; - } *completion_virt; - unsigned long last_compl_desc_addr_hw; - struct tasklet_struct cleanup_task; -}; - -/* wrapper around hardware descriptor format + additional software fields */ - -/** - * struct ioat_desc_sw - wrapper around hardware descriptor - * @hw: hardware DMA descriptor - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (async_tx.tx_list) - * @tx_cnt: number of descriptors required to complete the transaction - * @async_tx: the generic software descriptor for all engines - */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; - int tx_cnt; - size_t len; - dma_addr_t src; - dma_addr_t dst; - struct dma_async_tx_descriptor async_tx; -}; - -static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) -{ - #ifdef CONFIG_NET_DMA - switch (dev->version) { - case IOAT_VER_1_2: - sysctl_tcp_dma_copybreak = 4096; - break; - case IOAT_VER_2_0: - sysctl_tcp_dma_copybreak = 2048; - break; - case IOAT_VER_3_0: - sysctl_tcp_dma_copybreak = 262144; - break; - } - #endif -} - -#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase); -void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -#else -#define ioat_dma_probe(pdev, iobase) NULL -#define ioat_dma_remove(device) do { } while (0) -#define ioat_dca_init(pdev, iobase) NULL -#define ioat2_dca_init(pdev, iobase) NULL -#define ioat3_dca_init(pdev, iobase) NULL -#endif - -#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h deleted file mode 100644 index afa57eef86c..00000000000 --- a/drivers/dma/ioatdma_hw.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef _IOAT_HW_H_ -#define _IOAT_HW_H_ - -/* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 - -/* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ -#define IOAT_VER_2_0 0x20 /* Version 2.0 */ -#define IOAT_VER_3_0 0x30 /* Version 3.0 */ - -struct ioat_dma_descriptor { - uint32_t size; - uint32_t ctl; - uint64_t src_addr; - uint64_t dst_addr; - uint64_t next; - uint64_t rsv1; - uint64_t rsv2; - uint64_t user1; - uint64_t user2; -}; - -#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 -#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 -#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 -#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 -#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 -#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 -#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 -#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 -#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 - -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 - -#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 - -#endif diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122..645ca8d54ec 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -31,6 +31,7 @@ #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/ioport.h> +#include <linux/raid/pq.h> #include <mach/adma.h> @@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) } } +static void +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = iop_desc_get_dest_addr(unmap, iop_chan); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + } + desc->group_head = NULL; +} + +static void +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt = unmap->unmap_src_cnt; + dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); + dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); + int i; + + if (tx->flags & DMA_PREP_CONTINUE) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + dma_addr_t addr; + + for (i = 0; i < src_cnt; i++) { + addr = iop_desc_get_src_addr(unmap, iop_chan, i); + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + if (desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); + dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); + } + } + + desc->group_head = NULL; +} + + static dma_cookie_t iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct iop_adma_chan *iop_chan, dma_cookie_t cookie) { - BUG_ON(desc->async_tx.cookie < 0); - if (desc->async_tx.cookie > 0) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie < 0); + if (tx->cookie > 0) { + cookie = tx->cookie; + tx->cookie = 0; /* call the callback (must not sleep or submit new * operations to this channel) */ - if (desc->async_tx.callback) - desc->async_tx.callback( - desc->async_tx.callback_param); + if (tx->callback) + tx->callback(tx->callback_param); /* unmap dma addresses * (unmap_single vs unmap_page?) */ if (desc->group_head && desc->unmap_len) { - struct iop_adma_desc_slot *unmap = desc->group_head; - struct device *dev = - &iop_chan->device->pdev->dev; - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = desc->async_tx.flags; - u32 src_cnt; - dma_addr_t addr; - dma_addr_t dest; - - src_cnt = unmap->unmap_src_cnt; - dest = iop_desc_get_dest_addr(unmap, iop_chan); - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - enum dma_data_direction dir; - - if (src_cnt > 1) /* is xor? */ - dir = DMA_BIDIRECTIONAL; - else - dir = DMA_FROM_DEVICE; - - dma_unmap_page(dev, dest, len, dir); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = iop_desc_get_src_addr(unmap, - iop_chan, - src_cnt); - if (addr == dest) - continue; - dma_unmap_page(dev, addr, len, - DMA_TO_DEVICE); - } - } - desc->group_head = NULL; + if (iop_desc_is_pq(desc)) + iop_desc_unmap_pq(iop_chan, desc); + else + iop_desc_unmap(iop_chan, desc); } } /* run dependent operations */ - dma_run_dependencies(&desc->async_tx); + dma_run_dependencies(tx); return cookie; } @@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data) { struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; - spin_lock(&iop_chan->lock); + /* lockdep will flag depedency submissions as potentially + * recursive locking, this is not the case as a dependency + * submission will never recurse a channels submit routine. + * There are checks in async_tx.c to prevent this. + */ + spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); __iop_adma_slot_cleanup(iop_chan); spin_unlock(&iop_chan->lock); } @@ -370,7 +421,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); iop_chan->last_used = last_used; iop_desc_clear_next_desc(alloc_start); iop_desc_clear_next_desc(alloc_tail); @@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) old_chain_tail = list_entry(iop_chan->chain.prev, struct iop_adma_desc_slot, chain_node); - list_splice_init(&sw_desc->async_tx.tx_list, + list_splice_init(&sw_desc->tx_list, &old_chain_tail->chain_node); /* fix up the hardware chain */ @@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = iop_adma_tx_submit; + INIT_LIST_HEAD(&slot->tx_list); INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); hw_desc = (char *) iop_chan->device->dma_desc_pool; @@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, } static struct dma_async_tx_descriptor * -iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, - unsigned int src_cnt, size_t len, u32 *result, - unsigned long flags) +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, + unsigned int src_cnt, size_t len, u32 *result, + unsigned long flags) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); struct iop_adma_desc_slot *sw_desc, *grp_start; @@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, return sw_desc ? &sw_desc->async_tx : NULL; } +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + int continue_srcs; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + if (dmaf_p_disabled_continue(flags)) + continue_srcs = 1+src_cnt; + else if (dmaf_continue(flags)) + continue_srcs = 3+src_cnt; + else + continue_srcs = 0+src_cnt; + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + int i; + + g = sw_desc->group_head; + iop_desc_set_byte_count(g, iop_chan, len); + + /* even if P is disabled its destination address (bits + * [3:0]) must match Q. It is ok if P points to an + * invalid address, it won't be written. + */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1] & 0x7; + + iop_desc_set_pq_addr(g, dst); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + for (i = 0; i < src_cnt; i++) + iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); + + /* if we are continuing a previous operation factor in + * the old p and q values, see the comment for dma_maxpq + * in include/linux/dmaengine.h + */ + if (dmaf_p_disabled_continue(flags)) + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + else if (dmaf_continue(flags)) { + iop_desc_set_pq_src_addr(g, i++, dst[0], 0); + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + iop_desc_set_pq_src_addr(g, i++, dst[1], 0); + } + iop_desc_init_pq(g, i, flags); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, enum sum_check_flags *pqres, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + /* for validate operations p and q are tagged onto the + * end of the source list + */ + int pq_idx = src_cnt; + + g = sw_desc->group_head; + iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); + iop_desc_set_pq_zero_sum_byte_count(g, len); + g->pq_check_result = pqres; + pr_debug("\t%s: g->pq_check_result: %p\n", + __func__, g->pq_check_result); + sw_desc->unmap_src_cnt = src_cnt+2; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, + src[src_cnt], + scf[src_cnt]); + iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + static void iop_adma_free_chan_resources(struct dma_chan *chan) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); @@ -906,7 +1070,7 @@ out: #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ static int __devinit -iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) +iop_adma_xor_val_self_test(struct iop_adma_device *device) { int i, src_idx; struct page *dest; @@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) PAGE_SIZE, DMA_TO_DEVICE); /* skip zero sum if the capability is not present */ - if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) goto free_resources; /* zero sum the sources with the destintation page */ @@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1105,6 +1269,170 @@ out: return err; } +#ifdef CONFIG_MD_RAID6_PQ +static int __devinit +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) +{ + /* combined sources, software pq results, and extra hw pq results */ + struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; + /* ptr to the extra hw pq buffers defined above */ + struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; + /* address conversion buffers (dma_map / page_address) */ + void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; + dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST]; + dma_addr_t pq_dest[2]; + + int i; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u32 zero_sum_result; + int err = 0; + struct device *dev; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (i = 0; i < ARRAY_SIZE(pq); i++) { + pq[i] = alloc_page(GFP_KERNEL); + if (!pq[i]) { + while (i--) + __free_page(pq[i]); + return -ENOMEM; + } + } + + /* Fill in src buffers */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { + pq_sw[i] = page_address(pq[i]); + memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE); + } + pq_sw[i] = page_address(pq[i]); + pq_sw[i+1] = page_address(pq[i+1]); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dev = dma_chan->device->dev; + + /* initialize the dests */ + memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); + memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); + + /* test pq */ + pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); + pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, + IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, + PAGE_SIZE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); + + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], + page_address(pq_hw[0]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test p failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], + page_address(pq_hw[1]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test q failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test correct zero sum using the software generated pq values */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = ~0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + + /* test incorrect zero sum */ + i = IOP_ADMA_NUM_SRC_TEST; + memset(pq_sw[i] + 100, 0, 100); + memset(pq_sw[i+1] + 200, 0, 200); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = 0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { + dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + i = ARRAY_SIZE(pq); + while (i--) + __free_page(pq[i]); + return err; +} +#endif + static int __devexit iop_adma_remove(struct platform_device *dev) { struct iop_adma_device *device = platform_get_drvdata(dev); @@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dma_dev->max_xor = iop_adma_get_max_xor(); dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; } - if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) - dma_dev->device_prep_dma_zero_sum = - iop_adma_prep_dma_zero_sum; + if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_xor_val = + iop_adma_prep_dma_xor_val; + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); + dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_pq_val = + iop_adma_prep_dma_pq_val; if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) dma_dev->device_prep_dma_interrupt = iop_adma_prep_dma_interrupt; @@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { - ret = iop_adma_xor_zero_sum_self_test(adev); + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + ret = iop_adma_xor_val_self_test(adev); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_iop_chan; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { + #ifdef CONFIG_MD_RAID6_PQ + ret = iop_adma_pq_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); + #else + /* can not test raid6, so do not publish capability */ + dma_cap_clear(DMA_PQ, dma_dev->cap_mask); + dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); + ret = 0; + #endif + if (ret) + goto err_free_iop_chan; + } + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " - "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", - dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", - dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", + "( %s%s%s%s%s%s%s)\n", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", - dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", + dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", - dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_memcpy(grp_start, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); @@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_null_xor(grp_start, 2, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c index 9f6fe46a9b8..c0a272c7368 100644 --- a/drivers/dma/iovlock.c +++ b/drivers/dma/iovlock.c @@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, iov_byte_offset, kdata, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; @@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, page, offset, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3f23eabe09f..466ab10c1ff 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -517,7 +517,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); mv_chan->last_used = last_used; mv_desc_clear_next_desc(alloc_start); mv_desc_clear_next_desc(alloc_tail); @@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = mv_desc_assign_cookie(mv_chan, sw_desc); if (list_empty(&mv_chan->chain)) - list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); + list_splice_init(&sw_desc->tx_list, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, chain_node); - list_splice_init(&grp_start->async_tx.tx_list, + list_splice_init(&grp_start->tx_list, &old_chain_tail->chain_node); if (!mv_can_chain(grp_start)) @@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) slot->async_tx.tx_submit = mv_xor_tx_submit; INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->tx_list); hw_desc = (char *) mv_chan->device->dma_desc_pool; slot->async_tx.phys = (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 06cafe1ef52..977b592e976 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -126,9 +126,8 @@ struct mv_xor_chan { * @idx: pool index * @unmap_src_cnt: number of xor sources * @unmap_len: transaction bytecount + * @tx_list: list of slots that make up a multi-descriptor transaction * @async_tx: support for the async_tx api - * @group_list: list of slots that make up a multi-descriptor transaction - * for example transfer lengths larger than the supported hw max * @xor_check_result: result of zero sum * @crc32_result: result crc calculation */ @@ -145,6 +144,7 @@ struct mv_xor_desc_slot { u16 unmap_src_cnt; u32 value; size_t unmap_len; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { u32 *xor_check_result; diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c new file mode 100644 index 00000000000..b3b065c4e5c --- /dev/null +++ b/drivers/dma/shdma.c @@ -0,0 +1,786 @@ +/* + * Renesas SuperH DMA Engine support + * + * base is drivers/dma/flsdma.c + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * - DMA of SuperH does not have Hardware DMA chain mode. + * - MAX DMA size is 16MB. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/platform_device.h> +#include <cpu/dma.h> +#include <asm/dma-sh.h> +#include "shdma.h" + +/* DMA descriptor control */ +#define DESC_LAST (-1) +#define DESC_COMP (1) +#define DESC_NCOMP (0) + +#define NR_DESCS_PER_CHANNEL 32 +/* + * Define the default configuration for dual address memory-memory transfer. + * The 0x400 value represents auto-request, external->external. + * + * And this driver set 4byte burst mode. + * If you want to change mode, you need to change RS_DEFAULT of value. + * (ex 1byte burst mode -> (RS_DUAL & ~TS_32) + */ +#define RS_DEFAULT (RS_DUAL) + +#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) +static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) +{ + ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) +{ + return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static void dmae_init(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = RS_DEFAULT; /* default is DUAL mode */ + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +/* + * Reset DMA controller + * + * SH7780 has two DMAOR register + */ +static void sh_dmae_ctl_stop(int id) +{ + unsigned short dmaor = dmaor_read_reg(id); + + dmaor &= ~(DMAOR_NMIF | DMAOR_AE); + dmaor_write_reg(id, dmaor); +} + +static int sh_dmae_rst(int id) +{ + unsigned short dmaor; + + sh_dmae_ctl_stop(id); + dmaor = (dmaor_read_reg(id)|DMAOR_INIT); + + dmaor_write_reg(id, dmaor); + if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) { + pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n"); + return -EINVAL; + } + return 0; +} + +static int dmae_is_idle(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + if (chcr & CHCR_DE) { + if (!(chcr & CHCR_TE)) + return -EBUSY; /* working */ + } + return 0; /* waiting */ +} + +static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; +} + +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +{ + sh_dmae_writel(sh_chan, hw.sar, SAR); + sh_dmae_writel(sh_chan, hw.dar, DAR); + sh_dmae_writel(sh_chan, + (hw.tcr >> calc_xmit_shift(sh_chan)), TCR); +} + +static void dmae_start(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr |= (CHCR_DE|CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static void dmae_halt(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) +{ + int ret = dmae_is_idle(sh_chan); + /* When DMA was working, can not set data to CHCR */ + if (ret) + return ret; + + sh_dmae_writel(sh_chan, val, CHCR); + return 0; +} + +#define DMARS1_ADDR 0x04 +#define DMARS2_ADDR 0x08 +#define DMARS_SHIFT 8 +#define DMARS_CHAN_MSK 0x01 +static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) +{ + u32 addr; + int shift = 0; + int ret = dmae_is_idle(sh_chan); + if (ret) + return ret; + + if (sh_chan->id & DMARS_CHAN_MSK) + shift = DMARS_SHIFT; + + switch (sh_chan->id) { + /* DMARS0 */ + case 0: + case 1: + addr = SH_DMARS_BASE; + break; + /* DMARS1 */ + case 2: + case 3: + addr = (SH_DMARS_BASE + DMARS1_ADDR); + break; + /* DMARS2 */ + case 4: + case 5: + addr = (SH_DMARS_BASE + DMARS2_ADDR); + break; + default: + return -EINVAL; + } + + ctrl_outw((val << shift) | + (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)), + addr); + + return 0; +} + +static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&sh_chan->desc_lock); + + cookie = sh_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + + /* If desc only in the case of 1 */ + if (desc->async_tx.cookie != -EBUSY) + desc->async_tx.cookie = cookie; + sh_chan->common.cookie = desc->async_tx.cookie; + + list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + + spin_unlock_bh(&sh_chan->desc_lock); + + return cookie; +} + +static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc, *ret = NULL; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + ret = desc; + break; + } + } + spin_unlock_bh(&sh_chan->desc_lock); + + return ret; +} + +static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) +{ + if (desc) { + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + list_add(&desc->node, &sh_chan->ld_free); + + spin_unlock_bh(&sh_chan->desc_lock); + } +} + +static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc; + + spin_lock_bh(&sh_chan->desc_lock); + while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_bh(&sh_chan->desc_lock); + desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL); + if (!desc) { + spin_lock_bh(&sh_chan->desc_lock); + break; + } + dma_async_tx_descriptor_init(&desc->async_tx, + &sh_chan->common); + desc->async_tx.tx_submit = sh_dmae_tx_submit; + desc->async_tx.flags = DMA_CTRL_ACK; + INIT_LIST_HEAD(&desc->tx_list); + sh_dmae_put_desc(sh_chan, desc); + + spin_lock_bh(&sh_chan->desc_lock); + sh_chan->descs_allocated++; + } + spin_unlock_bh(&sh_chan->desc_lock); + + return sh_chan->descs_allocated; +} + +/* + * sh_dma_free_chan_resources - Free all resources of the channel. + */ +static void sh_dmae_free_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc, *_desc; + LIST_HEAD(list); + + BUG_ON(!list_empty(&sh_chan->ld_queue)); + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&sh_chan->ld_free, &list); + sh_chan->descs_allocated = 0; + + spin_unlock_bh(&sh_chan->desc_lock); + + list_for_each_entry_safe(desc, _desc, &list, node) + kfree(desc); +} + +static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct sh_dmae_chan *sh_chan; + struct sh_desc *first = NULL, *prev = NULL, *new; + size_t copy_size; + + if (!chan) + return NULL; + + if (!len) + return NULL; + + sh_chan = to_sh_chan(chan); + + do { + /* Allocate the link descriptor from DMA pool */ + new = sh_dmae_get_desc(sh_chan); + if (!new) { + dev_err(sh_chan->dev, + "No free memory for link descriptor\n"); + goto err_get_desc; + } + + copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + + new->hw.sar = dma_src; + new->hw.dar = dma_dest; + new->hw.tcr = copy_size; + if (!first) + first = new; + + new->mark = DESC_NCOMP; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy_size; + dma_src += copy_size; + dma_dest += copy_size; + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; /* Last desc */ + + return &first->async_tx; + +err_get_desc: + sh_dmae_put_desc(sh_chan, first); + return NULL; + +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function clean up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { + dma_async_tx_callback callback; + void *callback_param; + + /* non send data */ + if (desc->mark == DESC_NCOMP) + break; + + /* send data sesc */ + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + + /* Remove from ld_queue list */ + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + + dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", + desc); + + list_move(&desc->node, &sh_chan->ld_free); + /* Run the link descriptor callback function */ + if (callback) { + spin_unlock_bh(&sh_chan->desc_lock); + dev_dbg(sh_chan->dev, "link descriptor %p callback\n", + desc); + callback(callback_param); + spin_lock_bh(&sh_chan->desc_lock); + } + } + spin_unlock_bh(&sh_chan->desc_lock); +} + +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) +{ + struct list_head *ld_node; + struct sh_dmae_regs hw; + + /* DMA work check */ + if (dmae_is_idle(sh_chan)) + return; + + /* Find the first un-transfer desciptor */ + for (ld_node = sh_chan->ld_queue.next; + (ld_node != &sh_chan->ld_queue) + && (to_sh_desc(ld_node)->mark == DESC_COMP); + ld_node = ld_node->next) + cpu_relax(); + + if (ld_node != &sh_chan->ld_queue) { + /* Get the ld start address from ld_queue */ + hw = to_sh_desc(ld_node)->hw; + dmae_set_reg(sh_chan, hw); + dmae_start(sh_chan); + } +} + +static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + sh_chan_xfer_ld_queue(sh_chan); +} + +static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + sh_dmae_chan_ld_cleanup(sh_chan); + + last_used = chan->cookie; + last_complete = sh_chan->completed_cookie; + if (last_complete == -EBUSY) + last_complete = last_used; + + if (done) + *done = last_complete; + + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static irqreturn_t sh_dmae_interrupt(int irq, void *data) +{ + irqreturn_t ret = IRQ_NONE; + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + if (chcr & CHCR_TE) { + /* DMA stop */ + dmae_halt(sh_chan); + + ret = IRQ_HANDLED; + tasklet_schedule(&sh_chan->tasklet); + } + + return ret; +} + +#if defined(CONFIG_CPU_SH4) +static irqreturn_t sh_dmae_err(int irq, void *data) +{ + int err = 0; + struct sh_dmae_device *shdev = (struct sh_dmae_device *)data; + + /* IRQ Multi */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + int cnt = 0; + switch (irq) { +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + case DMTE6_IRQ: + cnt++; +#endif + case DMTE0_IRQ: + if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) { + disable_irq(irq); + return IRQ_HANDLED; + } + default: + return IRQ_NONE; + } + } else { + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + return err; + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + return err; + } + disable_irq(irq); + return IRQ_HANDLED; + } +} +#endif + +static void dmae_do_tasklet(unsigned long data) +{ + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + struct sh_desc *desc, *_desc, *cur_desc = NULL; + u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + list_for_each_entry_safe(desc, _desc, + &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { + cur_desc = desc; + break; + } + } + + if (cur_desc) { + switch (cur_desc->async_tx.cookie) { + case 0: /* other desc data */ + break; + case -EBUSY: /* last desc */ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie; + break; + default: /* first desc ( 0 < )*/ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie - 1; + break; + } + cur_desc->mark = DESC_COMP; + } + /* Next desc */ + sh_chan_xfer_ld_queue(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan); +} + +static unsigned int get_dmae_irq(unsigned int id) +{ + unsigned int irq = 0; + if (id < ARRAY_SIZE(dmte_irq_map)) + irq = dmte_irq_map[id]; + return irq; +} + +static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) +{ + int err; + unsigned int irq = get_dmae_irq(id); + unsigned long irqflags = IRQF_DISABLED; + struct sh_dmae_chan *new_sh_chan; + + /* alloc channel */ + new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); + if (!new_sh_chan) { + dev_err(shdev->common.dev, "No free memory for allocating " + "dma channels!\n"); + return -ENOMEM; + } + + new_sh_chan->dev = shdev->common.dev; + new_sh_chan->id = id; + + /* Init DMA tasklet */ + tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet, + (unsigned long)new_sh_chan); + + /* Init the channel */ + dmae_init(new_sh_chan); + + spin_lock_init(&new_sh_chan->desc_lock); + + /* Init descripter manage list */ + INIT_LIST_HEAD(&new_sh_chan->ld_queue); + INIT_LIST_HEAD(&new_sh_chan->ld_free); + + /* copy struct dma_device */ + new_sh_chan->common.device = &shdev->common; + + /* Add the channel to DMA device channel list */ + list_add_tail(&new_sh_chan->common.device_node, + &shdev->common.channels); + shdev->common.chancnt++; + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; +#if defined(DMTE6_IRQ) + if (irq >= DMTE6_IRQ) + irq = DMTE6_IRQ; + else +#endif + irq = DMTE0_IRQ; + } + + snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), + "sh-dmae%d", new_sh_chan->id); + + /* set up channel irq */ + err = request_irq(irq, &sh_dmae_interrupt, + irqflags, new_sh_chan->dev_id, new_sh_chan); + if (err) { + dev_err(shdev->common.dev, "DMA channel %d request_irq error " + "with return %d\n", id, err); + goto err_no_irq; + } + + /* CHCR register control function */ + new_sh_chan->set_chcr = dmae_set_chcr; + /* DMARS register control function */ + new_sh_chan->set_dmars = dmae_set_dmars; + + shdev->chan[id] = new_sh_chan; + return 0; + +err_no_irq: + /* remove from dmaengine device node */ + list_del(&new_sh_chan->common.device_node); + kfree(new_sh_chan); + return err; +} + +static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) +{ + int i; + + for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) { + if (shdev->chan[i]) { + struct sh_dmae_chan *shchan = shdev->chan[i]; + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) + free_irq(dmte_irq_map[i], shchan); + + list_del(&shchan->common.device_node); + kfree(shchan); + shdev->chan[i] = NULL; + } + } + shdev->common.chancnt = 0; +} + +static int __init sh_dmae_probe(struct platform_device *pdev) +{ + int err = 0, cnt, ecnt; + unsigned long irqflags = IRQF_DISABLED; +#if defined(CONFIG_CPU_SH4) + int eirq[] = { DMAE0_IRQ, +#if defined(DMAE1_IRQ) + DMAE1_IRQ +#endif + }; +#endif + struct sh_dmae_device *shdev; + + shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); + if (!shdev) { + dev_err(&pdev->dev, "No enough memory\n"); + err = -ENOMEM; + goto shdev_err; + } + + /* get platform data */ + if (!pdev->dev.platform_data) + goto shdev_err; + + /* platform data */ + memcpy(&shdev->pdata, pdev->dev.platform_data, + sizeof(struct sh_dmae_pdata)); + + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + goto rst_err; + + /* SH7780/85/23 has DMAOR1 */ + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + goto rst_err; + } + + INIT_LIST_HEAD(&shdev->common.channels); + + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + shdev->common.device_alloc_chan_resources + = sh_dmae_alloc_chan_resources; + shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; + shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; + shdev->common.device_is_tx_complete = sh_dmae_is_complete; + shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; + shdev->common.dev = &pdev->dev; + +#if defined(CONFIG_CPU_SH4) + /* Non Mix IRQ mode SH7722/SH7730 etc... */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; + eirq[0] = DMTE0_IRQ; +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + eirq[1] = DMTE6_IRQ; +#endif + } + + for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) { + err = request_irq(eirq[ecnt], sh_dmae_err, + irqflags, "DMAC Address Error", shdev); + if (err) { + dev_err(&pdev->dev, "DMA device request_irq" + "error (irq %d) with return %d\n", + eirq[ecnt], err); + goto eirq_err; + } + } +#endif /* CONFIG_CPU_SH4 */ + + /* Create DMA Channel */ + for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) { + err = sh_dmae_chan_probe(shdev, cnt); + if (err) + goto chan_probe_err; + } + + platform_set_drvdata(pdev, shdev); + dma_async_device_register(&shdev->common); + + return err; + +chan_probe_err: + sh_dmae_chan_remove(shdev); + +eirq_err: + for (ecnt-- ; ecnt >= 0; ecnt--) + free_irq(eirq[ecnt], shdev); + +rst_err: + kfree(shdev); + +shdev_err: + return err; +} + +static int __exit sh_dmae_remove(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&shdev->common); + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + free_irq(DMTE0_IRQ, shdev); +#if defined(DMTE6_IRQ) + free_irq(DMTE6_IRQ, shdev); +#endif + } + + /* channel data remove */ + sh_dmae_chan_remove(shdev); + + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) { + free_irq(DMAE0_IRQ, shdev); +#if defined(DMAE1_IRQ) + free_irq(DMAE1_IRQ, shdev); +#endif + } + kfree(shdev); + + return 0; +} + +static void sh_dmae_shutdown(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + sh_dmae_ctl_stop(0); + if (shdev->pdata.mode & SHDMA_DMAOR1) + sh_dmae_ctl_stop(1); +} + +static struct platform_driver sh_dmae_driver = { + .remove = __exit_p(sh_dmae_remove), + .shutdown = sh_dmae_shutdown, + .driver = { + .name = "sh-dma-engine", + }, +}; + +static int __init sh_dmae_init(void) +{ + return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); +} +module_init(sh_dmae_init); + +static void __exit sh_dmae_exit(void) +{ + platform_driver_unregister(&sh_dmae_driver); +} +module_exit(sh_dmae_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>"); +MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h new file mode 100644 index 00000000000..2b4bc15a2c0 --- /dev/null +++ b/drivers/dma/shdma.h @@ -0,0 +1,64 @@ +/* + * Renesas SuperH DMA Engine support + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_SHDMA_H +#define __DMA_SHDMA_H + +#include <linux/device.h> +#include <linux/dmapool.h> +#include <linux/dmaengine.h> + +#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ + +struct sh_dmae_regs { + u32 sar; /* SAR / source address */ + u32 dar; /* DAR / destination address */ + u32 tcr; /* TCR / transfer count */ +}; + +struct sh_desc { + struct list_head tx_list; + struct sh_dmae_regs hw; + struct list_head node; + struct dma_async_tx_descriptor async_tx; + int mark; +}; + +struct sh_dmae_chan { + dma_cookie_t completed_cookie; /* The maximum cookie completed */ + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct list_head ld_free; /* Link descriptors free */ + struct dma_chan common; /* DMA common channel */ + struct device *dev; /* Channel device */ + struct tasklet_struct tasklet; /* Tasklet */ + int descs_allocated; /* desc count */ + int id; /* Raw id of this channel */ + char dev_id[16]; /* unique name per DMAC of channel */ + + /* Set chcr */ + int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs); + /* Set DMA resource */ + int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res); +}; + +struct sh_dmae_device { + struct dma_device common; + struct sh_dmae_chan *chan[MAX_DMA_CHANNELS]; + struct sh_dmae_pdata pdata; +}; + +#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common) +#define to_sh_desc(lh) container_of(lh, struct sh_desc, node) +#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) + +#endif /* __DMA_SHDMA_H */ diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 7837930146a..fb6bb64e886 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) { - if (!list_empty(&desc->txd.tx_list)) - desc = list_entry(desc->txd.tx_list.prev, - struct txx9dmac_desc, desc_node); + if (!list_empty(&desc->tx_list)) + desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); return desc; } @@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, desc = kzalloc(sizeof(*desc), flags); if (!desc) return NULL; + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, &dc->chan); desc->txd.tx_submit = txx9dmac_tx_submit; /* txd.flags will be overwritten in prep funcs */ @@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, struct txx9dmac_dev *ddev = dc->ddev; struct txx9dmac_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dc->chan), child->txd.phys, ddev->descsize, DMA_TO_DEVICE); @@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc, txx9dmac_sync_desc_for_cpu(dc, desc); spin_lock_bh(&dc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dc->free_list); @@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, param = txd->callback_param; txx9dmac_sync_desc_for_cpu(dc, desc); - list_splice_init(&txd->tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); list_move(&desc->desc_node, &dc->free_list); if (!ds) { @@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) "Bad descriptor submitted for DMA! (cookie: %d)\n", bad_desc->txd.cookie); txx9dmac_dump_desc(dc, &bad_desc->hwdesc); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) txx9dmac_dump_desc(dc, &child->hwdesc); /* Pretend the descriptor completed successfully */ txx9dmac_descriptor_complete(dc, bad_desc); @@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) return; } - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (desc_read_CHAR(dc, child) == chain) { /* Currently in progress */ if (csr & TXX9_DMA_CSR_ABCHC) @@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dma_sync_single_for_device(chan2parent(&dc->chan), prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } @@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h index c907ff01d27..365d42366b9 100644 --- a/drivers/dma/txx9dmac.h +++ b/drivers/dma/txx9dmac.h @@ -231,6 +231,7 @@ struct txx9dmac_desc { /* THEN values for driver housekeeping */ struct list_head desc_node ____cacheline_aligned; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index a3ca18e2d7c..02127e59fe8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -133,6 +133,13 @@ config EDAC_I3000 Support for error detection and correction on the Intel 3000 and 3010 server chipsets. +config EDAC_I3200 + tristate "Intel 3200" + depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL + help + Support for error detection and correction on the Intel + 3200 and 3210 server chipsets. + config EDAC_X38 tristate "Intel X38" depends on EDAC_MM_EDAC && PCI && X86 @@ -176,11 +183,11 @@ config EDAC_I5100 San Clemente MCH. config EDAC_MPC85XX - tristate "Freescale MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx + tristate "Freescale MPC83xx / MPC85xx" + depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx) help Support for error detection and correction on the Freescale - MPC8560, MPC8540, MPC8548 + MPC8349, MPC8560, MPC8540, MPC8548 config EDAC_MV64X60 tristate "Marvell MV64x60" diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index cfa033ce53a..7a473bbe8ab 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o obj-$(CONFIG_EDAC_I3000) += i3000_edac.o +obj-$(CONFIG_EDAC_I3200) += i3200_edac.o obj-$(CONFIG_EDAC_X38) += x38_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o @@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o + diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 8c54196b5ab..3d50274f134 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdev->name)) { cpc925_printk(KERN_ERR, "Unable to request mem region\n"); res = -EBUSY; goto err1; } - vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1); + vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!vbase) { cpc925_printk(KERN_ERR, "Unable to ioremap device\n"); res = -ENOMEM; @@ -953,7 +953,7 @@ err3: cpc925_mc_exit(mci); edac_mc_free(mci); err2: - devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1); + devm_release_mem_region(&pdev->dev, r->start, resource_size(r)); err1: devres_release_group(&pdev->dev, cpc925_probe); out: diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index b02a6a69a8f..d5e13c94714 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head) edac_dev = container_of(head, struct edac_device_ctl_info, rcu); INIT_LIST_HEAD(&edac_dev->link); - complete(&edac_dev->removal_complete); } /* @@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info *edac_device) { list_del_rcu(&edac_device->link); - - init_completion(&edac_device->removal_complete); call_rcu(&edac_device->rcu, complete_edac_device_list_del); - wait_for_completion(&edac_device->removal_complete); + rcu_barrier(); } /* diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 335b7ebdb11..b629c41756f 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head) mci = container_of(head, struct mem_ctl_info, rcu); INIT_LIST_HEAD(&mci->link); - complete(&mci->complete); } static void del_mc_from_global_list(struct mem_ctl_info *mci) { atomic_dec(&edac_handlers); list_del_rcu(&mci->link); - init_completion(&mci->complete); call_rcu(&mci->rcu, complete_mc_list_del); - wait_for_completion(&mci->complete); + rcu_barrier(); } /** diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 30b585b1d60..efb5d565078 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head) pci = container_of(head, struct edac_pci_ctl_info, rcu); INIT_LIST_HEAD(&pci->link); - complete(&pci->complete); } /* @@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head) static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) { list_del_rcu(&pci->link); - init_completion(&pci->complete); call_rcu(&pci->rcu, complete_edac_pci_list_del); - wait_for_completion(&pci->complete); + rcu_barrier(); } #if 0 diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c new file mode 100644 index 00000000000..fde4db91c4d --- /dev/null +++ b/drivers/edac/i3200_edac.c @@ -0,0 +1,527 @@ +/* + * Intel 3200/3210 Memory Controller kernel module + * Copyright (C) 2008-2009 Akamai Technologies, Inc. + * Portions by Hitoshi Mitake <h.mitake@gmail.com>. + * + * This file may be distributed under the terms of the + * GNU General Public License. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/io.h> +#include "edac_core.h" + +#define I3200_REVISION "1.1" + +#define EDAC_MOD_STR "i3200_edac" + +#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0 + +#define I3200_RANKS 8 +#define I3200_RANKS_PER_CHANNEL 4 +#define I3200_CHANNELS 2 + +/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */ + +#define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */ +#define I3200_MCHBAR_HIGH 0x4c +#define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */ +#define I3200_MMR_WINDOW_SIZE 16384 + +#define I3200_TOM 0xa0 /* Top of Memory (16b) + * + * 15:10 reserved + * 9:0 total populated physical memory + */ +#define I3200_TOM_MASK 0x3ff /* bits 9:0 */ +#define I3200_TOM_SHIFT 26 /* 64MiB grain */ + +#define I3200_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15 reserved + * 14 Isochronous TBWRR Run Behind FIFO Full + * (ITCV) + * 13 Isochronous TBWRR Run Behind FIFO Put + * (ITSTV) + * 12 reserved + * 11 MCH Thermal Sensor Event + * for SMI/SCI/SERR (GTSE) + * 10 reserved + * 9 LOCK to non-DRAM Memory Flag (LCKF) + * 8 reserved + * 7 DRAM Throttle Flag (DTF) + * 6:2 reserved + * 1 Multi-bit DRAM ECC Error Flag (DMERR) + * 0 Single-bit DRAM ECC Error Flag (DSERR) + */ +#define I3200_ERRSTS_UE 0x0002 +#define I3200_ERRSTS_CE 0x0001 +#define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE) + + +/* Intel MMIO register space - device 0 function 0 - MMR space */ + +#define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4) + * + * 15:10 reserved + * 9:0 Channel 0 DRAM Rank Boundary Address + */ +#define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */ +#define I3200_DRB_MASK 0x3ff /* bits 9:0 */ +#define I3200_DRB_SHIFT 26 /* 64MiB grain */ + +#define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b) + * + * 63:48 Error Column Address (ERRCOL) + * 47:32 Error Row Address (ERRROW) + * 31:29 Error Bank Address (ERRBANK) + * 28:27 Error Rank Address (ERRRANK) + * 26:24 reserved + * 23:16 Error Syndrome (ERRSYND) + * 15: 2 reserved + * 1 Multiple Bit Error Status (MERRSTS) + * 0 Correctable Error Status (CERRSTS) + */ +#define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */ +#define I3200_ECCERRLOG_CE 0x1 +#define I3200_ECCERRLOG_UE 0x2 +#define I3200_ECCERRLOG_RANK_BITS 0x18000000 +#define I3200_ECCERRLOG_RANK_SHIFT 27 +#define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000 +#define I3200_ECCERRLOG_SYNDROME_SHIFT 16 +#define I3200_CAPID0 0xe0 /* P.95 of spec for details */ + +struct i3200_priv { + void __iomem *window; +}; + +static int nr_channels; + +static int how_many_channels(struct pci_dev *pdev) +{ + unsigned char capid0_8b; /* 8th byte of CAPID0 */ + + pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ + debugf0("In single channel mode.\n"); + return 1; + } else { + debugf0("In dual channel mode.\n"); + return 2; + } +} + +static unsigned long eccerrlog_syndrome(u64 log) +{ + return (log & I3200_ECCERRLOG_SYNDROME_BITS) >> + I3200_ECCERRLOG_SYNDROME_SHIFT; +} + +static int eccerrlog_row(int channel, u64 log) +{ + u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >> + I3200_ECCERRLOG_RANK_SHIFT); + return rank | (channel * I3200_RANKS_PER_CHANNEL); +} + +enum i3200_chips { + I3200 = 0, +}; + +struct i3200_dev_info { + const char *ctl_name; +}; + +struct i3200_error_info { + u16 errsts; + u16 errsts2; + u64 eccerrlog[I3200_CHANNELS]; +}; + +static const struct i3200_dev_info i3200_devs[] = { + [I3200] = { + .ctl_name = "i3200" + }, +}; + +static struct pci_dev *mci_pdev; +static int i3200_registered = 1; + + +static void i3200_clear_error_info(struct mem_ctl_info *mci) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(mci->dev); + + /* + * Clear any error bits. + * (Yes, we really clear bits by writing 1 to them.) + */ + pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS, + I3200_ERRSTS_BITS); +} + +static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci, + struct i3200_error_info *info) +{ + struct pci_dev *pdev; + struct i3200_priv *priv = mci->pvt_info; + void __iomem *window = priv->window; + + pdev = to_pci_dev(mci->dev); + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts); + if (!(info->errsts & I3200_ERRSTS_BITS)) + return; + + info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); + if (nr_channels == 2) + info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); + + pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2); + + /* + * If the error is the same for both reads then the first set + * of reads is valid. If there is a change then there is a CE + * with no info and the second set of reads is valid and + * should be UE info. + */ + if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { + info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); + if (nr_channels == 2) + info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); + } + + i3200_clear_error_info(mci); +} + +static void i3200_process_error_info(struct mem_ctl_info *mci, + struct i3200_error_info *info) +{ + int channel; + u64 log; + + if (!(info->errsts & I3200_ERRSTS_BITS)) + return; + + if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { + edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + info->errsts = info->errsts2; + } + + for (channel = 0; channel < nr_channels; channel++) { + log = info->eccerrlog[channel]; + if (log & I3200_ECCERRLOG_UE) { + edac_mc_handle_ue(mci, 0, 0, + eccerrlog_row(channel, log), + "i3200 UE"); + } else if (log & I3200_ECCERRLOG_CE) { + edac_mc_handle_ce(mci, 0, 0, + eccerrlog_syndrome(log), + eccerrlog_row(channel, log), 0, + "i3200 CE"); + } + } +} + +static void i3200_check(struct mem_ctl_info *mci) +{ + struct i3200_error_info info; + + debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + i3200_get_and_clear_error_info(mci, &info); + i3200_process_error_info(mci, &info); +} + + +void __iomem *i3200_map_mchbar(struct pci_dev *pdev) +{ + union { + u64 mchbar; + struct { + u32 mchbar_low; + u32 mchbar_high; + }; + } u; + void __iomem *window; + + pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low); + pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high); + u.mchbar &= I3200_MCHBAR_MASK; + + if (u.mchbar != (resource_size_t)u.mchbar) { + printk(KERN_ERR + "i3200: mmio space beyond accessible range (0x%llx)\n", + (unsigned long long)u.mchbar); + return NULL; + } + + window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE); + if (!window) + printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n", + (unsigned long long)u.mchbar); + + return window; +} + + +static void i3200_get_drbs(void __iomem *window, + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) +{ + int i; + + for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { + drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; + drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + } +} + +static bool i3200_is_stacked(struct pci_dev *pdev, + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) +{ + u16 tom; + + pci_read_config_word(pdev, I3200_TOM, &tom); + tom &= I3200_TOM_MASK; + + return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom; +} + +static unsigned long drb_to_nr_pages( + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked, + int channel, int rank) +{ + int n; + + n = drbs[channel][rank]; + if (rank > 0) + n -= drbs[channel][rank - 1]; + if (stacked && (channel == 1) && + drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1]) + n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1]; + + n <<= (I3200_DRB_SHIFT - PAGE_SHIFT); + return n; +} + +static int i3200_probe1(struct pci_dev *pdev, int dev_idx) +{ + int rc; + int i; + struct mem_ctl_info *mci = NULL; + unsigned long last_page; + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]; + bool stacked; + void __iomem *window; + struct i3200_priv *priv; + + debugf0("MC: %s()\n", __func__); + + window = i3200_map_mchbar(pdev); + if (!window) + return -ENODEV; + + i3200_get_drbs(window, drbs); + nr_channels = how_many_channels(pdev); + + mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS, + nr_channels, 0); + if (!mci) + return -ENOMEM; + + debugf3("MC: %s(): init mci\n", __func__); + + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR2; + + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I3200_REVISION; + mci->ctl_name = i3200_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->edac_check = i3200_check; + mci->ctl_page_to_phys = NULL; + priv = mci->pvt_info; + priv->window = window; + + stacked = i3200_is_stacked(pdev, drbs); + + /* + * The dram rank boundary (DRB) reg values are boundary addresses + * for each DRAM rank with a granularity of 64MB. DRB regs are + * cumulative; the last one will contain the total memory + * contained in all ranks. + */ + last_page = -1UL; + for (i = 0; i < mci->nr_csrows; i++) { + unsigned long nr_pages; + struct csrow_info *csrow = &mci->csrows[i]; + + nr_pages = drb_to_nr_pages(drbs, stacked, + i / I3200_RANKS_PER_CHANNEL, + i % I3200_RANKS_PER_CHANNEL); + + if (nr_pages == 0) { + csrow->mtype = MEM_EMPTY; + continue; + } + + csrow->first_page = last_page + 1; + last_page += nr_pages; + csrow->last_page = last_page; + csrow->nr_pages = nr_pages; + + csrow->grain = nr_pages << PAGE_SHIFT; + csrow->mtype = MEM_DDR2; + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = EDAC_UNKNOWN; + } + + i3200_clear_error_info(mci); + + rc = -ENODEV; + if (edac_mc_add_mc(mci)) { + debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + goto fail; + } + + /* get this far and it's successful */ + debugf3("MC: %s(): success\n", __func__); + return 0; + +fail: + iounmap(window); + if (mci) + edac_mc_free(mci); + + return rc; +} + +static int __devinit i3200_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + debugf0("MC: %s()\n", __func__); + + if (pci_enable_device(pdev) < 0) + return -EIO; + + rc = i3200_probe1(pdev, ent->driver_data); + if (!mci_pdev) + mci_pdev = pci_dev_get(pdev); + + return rc; +} + +static void __devexit i3200_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i3200_priv *priv; + + debugf0("%s()\n", __func__); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + priv = mci->pvt_info; + iounmap(priv->window); + + edac_mc_free(mci); +} + +static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { + { + PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I3200}, + { + 0, + } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); + +static struct pci_driver i3200_driver = { + .name = EDAC_MOD_STR, + .probe = i3200_init_one, + .remove = __devexit_p(i3200_remove_one), + .id_table = i3200_pci_tbl, +}; + +static int __init i3200_init(void) +{ + int pci_rc; + + debugf3("MC: %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i3200_driver); + if (pci_rc < 0) + goto fail0; + + if (!mci_pdev) { + i3200_registered = 0; + mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_3200_HB, NULL); + if (!mci_pdev) { + debugf0("i3200 pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + + pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl); + if (pci_rc < 0) { + debugf0("i3200 init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; + +fail1: + pci_unregister_driver(&i3200_driver); + +fail0: + if (mci_pdev) + pci_dev_put(mci_pdev); + + return pci_rc; +} + +static void __exit i3200_exit(void) +{ + debugf3("MC: %s()\n", __func__); + + pci_unregister_driver(&i3200_driver); + if (!i3200_registered) { + i3200_remove_one(mci_pdev); + pci_dev_put(mci_pdev); + } +} + +module_init(i3200_init); +module_exit(i3200_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akamai Technologies, Inc."); +MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 3f2ccfc6407..157f6504f25 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -41,7 +41,9 @@ static u32 orig_pci_err_en; #endif static u32 orig_l2_err_disable; +#ifdef CONFIG_MPC85xx static u32 orig_hid1[2]; +#endif /************************ MC SYSFS parts ***********************************/ @@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = { { .compatible = "fsl,mpc8560-l2-cache-controller", }, { .compatible = "fsl,mpc8568-l2-cache-controller", }, { .compatible = "fsl,mpc8572-l2-cache-controller", }, + { .compatible = "fsl,p2020-l2-cache-controller", }, {}, }; @@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) csrow = &mci->csrows[index]; cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + (index * MPC85XX_MC_CS_BNDS_OFS)); - start = (cs_bnds & 0xfff0000) << 4; - end = ((cs_bnds & 0xfff) << 20); - if (start) - start |= 0xfffff; - if (end) - end |= 0xfffff; + + start = (cs_bnds & 0xffff0000) >> 16; + end = (cs_bnds & 0x0000ffff); if (start == end) continue; /* not populated */ + start <<= (24 - PAGE_SHIFT); + end <<= (24 - PAGE_SHIFT); + end |= (1 << (24 - PAGE_SHIFT)) - 1; + csrow->first_page = start >> PAGE_SHIFT; csrow->last_page = end >> PAGE_SHIFT; - csrow->nr_pages = csrow->last_page + 1 - csrow->first_page; + csrow->nr_pages = end + 1 - start; csrow->grain = 8; csrow->mtype = mtype; csrow->dtype = DEV_UNKNOWN; @@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = { { .compatible = "fsl,mpc8560-memory-controller", }, { .compatible = "fsl,mpc8568-memory-controller", }, { .compatible = "fsl,mpc8572-memory-controller", }, + { .compatible = "fsl,mpc8349-memory-controller", }, + { .compatible = "fsl,p2020-memory-controller", }, {}, }; @@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = { }, }; - +#ifdef CONFIG_MPC85xx static void __init mpc85xx_mc_clear_rfxe(void *data) { orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000)); } - +#endif static int __init mpc85xx_mc_init(void) { @@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void) printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); #endif +#ifdef CONFIG_MPC85xx /* * need to clear HID1[RFXE] to disable machine check int * so we can catch it */ if (edac_op_state == EDAC_OPSTATE_INT) on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); +#endif return 0; } module_init(mpc85xx_mc_init); +#ifdef CONFIG_MPC85xx static void __exit mpc85xx_mc_restore_hid1(void *data) { mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); } +#endif static void __exit mpc85xx_mc_exit(void) { +#ifdef CONFIG_MPC85xx on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); +#endif #ifdef CONFIG_PCI of_unregister_platform_driver(&mpc85xx_pci_err_driver); #endif diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 5131aaae8e0..a6b9fec13a7 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev) return -ENOENT; } - pci_serr = ioremap(r->start, r->end - r->start + 1); + pci_serr = ioremap(r->start, resource_size(r)); if (!pci_serr) return -ENOMEM; @@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->pci_vbase) { printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; @@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while request mem region\n", __func__); @@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) pdata->sram_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->sram_vbase) { printk(KERN_ERR "%s: Unable to setup SRAM err regs\n", __func__); @@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->cpu_vbase[0]) { printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); res = -ENOMEM; @@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->cpu_vbase[1]) { printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); res = -ENOMEM; @@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) pdata->mc_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->mc_vbase) { printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); res = -ENOMEM; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e4d971c8b9d..f831ea15929 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -102,6 +102,7 @@ config DRM_I915 select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if ACPI + select ACPI_BUTTON if ACPI help Choose this option if you have a system that has Intel 830M, 845G, 852GM, 855GM 865G or 915G integrated graphics. If M is selected, the diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 230c9ffdd5e..80391995bde 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size) if (IS_ERR(obj->filp)) goto free; + /* Basically we want to disable the OOM killer and handle ENOMEM + * ourselves by sacrificing pages from cached buffers. + * XXX shmem_file_[gs]et_gfp_mask() + */ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, + GFP_HIGHUSER | + __GFP_COLD | + __GFP_FS | + __GFP_RECLAIMABLE | + __GFP_NORETRY | + __GFP_NOWARN | + __GFP_NOMEMALLOC); + kref_init(&obj->refcount); kref_init(&obj->handlecount); obj->size = size; diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5269dfa5f62..fa7b9be096b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_gem.o \ i915_gem_debug.o \ i915_gem_tiling.o \ + i915_trace_points.o \ intel_display.o \ intel_crt.o \ intel_lvds.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1e3bdcee863..f8ce9a3a420 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) { struct drm_gem_object *obj = obj_priv->obj; - seq_printf(m, " %p: %s %08x %08x %d", + seq_printf(m, " %p: %s %8zd %08x %08x %d %s", obj, get_pin_flag(obj_priv), + obj->size, obj->read_domains, obj->write_domain, - obj_priv->last_rendering_seqno); + obj_priv->last_rendering_seqno, + obj_priv->dirty ? "dirty" : ""); if (obj->name) seq_printf(m, " (name: %d)", obj->name); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5a49a1867b3..45d507ebd3f 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,6 +33,7 @@ #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include <linux/vgaarb.h> /* Really want an OS-independent resettable timer. Would like to have @@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; int i; + trace_i915_ring_wait_begin (dev); + for (i = 0; i < 100000; i++) { ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; acthd = I915_READ(acthd_reg); ring->space = ring->head - (ring->tail + 8); if (ring->space < 0) ring->space += ring->Size; - if (ring->space >= n) + if (ring->space >= n) { + trace_i915_ring_wait_end (dev); return 0; + } if (dev->primary->master) { struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; @@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) } + trace_i915_ring_wait_end (dev); return -EBUSY; } @@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev) * how much was set aside so we can use it for our own purposes. */ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, - uint32_t *preallocated_size) + uint32_t *preallocated_size, + uint32_t *start) { struct drm_i915_private *dev_priv = dev->dev_private; u16 tmp = 0; @@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, return -1; } *preallocated_size = stolen - overhead; + *start = overhead; return 0; } +#define PTE_ADDRESS_MASK 0xfffff000 +#define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */ +#define PTE_MAPPING_TYPE_UNCACHED (0 << 1) +#define PTE_MAPPING_TYPE_DCACHE (1 << 1) /* i830 only */ +#define PTE_MAPPING_TYPE_CACHED (3 << 1) +#define PTE_MAPPING_TYPE_MASK (3 << 1) +#define PTE_VALID (1 << 0) + +/** + * i915_gtt_to_phys - take a GTT address and turn it into a physical one + * @dev: drm device + * @gtt_addr: address to translate + * + * Some chip functions require allocations from stolen space but need the + * physical address of the memory in question. We use this routine + * to get a physical address suitable for register programming from a given + * GTT address. + */ +static unsigned long i915_gtt_to_phys(struct drm_device *dev, + unsigned long gtt_addr) +{ + unsigned long *gtt; + unsigned long entry, phys; + int gtt_bar = IS_I9XX(dev) ? 0 : 1; + int gtt_offset, gtt_size; + + if (IS_I965G(dev)) { + if (IS_G4X(dev) || IS_IGDNG(dev)) { + gtt_offset = 2*1024*1024; + gtt_size = 2*1024*1024; + } else { + gtt_offset = 512*1024; + gtt_size = 512*1024; + } + } else { + gtt_bar = 3; + gtt_offset = 0; + gtt_size = pci_resource_len(dev->pdev, gtt_bar); + } + + gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset, + gtt_size); + if (!gtt) { + DRM_ERROR("ioremap of GTT failed\n"); + return 0; + } + + entry = *(volatile u32 *)(gtt + (gtt_addr / 1024)); + + DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry); + + /* Mask out these reserved bits on this hardware. */ + if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) || + IS_I945G(dev) || IS_I945GM(dev)) { + entry &= ~PTE_ADDRESS_MASK_HIGH; + } + + /* If it's not a mapping type we know, then bail. */ + if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED && + (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) { + iounmap(gtt); + return 0; + } + + if (!(entry & PTE_VALID)) { + DRM_ERROR("bad GTT entry in stolen space\n"); + iounmap(gtt); + return 0; + } + + iounmap(gtt); + + phys =(entry & PTE_ADDRESS_MASK) | + ((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4)); + + DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys); + + return phys; +} + +static void i915_warn_stolen(struct drm_device *dev) +{ + DRM_ERROR("not enough stolen space for compressed buffer, disabling\n"); + DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n"); +} + +static void i915_setup_compression(struct drm_device *dev, int size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mm_node *compressed_fb, *compressed_llb; + unsigned long cfb_base, ll_base; + + /* Leave 1M for line length buffer & misc. */ + compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0); + if (!compressed_fb) { + i915_warn_stolen(dev); + return; + } + + compressed_fb = drm_mm_get_block(compressed_fb, size, 4096); + if (!compressed_fb) { + i915_warn_stolen(dev); + return; + } + + cfb_base = i915_gtt_to_phys(dev, compressed_fb->start); + if (!cfb_base) { + DRM_ERROR("failed to get stolen phys addr, disabling FBC\n"); + drm_mm_put_block(compressed_fb); + } + + if (!IS_GM45(dev)) { + compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096, + 4096, 0); + if (!compressed_llb) { + i915_warn_stolen(dev); + return; + } + + compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096); + if (!compressed_llb) { + i915_warn_stolen(dev); + return; + } + + ll_base = i915_gtt_to_phys(dev, compressed_llb->start); + if (!ll_base) { + DRM_ERROR("failed to get stolen phys addr, disabling FBC\n"); + drm_mm_put_block(compressed_fb); + drm_mm_put_block(compressed_llb); + } + } + + dev_priv->cfb_size = size; + + if (IS_GM45(dev)) { + g4x_disable_fbc(dev); + I915_WRITE(DPFC_CB_BASE, compressed_fb->start); + } else { + i8xx_disable_fbc(dev); + I915_WRITE(FBC_CFB_BASE, cfb_base); + I915_WRITE(FBC_LL_BASE, ll_base); + } + + DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base, + ll_base, size >> 20); +} + /* true = enable decode, false = disable decoder */ static unsigned int i915_vga_set_decode(void *cookie, bool state) { @@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state) } static int i915_load_modeset_init(struct drm_device *dev, + unsigned long prealloc_start, unsigned long prealloc_size, unsigned long agp_size) { @@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev, /* Basic memrange allocator for stolen space (aka vram) */ drm_mm_init(&dev_priv->vram, 0, prealloc_size); + DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); + + /* We're off and running w/KMS */ + dev_priv->mm.suspended = 0; /* Let GEM Manage from end of prealloc space to end of aperture. * @@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev, */ i915_gem_do_init(dev, prealloc_size, agp_size - 4096); + mutex_lock(&dev->struct_mutex); ret = i915_gem_init_ringbuffer(dev); + mutex_unlock(&dev->struct_mutex); if (ret) goto out; + /* Try to set up FBC with a reasonable compressed buffer size */ + if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) && + i915_powersave) { + int cfb_size; + + /* Try to get an 8M buffer... */ + if (prealloc_size > (9*1024*1024)) + cfb_size = 8*1024*1024; + else /* fall back to 7/8 of the stolen space */ + cfb_size = prealloc_size * 7 / 8; + i915_setup_compression(dev, cfb_size); + } + /* Allow hardware batchbuffers unless told otherwise. */ dev_priv->allow_batchbuffer = 1; @@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) struct drm_i915_private *dev_priv = dev->dev_private; resource_size_t base, size; int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1; - uint32_t agp_size, prealloc_size; + uint32_t agp_size, prealloc_size, prealloc_start; /* i915 has 4 more counters */ dev->counters += 4; @@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); + ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start); if (ret) goto out_iomapfree; @@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return ret; } + /* Start out suspended */ + dev_priv->mm.suspended = 1; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = i915_load_modeset_init(dev, prealloc_size, agp_size); + ret = i915_load_modeset_init(dev, prealloc_start, + prealloc_size, agp_size); if (ret < 0) { DRM_ERROR("failed to init modeset\n"); goto out_workqueue_free; @@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_IGDNG(dev)) intel_opregion_init(dev, 0); + setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, + (unsigned long) dev); return 0; out_workqueue_free: @@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; destroy_workqueue(dev_priv->wq); + del_timer_sync(&dev_priv->hangcheck_timer); io_mapping_free(dev_priv->mm.gtt_mapping); if (dev_priv->mm.gtt_mtrr >= 0) { @@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index dbe568c9327..b93814c0d3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state) pci_set_power_state(dev->pdev, PCI_D3hot); } + dev_priv->suspended = 1; + return 0; } @@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - pci_set_power_state(dev->pdev, PCI_D0); - pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) return -1; pci_set_master(dev->pdev); @@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev) drm_helper_resume_force_mode(dev); } + dev_priv->suspended = 0; + return ret; } +/** + * i965_reset - reset chip after a hang + * @dev: drm device to reset + * @flags: reset domains + * + * Reset the chip. Useful if a hang is detected. Returns zero on successful + * reset or otherwise an error code. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +int i965_reset(struct drm_device *dev, u8 flags) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned long timeout; + u8 gdrst; + /* + * We really should only reset the display subsystem if we actually + * need to + */ + bool need_display = true; + + mutex_lock(&dev->struct_mutex); + + /* + * Clear request list + */ + i915_gem_retire_requests(dev); + + if (need_display) + i915_save_display(dev); + + if (IS_I965G(dev) || IS_G4X(dev)) { + /* + * Set the domains we want to reset, then the reset bit (bit 0). + * Clear the reset bit after a while and wait for hardware status + * bit (bit 1) to be set + */ + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0)); + udelay(50); + pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe); + + /* ...we don't want to loop forever though, 500ms should be plenty */ + timeout = jiffies + msecs_to_jiffies(500); + do { + udelay(100); + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + } while ((gdrst & 0x1) && time_after(timeout, jiffies)); + + if (gdrst & 0x1) { + WARN(true, "i915: Failed to reset chip\n"); + mutex_unlock(&dev->struct_mutex); + return -EIO; + } + } else { + DRM_ERROR("Error occurred. Don't know how to reset this chip.\n"); + return -ENODEV; + } + + /* Ok, now get things going again... */ + + /* + * Everything depends on having the GTT running, so we need to start + * there. Fortunately we don't need to do this unless we reset the + * chip at a PCI level. + * + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + if (drm_core_check_feature(dev, DRIVER_MODESET) || + !dev_priv->mm.suspended) { + drm_i915_ring_buffer_t *ring = &dev_priv->ring; + struct drm_gem_object *obj = ring->ring_obj; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + dev_priv->mm.suspended = 0; + + /* Stop the ring if it's running. */ + I915_WRITE(PRB0_CTL, 0); + I915_WRITE(PRB0_TAIL, 0); + I915_WRITE(PRB0_HEAD, 0); + + /* Initialize the ring. */ + I915_WRITE(PRB0_START, obj_priv->gtt_offset); + I915_WRITE(PRB0_CTL, + ((obj->size - 4096) & RING_NR_PAGES) | + RING_NO_REPORT | + RING_VALID); + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + i915_kernel_lost_context(dev); + else { + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; + } + + mutex_unlock(&dev->struct_mutex); + drm_irq_uninstall(dev); + drm_irq_install(dev); + mutex_lock(&dev->struct_mutex); + } + + /* + * Display needs restore too... + */ + if (need_display) + i915_restore_display(dev); + + mutex_unlock(&dev->struct_mutex); + return 0; +} + + static int __devinit i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -234,6 +360,8 @@ static int __init i915_init(void) { driver.num_ioctls = i915_max_ioctl; + i915_gem_shrinker_init(); + /* * If CONFIG_DRM_I915_KMS is set, default to KMS unless * explicitly disabled with the module pararmeter. @@ -260,6 +388,7 @@ static int __init i915_init(void) static void __exit i915_exit(void) { + i915_gem_shrinker_exit(); drm_exit(&driver); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a0632f8e76a..b24b2d145b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -48,6 +48,11 @@ enum pipe { PIPE_B, }; +enum plane { + PLANE_A = 0, + PLANE_B, +}; + #define I915_NUM_PIPE 2 /* Interface history: @@ -148,6 +153,23 @@ struct drm_i915_error_state { struct timeval time; }; +struct drm_i915_display_funcs { + void (*dpms)(struct drm_crtc *crtc, int mode); + bool (*fbc_enabled)(struct drm_crtc *crtc); + void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval); + void (*disable_fbc)(struct drm_device *dev); + int (*get_display_clock_speed)(struct drm_device *dev); + int (*get_fifo_size)(struct drm_device *dev, int plane); + void (*update_wm)(struct drm_device *dev, int planea_clock, + int planeb_clock, int sr_hdisplay, int pixel_size); + /* clock updates for mode set */ + /* cursor updates */ + /* render clock increase/decrease */ + /* display clock increase/decrease */ + /* pll clock increase/decrease */ + /* clock gating init */ +}; + typedef struct drm_i915_private { struct drm_device *dev; @@ -198,10 +220,21 @@ typedef struct drm_i915_private { unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds; int vblank_pipe; + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */ + struct timer_list hangcheck_timer; + int hangcheck_count; + uint32_t last_acthd; + bool cursor_needs_physical; struct drm_mm vram; + unsigned long cfb_size; + unsigned long cfb_pitch; + int cfb_fence; + int cfb_plane; + int irq_enabled; struct intel_opregion opregion; @@ -222,6 +255,8 @@ typedef struct drm_i915_private { unsigned int edp_support:1; int lvds_ssc_freq; + struct notifier_block lid_notifier; + int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */ struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */ int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */ @@ -234,7 +269,11 @@ typedef struct drm_i915_private { struct work_struct error_work; struct workqueue_struct *wq; + /* Display functions */ + struct drm_i915_display_funcs display; + /* Register state */ + bool suspended; u8 saveLBB; u32 saveDSPACNTR; u32 saveDSPBCNTR; @@ -350,6 +389,15 @@ typedef struct drm_i915_private { int gtt_mtrr; /** + * Membership on list of all loaded devices, used to evict + * inactive buffers under memory pressure. + * + * Modifications should only be done whilst holding the + * shrink_list_lock spinlock. + */ + struct list_head shrink_list; + + /** * List of objects currently involved in rendering from the * ringbuffer. * @@ -432,7 +480,7 @@ typedef struct drm_i915_private { * It prevents command submission from occuring and makes * every pending request fail */ - int wedged; + atomic_t wedged; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; @@ -491,10 +539,7 @@ struct drm_i915_gem_object { * This is the same as gtt_space->start */ uint32_t gtt_offset; - /** - * Required alignment for the object - */ - uint32_t gtt_alignment; + /** * Fake offset for use by mmap(2) */ @@ -541,6 +586,11 @@ struct drm_i915_gem_object { * in an execbuffer object list. */ int in_execbuffer; + + /** + * Advice: are the backing pages purgeable? + */ + int madv; }; /** @@ -585,6 +635,8 @@ extern int i915_max_ioctl; extern unsigned int i915_fbpercrtc; extern unsigned int i915_powersave; +extern void i915_save_display(struct drm_device *dev); +extern void i915_restore_display(struct drm_device *dev); extern int i915_master_create(struct drm_device *dev, struct drm_master *master); extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master); @@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int i915_emit_box(struct drm_device *dev, struct drm_clip_rect *boxes, int i, int DR1, int DR4); +extern int i965_reset(struct drm_device *dev, u8 flags); /* i915_irq.c */ +void i915_hangcheck_elapsed(unsigned long data); extern int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_irq_wait(struct drm_device *dev, void *data, @@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, @@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj); void i915_gem_release_mmap(struct drm_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev); +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2); int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); int i915_gem_object_put_fence_reg(struct drm_gem_object *obj); void i915_gem_retire_requests(struct drm_device *dev); @@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj); void i915_gem_object_put_pages(struct drm_gem_object *obj); void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv); +void i915_gem_shrinker_init(void); +void i915_gem_shrinker_exit(void); + /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); @@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; } extern void intel_modeset_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state); +extern void i8xx_disable_fbc(struct drm_device *dev); +extern void g4x_disable_fbc(struct drm_device *dev); /** * Lock test for when it's just for synchronization of ring access. @@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ (dev)->pci_device == 0x0042 || \ (dev)->pci_device == 0x0046) @@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ IS_GM45(dev)) #define IS_IGDG(dev) ((dev)->pci_device == 0xa001) @@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev)) -#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) +#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev)) /* dsparb controlled by hw only */ #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev)) #define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev)) +#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev))) #define PRIMARY_RINGBUFFER_SIZE (128*1024) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c67317112f4..40727d4c291 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #include <linux/swap.h> #include <linux/pci.h> @@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); -static int i915_gem_evict_something(struct drm_device *dev); +static int i915_gem_evict_something(struct drm_device *dev, int min_size); +static int i915_gem_evict_from_inactive_list(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); +static LIST_HEAD(shrink_list); +static DEFINE_SPINLOCK(shrink_list_lock); + int i915_gem_do_init(struct drm_device *dev, unsigned long start, unsigned long end) { @@ -316,6 +321,45 @@ fail_unlock: return ret; } +static inline gfp_t +i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj) +{ + return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping); +} + +static inline void +i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp) +{ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp); +} + +static int +i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj) +{ + int ret; + + ret = i915_gem_object_get_pages(obj); + + /* If we've insufficient memory to map in the pages, attempt + * to make some space by throwing out some old buffers. + */ + if (ret == -ENOMEM) { + struct drm_device *dev = obj->dev; + gfp_t gfp; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) + return ret; + + gfp = i915_gem_object_get_page_gfp_mask(obj); + i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY); + ret = i915_gem_object_get_pages(obj); + i915_gem_object_set_page_gfp_mask (obj, gfp); + } + + return ret; +} + /** * This is the fallback shmem pread path, which allocates temporary storage * in kernel space to copy_to_user into outside of the struct_mutex, so we @@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, @@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Now bind it into the GTT if needed */ mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + ret = i915_gem_object_bind_to_gtt(obj, 0); + if (ret) + goto unlock; list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unlock; } /* Need a new fence register? */ if (obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + @@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); - +unlock: mutex_unlock(&dev->struct_mutex); switch (ret) { + case 0: + case -ERESTARTSYS: + return VM_FAULT_NOPAGE; case -ENOMEM: case -EAGAIN: return VM_FAULT_OOM; - case -EFAULT: - case -EINVAL: - return VM_FAULT_SIGBUS; default: - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } } @@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, obj_priv = obj->driver_private; + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to mmap a purgeable buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + + if (!obj_priv->mmap_offset) { ret = i915_gem_create_mmap_offset(obj); if (ret) { @@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, args->offset = obj_priv->mmap_offset; - obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj); - - /* Make sure the alignment is correct for fence regs etc */ - if (obj_priv->agp_mem && - (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - /* * Pull it into the GTT so that we have a page list (makes the * initial fault faster and any subsequent flushing possible). */ if (!obj_priv->agp_mem) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); + ret = i915_gem_object_bind_to_gtt(obj, 0); if (ret) { drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); @@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) int i; BUG_ON(obj_priv->pages_refcount == 0); + BUG_ON(obj_priv->madv == __I915_MADV_PURGED); if (--obj_priv->pages_refcount != 0) return; @@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (obj_priv->tiling_mode != I915_TILING_NONE) i915_gem_object_save_bit_17_swizzle(obj); - for (i = 0; i < page_count; i++) - if (obj_priv->pages[i] != NULL) { - if (obj_priv->dirty) - set_page_dirty(obj_priv->pages[i]); + if (obj_priv->madv == I915_MADV_DONTNEED) + obj_priv->dirty = 0; + + for (i = 0; i < page_count; i++) { + if (obj_priv->pages[i] == NULL) + break; + + if (obj_priv->dirty) + set_page_dirty(obj_priv->pages[i]); + + if (obj_priv->madv == I915_MADV_WILLNEED) mark_page_accessed(obj_priv->pages[i]); - page_cache_release(obj_priv->pages[i]); - } + + page_cache_release(obj_priv->pages[i]); + } obj_priv->dirty = 0; drm_free_large(obj_priv->pages); @@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj) obj_priv->last_rendering_seqno = 0; } +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_gem_object *obj) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct inode *inode; + + inode = obj->filp->f_path.dentry->d_inode; + if (inode->i_op->truncate) + inode->i_op->truncate (inode); + + obj_priv->madv = __I915_MADV_PURGED; +} + +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) +{ + return obj_priv->madv == I915_MADV_DONTNEED; +} + static void i915_gem_object_move_to_inactive(struct drm_gem_object *obj) { @@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, if ((obj->write_domain & flush_domains) == obj->write_domain) { + uint32_t old_write_domain = obj->write_domain; + obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } } } - if (was_empty && !dev_priv->mm.suspended) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + if (!dev_priv->mm.suspended) { + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + if (was_empty) + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + } return seqno; } @@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev, { drm_i915_private_t *dev_priv = dev->dev_private; + trace_i915_gem_request_retire(dev, request->seqno); + /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. */ @@ -1671,7 +1747,7 @@ out: /** * Returns true if seq1 is later than seq2. */ -static int +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2) { return (int32_t)(seq1 - seq2) >= 0; @@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev) retiring_seqno = request->seqno; if (i915_seqno_passed(seqno, retiring_seqno) || - dev_priv->mm.wedged) { + atomic_read(&dev_priv->mm.wedged)) { i915_gem_retire_request(dev, request); list_del(&request->list); @@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) BUG_ON(seqno == 0); + if (atomic_read(&dev_priv->mm.wedged)) + return -EIO; + if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { if (IS_IGDNG(dev)) ier = I915_READ(DEIER) | I915_READ(GTIER); @@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) i915_driver_irq_postinstall(dev); } + trace_i915_gem_request_wait_begin(dev, seqno); + dev_priv->mm.waiting_gem_seqno = seqno; i915_user_irq_get(dev); ret = wait_event_interruptible(dev_priv->irq_queue, i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || - dev_priv->mm.wedged); + atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; + + trace_i915_gem_request_wait_end(dev, seqno); } - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; if (ret && ret != -ERESTARTSYS) @@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev, DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, invalidate_domains, flush_domains); #endif + trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno, + invalidate_domains, flush_domains); if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); @@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return -EINVAL; } + /* blow away mappings if mapped through GTT */ + i915_gem_release_mmap(obj); + + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) + i915_gem_clear_fence_reg(obj); + /* Move the object to the CPU domain to ensure that * any possible CPU writes while it's not in the GTT * are flushed when we go to remap it. This will @@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return ret; } + BUG_ON(obj_priv->active); + if (obj_priv->agp_mem != NULL) { drm_unbind_agp(obj_priv->agp_mem); drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); obj_priv->agp_mem = NULL; } - BUG_ON(obj_priv->active); - - /* blow away mappings if mapped through GTT */ - i915_gem_release_mmap(obj); - - if (obj_priv->fence_reg != I915_FENCE_REG_NONE) - i915_gem_clear_fence_reg(obj); - i915_gem_object_put_pages(obj); + BUG_ON(obj_priv->pages_refcount); if (obj_priv->gtt_space) { atomic_dec(&dev->gtt_count); @@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (!list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); + if (i915_gem_object_is_purgeable(obj_priv)) + i915_gem_object_truncate(obj); + + trace_i915_gem_object_unbind(obj); + return 0; } +static struct drm_gem_object * +i915_gem_find_inactive_object(struct drm_device *dev, int min_size) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *best = NULL; + struct drm_gem_object *first = NULL; + + /* Try to find the smallest clean object */ + list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { + struct drm_gem_object *obj = obj_priv->obj; + if (obj->size >= min_size) { + if ((!obj_priv->dirty || + i915_gem_object_is_purgeable(obj_priv)) && + (!best || obj->size < best->size)) { + best = obj; + if (best->size == min_size) + return best; + } + if (!first) + first = obj; + } + } + + return best ? best : first; +} + static int -i915_gem_evict_something(struct drm_device *dev) +i915_gem_evict_everything(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t seqno; + int ret; + bool lists_empty; + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + + if (lists_empty) + return -ENOSPC; + + /* Flush everything (on to the inactive lists) and evict */ + i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); + if (seqno == 0) + return -ENOMEM; + + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + ret = i915_gem_evict_from_inactive_list(dev); + if (ret) + return ret; + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + BUG_ON(!lists_empty); + + return 0; +} + +static int +i915_gem_evict_something(struct drm_device *dev, int min_size) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret = 0; + int ret; for (;;) { + i915_gem_retire_requests(dev); + /* If there's an inactive buffer available now, grab it * and be done. */ - if (!list_empty(&dev_priv->mm.inactive_list)) { - obj_priv = list_first_entry(&dev_priv->mm.inactive_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; - BUG_ON(obj_priv->pin_count != 0); + obj = i915_gem_find_inactive_object(dev, min_size); + if (obj) { + struct drm_i915_gem_object *obj_priv; + #if WATCH_LRU DRM_INFO("%s: evicting %p\n", __func__, obj); #endif + obj_priv = obj->driver_private; + BUG_ON(obj_priv->pin_count != 0); BUG_ON(obj_priv->active); /* Wait on the rendering and unbind the buffer. */ - ret = i915_gem_object_unbind(obj); - break; + return i915_gem_object_unbind(obj); } /* If we didn't get anything, but the ring is still processing - * things, wait for one of those things to finish and hopefully - * leave us a buffer to evict. + * things, wait for the next to finish and hopefully leave us + * a buffer to evict. */ if (!list_empty(&dev_priv->mm.request_list)) { struct drm_i915_gem_request *request; @@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev) ret = i915_wait_request(dev, request->seqno); if (ret) - break; + return ret; - /* if waiting caused an object to become inactive, - * then loop around and wait for it. Otherwise, we - * assume that waiting freed and unbound something, - * so there should now be some space in the GTT - */ - if (!list_empty(&dev_priv->mm.inactive_list)) - continue; - break; + continue; } /* If we didn't have anything on the request list but there @@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev) * will get moved to inactive. */ if (!list_empty(&dev_priv->mm.flushing_list)) { - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + struct drm_i915_gem_object *obj_priv; - i915_gem_flush(dev, - obj->write_domain, - obj->write_domain); - i915_add_request(dev, NULL, obj->write_domain); + /* Find an object that we can immediately reuse */ + list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { + obj = obj_priv->obj; + if (obj->size >= min_size) + break; - obj = NULL; - continue; - } + obj = NULL; + } - DRM_ERROR("inactive empty %d request empty %d " - "flushing empty %d\n", - list_empty(&dev_priv->mm.inactive_list), - list_empty(&dev_priv->mm.request_list), - list_empty(&dev_priv->mm.flushing_list)); - /* If we didn't do any of the above, there's nothing to be done - * and we just can't fit it in. - */ - return -ENOSPC; - } - return ret; -} + if (obj != NULL) { + uint32_t seqno; -static int -i915_gem_evict_everything(struct drm_device *dev) -{ - int ret; + i915_gem_flush(dev, + obj->write_domain, + obj->write_domain); + seqno = i915_add_request(dev, NULL, obj->write_domain); + if (seqno == 0) + return -ENOMEM; - for (;;) { - ret = i915_gem_evict_something(dev); - if (ret != 0) - break; + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + continue; + } + } + + /* If we didn't do any of the above, there's no single buffer + * large enough to swap out for the new one, so just evict + * everything and start again. (This should be rare.) + */ + if (!list_empty (&dev_priv->mm.inactive_list)) + return i915_gem_evict_from_inactive_list(dev); + else + return i915_gem_evict_everything(dev); } - if (ret == -ENOSPC) - return 0; - return ret; } int @@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) BUG_ON(obj_priv->pages != NULL); obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { - DRM_ERROR("Faled to allocate page list\n"); obj_priv->pages_refcount--; return -ENOMEM; } @@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) page = read_mapping_page(mapping, i, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); - DRM_ERROR("read_mapping_page failed: %d\n", ret); i915_gem_object_put_pages(obj); return ret; } @@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) else i830_write_fence_reg(reg); + trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); + return 0; } @@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_mm_node *free_space; - int page_count, ret; + bool retry_alloc = false; + int ret; if (dev_priv->mm.suspended) return -EBUSY; + + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to bind a purgeable object\n"); + return -EINVAL; + } + if (alignment == 0) alignment = i915_gem_get_gtt_alignment(obj); if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { @@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) } } if (obj_priv->gtt_space == NULL) { - bool lists_empty; - /* If the gtt is empty and we're still having trouble * fitting our object in, we're out of memory. */ #if WATCH_LRU DRM_INFO("%s: GTT full, evicting something\n", __func__); #endif - spin_lock(&dev_priv->mm.active_list_lock); - lists_empty = (list_empty(&dev_priv->mm.inactive_list) && - list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list)); - spin_unlock(&dev_priv->mm.active_list_lock); - if (lists_empty) { - DRM_ERROR("GTT full, but LRU list empty\n"); - return -ENOSPC; - } - - ret = i915_gem_evict_something(dev); - if (ret != 0) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to evict a buffer %d\n", ret); + ret = i915_gem_evict_something(dev, obj->size); + if (ret) return ret; - } + goto search_free; } @@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) DRM_INFO("Binding object of size %zd at 0x%08x\n", obj->size, obj_priv->gtt_offset); #endif + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY); + } ret = i915_gem_object_get_pages(obj); + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY); + } if (ret) { drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; + + if (ret == -ENOMEM) { + /* first try to clear up some space from the GTT */ + ret = i915_gem_evict_something(dev, obj->size); + if (ret) { + /* now try to shrink everyone else */ + if (! retry_alloc) { + retry_alloc = true; + goto search_free; + } + + return ret; + } + + goto search_free; + } + return ret; } - page_count = obj->size / PAGE_SIZE; /* Create an AGP memory structure pointing at our pages, and bind it * into the GTT. */ obj_priv->agp_mem = drm_agp_bind_pages(dev, obj_priv->pages, - page_count, + obj->size >> PAGE_SHIFT, obj_priv->gtt_offset, obj_priv->agp_type); if (obj_priv->agp_mem == NULL) { i915_gem_object_put_pages(obj); drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; - return -ENOMEM; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) + return ret; + + goto search_free; } atomic_inc(&dev->gtt_count); atomic_add(obj->size, &dev->gtt_memory); @@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); + return 0; } @@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; - /* XXX: The 865 in particular appears to be weird in how it handles - * cache flushing. We haven't figured it out, but the - * clflush+agp_chipset_flush doesn't appear to successfully get the - * data visible to the PGU, while wbinvd + agp_chipset_flush does. - */ - if (IS_I865G(obj->dev)) { - wbinvd(); - return; - } + trace_i915_gem_object_clflush(obj); drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; uint32_t seqno; + uint32_t old_write_domain; if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) return; /* Queue the GPU write cache flushing we need. */ + old_write_domain = obj->write_domain; i915_gem_flush(dev, 0, obj->write_domain); seqno = i915_add_request(dev, NULL, obj->write_domain); obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) { + uint32_t old_write_domain; + if (obj->write_domain != I915_GEM_DOMAIN_GTT) return; @@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. */ + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -2560,13 +2739,19 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + uint32_t old_write_domain; if (obj->write_domain != I915_GEM_DOMAIN_CPU) return; i915_gem_clflush_object(obj); drm_agp_chipset_flush(dev); + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** @@ -2579,6 +2764,7 @@ int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ @@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) if (ret != 0) return ret; + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* If we're writing through the GTT domain, then CPU and GPU caches * will need to be invalidated at next use. */ @@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) obj_priv->dirty = 1; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { + uint32_t old_write_domain, old_read_domains; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) */ i915_gem_object_set_to_full_cpu_read_domain(obj); + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); @@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) obj->write_domain = I915_GEM_DOMAIN_CPU; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = obj->driver_private; uint32_t invalidate_domains = 0; uint32_t flush_domains = 0; + uint32_t old_read_domains; BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); @@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) i915_gem_clflush_object(obj); } + old_read_domains = obj->read_domains; + /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' @@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) obj->read_domains, obj->write_domain, dev->invalidate_domains, dev->flush_domains); #endif + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); } /** @@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_read_domains; int i, ret; if (offset == 0 && size == obj->size) @@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, */ BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + old_read_domains = obj->read_domains; obj->read_domains |= I915_GEM_DOMAIN_CPU; + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); + return 0; } @@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, } target_obj_priv = target_obj->driver_private; +#if WATCH_RELOC + DRM_INFO("%s: obj %p offset %08x target %d " + "read %08x write %08x gtt %08x " + "presumed %08x delta %08x\n", + __func__, + obj, + (int) reloc->offset, + (int) reloc->target_handle, + (int) reloc->read_domains, + (int) reloc->write_domain, + (int) target_obj_priv->gtt_offset, + (int) reloc->presumed_offset, + reloc->delta); +#endif + /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. */ @@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } - if (reloc->offset > obj->size - 4) { - DRM_ERROR("Relocation beyond object bounds: " - "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, (int) obj->size); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - if (reloc->offset & 3) { - DRM_ERROR("Relocation not 4-byte aligned: " - "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - + /* Validate that the target is in a valid r/w GPU domain */ if (reloc->write_domain & I915_GEM_DOMAIN_CPU || reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " @@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->write_domain && target_obj->pending_write_domain && reloc->write_domain != target_obj->pending_write_domain) { DRM_ERROR("Write domain conflict: " @@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } -#if WATCH_RELOC - DRM_INFO("%s: obj %p offset %08x target %d " - "read %08x write %08x gtt %08x " - "presumed %08x delta %08x\n", - __func__, - obj, - (int) reloc->offset, - (int) reloc->target_handle, - (int) reloc->read_domains, - (int) reloc->write_domain, - (int) target_obj_priv->gtt_offset, - (int) reloc->presumed_offset, - reloc->delta); -#endif - target_obj->pending_read_domains |= reloc->read_domains; target_obj->pending_write_domain |= reloc->write_domain; @@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, continue; } + /* Check that the relocation address is valid... */ + if (reloc->offset > obj->size - 4) { + DRM_ERROR("Relocation beyond object bounds: " + "obj %p target %d offset %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->offset, (int) obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + if (reloc->offset & 3) { + DRM_ERROR("Relocation not 4-byte aligned: " + "obj %p target %d offset %d.\n", + obj, reloc->target_handle, + (int) reloc->offset); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + + /* and points to somewhere within the target object. */ + if (reloc->delta >= target_obj->size) { + DRM_ERROR("Relocation beyond target object bounds: " + "obj %p target %d delta %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->delta, (int) target_obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + ret = i915_gem_object_set_to_gtt_domain(obj, 1); if (ret != 0) { drm_gem_object_unreference(target_obj); @@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, exec_start = (uint32_t) exec_offset + exec->batch_start_offset; exec_len = (uint32_t) exec->batch_len; + trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno); + count = nbox ? nbox : 1; for (i = 0; i < count; i++) { @@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, i915_verify_inactive(dev, __FILE__, __LINE__); - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Execbuf while wedged\n"); mutex_unlock(&dev->struct_mutex); ret = -EIO; @@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* error other than GTT full, or we've already tried again */ if (ret != -ENOSPC || pin_tries >= 1) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to pin buffers %d\n", ret); + if (ret != -ERESTARTSYS) { + unsigned long long total_size = 0; + for (i = 0; i < args->buffer_count; i++) + total_size += object_list[i]->size; + DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n", + pinned+1, args->buffer_count, + total_size, ret); + DRM_ERROR("%d objects [%d pinned], " + "%d object bytes [%d pinned], " + "%d/%d gtt bytes\n", + atomic_read(&dev->object_count), + atomic_read(&dev->pin_count), + atomic_read(&dev->object_memory), + atomic_read(&dev->pin_memory), + atomic_read(&dev->gtt_memory), + dev->gtt_total); + } goto err; } @@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* evict everyone we can from the aperture */ ret = i915_gem_evict_everything(dev); - if (ret) + if (ret && ret != -ENOSPC) goto err; } @@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; + uint32_t old_write_domain = obj->write_domain; obj->write_domain = obj->pending_write_domain; + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) i915_verify_inactive(dev, __FILE__, __LINE__); if (obj_priv->gtt_space == NULL) { ret = i915_gem_object_bind_to_gtt(obj, alignment); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to bind: %d\n", ret); + if (ret) return ret; - } } /* * Pre-965 chips need a fence register set up in order to @@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } obj_priv = obj->driver_private; + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to pin a purgeable buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", args->handle); @@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, return i915_gem_ring_throttle(dev, file_priv); } +int +i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_madvise *args = data; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + switch (args->madv) { + case I915_MADV_DONTNEED: + case I915_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) { + DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", + args->handle); + return -EBADF; + } + + mutex_lock(&dev->struct_mutex); + obj_priv = obj->driver_private; + + if (obj_priv->pin_count) { + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); + return -EINVAL; + } + + if (obj_priv->madv != __I915_MADV_PURGED) + obj_priv->madv = args->madv; + + /* if the object is no longer bound, discard its backing storage */ + if (i915_gem_object_is_purgeable(obj_priv) && + obj_priv->gtt_space == NULL) + i915_gem_object_truncate(obj); + + args->retained = obj_priv->madv != __I915_MADV_PURGED; + + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + int i915_gem_init_object(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv; @@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj) obj_priv->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj_priv->list); INIT_LIST_HEAD(&obj_priv->fence_list); + obj_priv->madv = I915_MADV_WILLNEED; + + trace_i915_gem_object_create(obj); return 0; } @@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; + trace_i915_gem_object_destroy(obj); + while (obj_priv->pin_count > 0) i915_gem_object_unpin(obj); @@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_object_unbind(obj); - i915_gem_free_mmap_offset(obj); + if (obj_priv->mmap_offset) + i915_gem_free_mmap_offset(obj); kfree(obj_priv->page_cpu_valid); kfree(obj_priv->bit_17); kfree(obj->driver_private); } -/** Unbinds all objects that are on the given buffer list. */ +/** Unbinds all inactive objects. */ static int -i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) +i915_gem_evict_from_inactive_list(struct drm_device *dev) { - struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret; + drm_i915_private_t *dev_priv = dev->dev_private; - while (!list_empty(head)) { - obj_priv = list_first_entry(head, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + while (!list_empty(&dev_priv->mm.inactive_list)) { + struct drm_gem_object *obj; + int ret; - if (obj_priv->pin_count != 0) { - DRM_ERROR("Pinned object in unbind list\n"); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } + obj = list_first_entry(&dev_priv->mm.inactive_list, + struct drm_i915_gem_object, + list)->obj; ret = i915_gem_object_unbind(obj); if (ret != 0) { - DRM_ERROR("Error unbinding object in LeaveVT: %d\n", - ret); - mutex_unlock(&dev->struct_mutex); + DRM_ERROR("Error unbinding object: %d\n", ret); return ret; } } - return 0; } @@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev) * We need to replace this with a semaphore, or something. */ dev_priv->mm.suspended = 1; + del_timer(&dev_priv->hangcheck_timer); /* Cancel the retire work handler, wait for it to finish if running */ @@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev) if (last_seqno == cur_seqno) { if (stuck++ > 100) { DRM_ERROR("hardware wedged\n"); - dev_priv->mm.wedged = 1; + atomic_set(&dev_priv->mm.wedged, 1); DRM_WAKEUP(&dev_priv->irq_queue); break; } @@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev) i915_gem_retire_requests(dev); spin_lock(&dev_priv->mm.active_list_lock); - if (!dev_priv->mm.wedged) { + if (!atomic_read(&dev_priv->mm.wedged)) { /* Active and flushing should now be empty as we've * waited for a sequence higher than any pending execbuffer */ @@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev) * the GPU domains and just stuff them onto inactive. */ while (!list_empty(&dev_priv->mm.active_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.active_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.active_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } spin_unlock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.flushing_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.flushing_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /* Move all inactive buffers out of the GTT. */ - ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); + ret = i915_gem_evict_from_inactive_list(dev); WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); if (ret) { mutex_unlock(&dev->struct_mutex); @@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); } mutex_lock(&dev->struct_mutex); @@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev) i915_gem_retire_work_handler); dev_priv->mm.next_gem_seqno = 1; + spin_lock(&shrink_list_lock); + list_add(&dev_priv->mm.shrink_list, &shrink_list); + spin_unlock(&shrink_list_lock); + /* Old X drivers will take 0-2 for front, back, depth buffers */ dev_priv->fence_reg_start = 3; @@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) list_del_init(i915_file_priv->mm.request_list.next); mutex_unlock(&dev->struct_mutex); } + +static int +i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) +{ + drm_i915_private_t *dev_priv, *next_dev; + struct drm_i915_gem_object *obj_priv, *next_obj; + int cnt = 0; + int would_deadlock = 1; + + /* "fast-path" to count number of available objects */ + if (nr_to_scan == 0) { + spin_lock(&shrink_list_lock); + list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (mutex_trylock(&dev->struct_mutex)) { + list_for_each_entry(obj_priv, + &dev_priv->mm.inactive_list, + list) + cnt++; + mutex_unlock(&dev->struct_mutex); + } + } + spin_unlock(&shrink_list_lock); + + return (cnt / 100) * sysctl_vfs_cache_pressure; + } + + spin_lock(&shrink_list_lock); + + /* first scan for clean buffers */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + i915_gem_retire_requests(dev); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (i915_gem_object_is_purgeable(obj_priv)) { + i915_gem_object_unbind(obj_priv->obj); + if (--nr_to_scan <= 0) + break; + } + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + would_deadlock = 0; + + if (nr_to_scan <= 0) + break; + } + + /* second pass, evict/count anything still on the inactive list */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (nr_to_scan > 0) { + i915_gem_object_unbind(obj_priv->obj); + nr_to_scan--; + } else + cnt++; + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + would_deadlock = 0; + } + + spin_unlock(&shrink_list_lock); + + if (would_deadlock) + return -1; + else if (cnt > 0) + return (cnt / 100) * sysctl_vfs_cache_pressure; + else + return 0; +} + +static struct shrinker shrinker = { + .shrink = i915_gem_shrink, + .seeks = DEFAULT_SEEKS, +}; + +__init void +i915_gem_shrinker_init(void) +{ + register_shrinker(&shrinker); +} + +__exit void +i915_gem_shrinker_exit(void) +{ + unregister_shrinker(&shrinker); +} diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6c89f2ff249..4dfeec7cdd4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -31,6 +31,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #define MAX_NOPID ((u32)~0) @@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) } if (gt_iir & GT_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); } @@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, error_work); struct drm_device *dev = dev_priv->dev; - char *event_string = "ERROR=1"; - char *envp[] = { event_string, NULL }; + char *error_event[] = { "ERROR=1", NULL }; + char *reset_event[] = { "RESET=1", NULL }; + char *reset_done_event[] = { "ERROR=0", NULL }; DRM_DEBUG("generating error event\n"); - - kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); + + if (atomic_read(&dev_priv->mm.wedged)) { + if (IS_I965G(dev)) { + DRM_DEBUG("resetting chip\n"); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); + if (!i965_reset(dev, GDRST_RENDER)) { + atomic_set(&dev_priv->mm.wedged, 0); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); + } + } else { + printk("reboot required\n"); + } + } } /** @@ -372,7 +388,7 @@ out: * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -static void i915_handle_error(struct drm_device *dev) +static void i915_handle_error(struct drm_device *dev, bool wedged) { struct drm_i915_private *dev_priv = dev->dev_private; u32 eir = I915_READ(EIR); @@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } + if (wedged) { + atomic_set(&dev_priv->mm.wedged, 1); + + /* + * Wakeup waiting processes so they don't hang + */ + printk("i915: Waking up sleeping processes\n"); + DRM_WAKEUP(&dev_priv->irq_queue); + } + queue_work(dev_priv->wq, &dev_priv->error_work); } @@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipeb_stats = I915_READ(PIPEBSTAT); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev); + i915_handle_error(dev, false); /* * Clear the PIPE(A|B)STAT regs before the IIR @@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) } if (iir & I915_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); + dev_priv->hangcheck_count = 0; + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); } if (pipea_stats & vblank_status) { @@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data, return -EINVAL; } +struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) { + drm_i915_private_t *dev_priv = dev->dev_private; + return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list); +} + +/** + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. The first time this is called we simply record + * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses + * again, we assume the chip is wedged and try to fix it. + */ +void i915_hangcheck_elapsed(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t acthd; + + if (!IS_I965G(dev)) + acthd = I915_READ(ACTHD); + else + acthd = I915_READ(ACTHD_I965); + + /* If all work is done then ACTHD clearly hasn't advanced. */ + if (list_empty(&dev_priv->mm.request_list) || + i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) { + dev_priv->hangcheck_count = 0; + return; + } + + if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { + DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); + i915_handle_error(dev, true); + return; + } + + /* Reset timer case chip hangs without another request being added */ + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + + if (acthd != dev_priv->last_acthd) + dev_priv->hangcheck_count = 0; + else + dev_priv->hangcheck_count++; + + dev_priv->last_acthd = acthd; +} + /* drm_dma.h hooks */ static void igdng_irq_preinstall(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c index e4b4e8898e3..2d5193556d3 100644 --- a/drivers/gpu/drm/i915/i915_opregion.c +++ b/drivers/gpu/drm/i915/i915_opregion.c @@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) struct drm_i915_private *dev_priv = dev->dev_private; struct opregion_asle *asle = dev_priv->opregion.asle; u32 blc_pwm_ctl, blc_pwm_ctl2; + u32 max_backlight, level, shift; if (!(bclp & ASLE_BCLP_VALID)) return ASLE_BACKLIGHT_FAIL; @@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) return ASLE_BACKLIGHT_FAIL; blc_pwm_ctl = I915_READ(BLC_PWM_CTL); - blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK; blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2); - if (blc_pwm_ctl2 & BLM_COMBINATION_MODE) + if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE)) pci_write_config_dword(dev->pdev, PCI_LBPC, bclp); - else - I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1)); - + else { + if (IS_IGD(dev)) { + blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1); + max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> + BACKLIGHT_MODULATION_FREQ_SHIFT; + shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1; + } else { + blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK; + max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> + BACKLIGHT_MODULATION_FREQ_SHIFT) * 2; + shift = BACKLIGHT_DUTY_CYCLE_SHIFT; + } + level = (bclp * max_backlight) / 255; + I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift)); + } asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID; return 0; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f796355346..0466ddbeba3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -86,6 +86,10 @@ #define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0) #define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0) #define LBB 0xf4 +#define GDRST 0xc0 +#define GDRST_FULL (0<<2) +#define GDRST_RENDER (1<<2) +#define GDRST_MEDIA (3<<2) /* VGA stuff */ @@ -344,9 +348,37 @@ #define FBC_CTL_PLANEA (0<<0) #define FBC_CTL_PLANEB (1<<0) #define FBC_FENCE_OFF 0x0321b +#define FBC_TAG 0x03300 #define FBC_LL_SIZE (1536) +/* Framebuffer compression for GM45+ */ +#define DPFC_CB_BASE 0x3200 +#define DPFC_CONTROL 0x3208 +#define DPFC_CTL_EN (1<<31) +#define DPFC_CTL_PLANEA (0<<30) +#define DPFC_CTL_PLANEB (1<<30) +#define DPFC_CTL_FENCE_EN (1<<29) +#define DPFC_SR_EN (1<<10) +#define DPFC_CTL_LIMIT_1X (0<<6) +#define DPFC_CTL_LIMIT_2X (1<<6) +#define DPFC_CTL_LIMIT_4X (2<<6) +#define DPFC_RECOMP_CTL 0x320c +#define DPFC_RECOMP_STALL_EN (1<<27) +#define DPFC_RECOMP_STALL_WM_SHIFT (16) +#define DPFC_RECOMP_STALL_WM_MASK (0x07ff0000) +#define DPFC_RECOMP_TIMER_COUNT_SHIFT (0) +#define DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f) +#define DPFC_STATUS 0x3210 +#define DPFC_INVAL_SEG_SHIFT (16) +#define DPFC_INVAL_SEG_MASK (0x07ff0000) +#define DPFC_COMP_SEG_SHIFT (0) +#define DPFC_COMP_SEG_MASK (0x000003ff) +#define DPFC_STATUS2 0x3214 +#define DPFC_FENCE_YOFF 0x3218 +#define DPFC_CHICKEN 0x3224 +#define DPFC_HT_MODIFY (1<<31) + /* * GPIO regs */ @@ -2000,6 +2032,8 @@ #define PF_ENABLE (1<<31) #define PFA_WIN_SZ 0x68074 #define PFB_WIN_SZ 0x68874 +#define PFA_WIN_POS 0x68070 +#define PFB_WIN_POS 0x68870 /* legacy palette */ #define LGC_PALETTE_A 0x4a000 diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 20d4d19f556..bd6d8d91ca9 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + /* Pipe & plane A info */ dev_priv->savePIPEACONF = I915_READ(PIPEACONF); dev_priv->savePIPEASRC = I915_READ(PIPEASRC); @@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); return; } + static void i915_restore_modeset_reg(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev) return; } -int i915_save_state(struct drm_device *dev) + +void i915_save_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); - - /* Hardware status page */ - dev_priv->saveHWS = I915_READ(HWS_PGA); /* Display arbitration control */ dev_priv->saveDSPARB = I915_READ(DSPARB); @@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev) /* This is only meaningful in non-KMS mode */ /* Don't save them in KMS mode */ i915_save_modeset_reg(dev); + /* Cursor state */ dev_priv->saveCURACNTR = I915_READ(CURACNTR); dev_priv->saveCURAPOS = I915_READ(CURAPOS); @@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2); dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL); - /* Interrupt state */ - dev_priv->saveIIR = I915_READ(IIR); - dev_priv->saveIER = I915_READ(IER); - dev_priv->saveIMR = I915_READ(IMR); - /* VGA state */ dev_priv->saveVGA0 = I915_READ(VGA0); dev_priv->saveVGA1 = I915_READ(VGA1); dev_priv->saveVGA_PD = I915_READ(VGA_PD); dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); - /* Clock gating state */ - dev_priv->saveD_STATE = I915_READ(D_STATE); - dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); - - /* Cache mode state */ - dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); - - /* Memory Arbitration state */ - dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); - - /* Scratch space */ - for (i = 0; i < 16; i++) { - dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2)); - dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2)); - } - for (i = 0; i < 3; i++) - dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); - - /* Fences */ - if (IS_I965G(dev)) { - for (i = 0; i < 16; i++) - dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); - } else { - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); - - if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); - } i915_save_vga(dev); - - return 0; } -int i915_restore_state(struct drm_device *dev) +void i915_restore_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY); - - /* Hardware status page */ - I915_WRITE(HWS_PGA, dev_priv->saveHWS); /* Display arbitration */ I915_WRITE(DSPARB, dev_priv->saveDSPARB); - /* Fences */ - if (IS_I965G(dev)) { - for (i = 0; i < 16; i++) - I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); - } else { - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); - if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); - } - /* Display port ratios (must be done before clock is set) */ if (SUPPORTS_INTEGRATED_DP(dev)) { I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M); @@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); } + /* This is only meaningful in non-KMS mode */ /* Don't restore them in KMS mode */ i915_restore_modeset_reg(dev); + /* Cursor state */ I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); @@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); DRM_UDELAY(150); + i915_restore_vga(dev); +} + +int i915_save_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); + + /* Hardware status page */ + dev_priv->saveHWS = I915_READ(HWS_PGA); + + i915_save_display(dev); + + /* Interrupt state */ + dev_priv->saveIER = I915_READ(IER); + dev_priv->saveIMR = I915_READ(IMR); + + /* Clock gating state */ + dev_priv->saveD_STATE = I915_READ(D_STATE); + dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */ + + /* Cache mode state */ + dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); + + /* Memory Arbitration state */ + dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); + + /* Scratch space */ + for (i = 0; i < 16; i++) { + dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2)); + dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2)); + } + for (i = 0; i < 3; i++) + dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); + + /* Fences */ + if (IS_I965G(dev)) { + for (i = 0; i < 16; i++) + dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); + } else { + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); + + if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); + } + + return 0; +} + +int i915_restore_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY); + + /* Hardware status page */ + I915_WRITE(HWS_PGA, dev_priv->saveHWS); + + /* Fences */ + if (IS_I965G(dev)) { + for (i = 0; i < 16; i++) + I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); + } else { + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); + } + + i915_restore_display(dev); + + /* Interrupt state */ + I915_WRITE (IER, dev_priv->saveIER); + I915_WRITE (IMR, dev_priv->saveIMR); + /* Clock gating state */ I915_WRITE (D_STATE, dev_priv->saveD_STATE); I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D); @@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev) for (i = 0; i < 3; i++) I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]); - i915_restore_vga(dev); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h new file mode 100644 index 00000000000..5567a40816f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -0,0 +1,315 @@ +#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _I915_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <drm/drmP.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE i915_trace + +/* object tracking */ + +TRACE_EVENT(i915_gem_object_create, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, size) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->size = obj->size; + ), + + TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) +); + +TRACE_EVENT(i915_gem_object_bind, + + TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset), + + TP_ARGS(obj, gtt_offset), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, gtt_offset) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->gtt_offset = gtt_offset; + ), + + TP_printk("obj=%p, gtt_offset=%08x", + __entry->obj, __entry->gtt_offset) +); + +TRACE_EVENT(i915_gem_object_clflush, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_change_domain, + + TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain), + + TP_ARGS(obj, old_read_domains, old_write_domain), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, read_domains) + __field(u32, write_domain) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->read_domains = obj->read_domains | (old_read_domains << 16); + __entry->write_domain = obj->write_domain | (old_write_domain << 16); + ), + + TP_printk("obj=%p, read=%04x, write=%04x", + __entry->obj, + __entry->read_domains, __entry->write_domain) +); + +TRACE_EVENT(i915_gem_object_get_fence, + + TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode), + + TP_ARGS(obj, fence, tiling_mode), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(int, fence) + __field(int, tiling_mode) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->fence = fence; + __entry->tiling_mode = tiling_mode; + ), + + TP_printk("obj=%p, fence=%d, tiling=%d", + __entry->obj, __entry->fence, __entry->tiling_mode) +); + +TRACE_EVENT(i915_gem_object_unbind, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_destroy, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +/* batch tracing */ + +TRACE_EVENT(i915_gem_request_submit, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_flush, + + TP_PROTO(struct drm_device *dev, u32 seqno, + u32 flush_domains, u32 invalidate_domains), + + TP_ARGS(dev, seqno, flush_domains, invalidate_domains), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + __field(u32, flush_domains) + __field(u32, invalidate_domains) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + __entry->flush_domains = flush_domains; + __entry->invalidate_domains = invalidate_domains; + ), + + TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x", + __entry->dev, __entry->seqno, + __entry->flush_domains, __entry->invalidate_domains) +); + + +TRACE_EVENT(i915_gem_request_complete, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_retire, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_begin, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_end, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_ring_wait_begin, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +TRACE_EVENT(i915_ring_wait_end, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +#endif /* _I915_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c new file mode 100644 index 00000000000..ead876eb6ea --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace_points.c @@ -0,0 +1,11 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "i915_drv.h" + +#define CREATE_TRACE_POINTS +#include "i915_trace.h" diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1e28c1652fd..4337414846b 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv, if (IS_I85X(dev_priv->dev)) dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; + else if (IS_IGDNG(dev_priv->dev)) + dev_priv->lvds_ssc_freq = + general->ssc_freq ? 100 : 120; else dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 88814fa2dfd..212e22740fc 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 adpa, temp; + u32 adpa; bool ret; - temp = adpa = I915_READ(PCH_ADPA); - - adpa &= ~ADPA_DAC_ENABLE; - I915_WRITE(PCH_ADPA, adpa); + adpa = I915_READ(PCH_ADPA); adpa &= ~ADPA_CRT_HOTPLUG_MASK; @@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) else ret = false; - /* restore origin register */ - I915_WRITE(PCH_ADPA, temp); return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0227b165290..93ff6c03733 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -24,6 +24,8 @@ * Eric Anholt <eric@anholt.net> */ +#include <linux/module.h> +#include <linux/input.h> #include <linux/i2c.h> #include <linux/kernel.h> #include "drmP.h" @@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, refclk, best_clock); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { - if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) == + if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) clock.p2 = limit->p2.p2_fast; else @@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev) mdelay(20); } +/* Parameters have changed, update FBC info */ +static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane, i; + u32 fbc_ctl, fbc_ctl2; + + dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE; + + if (fb->pitch < dev_priv->cfb_pitch) + dev_priv->cfb_pitch = fb->pitch; + + /* FBC_CTL wants 64B units */ + dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; + dev_priv->cfb_fence = obj_priv->fence_reg; + dev_priv->cfb_plane = intel_crtc->plane; + plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB; + + /* Clear old tags */ + for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) + I915_WRITE(FBC_TAG + (i * 4), 0); + + /* Set it up... */ + fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane; + if (obj_priv->tiling_mode != I915_TILING_NONE) + fbc_ctl2 |= FBC_CTL_CPU_FENCE; + I915_WRITE(FBC_CONTROL2, fbc_ctl2); + I915_WRITE(FBC_FENCE_OFF, crtc->y); + + /* enable it... */ + fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC; + fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; + fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT; + if (obj_priv->tiling_mode != I915_TILING_NONE) + fbc_ctl |= dev_priv->cfb_fence; + I915_WRITE(FBC_CONTROL, fbc_ctl); + + DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d, ", + dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane); +} + +void i8xx_disable_fbc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 fbc_ctl; + + if (!I915_HAS_FBC(dev)) + return; + + /* Disable compression */ + fbc_ctl = I915_READ(FBC_CONTROL); + fbc_ctl &= ~FBC_CTL_EN; + I915_WRITE(FBC_CONTROL, fbc_ctl); + + /* Wait for compressing bit to clear */ + while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) + ; /* nothing */ + + intel_wait_for_vblank(dev); + + DRM_DEBUG("disabled FBC\n"); +} + +static bool i8xx_fbc_enabled(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + return I915_READ(FBC_CONTROL) & FBC_CTL_EN; +} + +static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : + DPFC_CTL_PLANEB); + unsigned long stall_watermark = 200; + u32 dpfc_ctl; + + dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; + dev_priv->cfb_fence = obj_priv->fence_reg; + dev_priv->cfb_plane = intel_crtc->plane; + + dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X; + if (obj_priv->tiling_mode != I915_TILING_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence; + I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY); + } else { + I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY); + } + + I915_WRITE(DPFC_CONTROL, dpfc_ctl); + I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN | + (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) | + (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT)); + I915_WRITE(DPFC_FENCE_YOFF, crtc->y); + + /* enable it... */ + I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN); + + DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane); +} + +void g4x_disable_fbc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpfc_ctl; + + /* Disable compression */ + dpfc_ctl = I915_READ(DPFC_CONTROL); + dpfc_ctl &= ~DPFC_CTL_EN; + I915_WRITE(DPFC_CONTROL, dpfc_ctl); + intel_wait_for_vblank(dev); + + DRM_DEBUG("disabled FBC\n"); +} + +static bool g4x_fbc_enabled(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN; +} + +/** + * intel_update_fbc - enable/disable FBC as needed + * @crtc: CRTC to point the compressor at + * @mode: mode in use + * + * Set up the framebuffer compression hardware at mode set time. We + * enable it if possible: + * - plane A only (on pre-965) + * - no pixel mulitply/line duplication + * - no alpha buffer discard + * - no dual wide + * - framebuffer <= 2048 in width, 1536 in height + * + * We can't assume that any compression will take place (worst case), + * so the compressed buffer has to be the same size as the uncompressed + * one. It also must reside (along with the line length buffer) in + * stolen memory. + * + * We need to enable/disable FBC on a global basis. + */ +static void intel_update_fbc(struct drm_crtc *crtc, + struct drm_display_mode *mode) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb; + struct drm_i915_gem_object *obj_priv; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane = intel_crtc->plane; + + if (!i915_powersave) + return; + + if (!dev_priv->display.fbc_enabled || + !dev_priv->display.enable_fbc || + !dev_priv->display.disable_fbc) + return; + + if (!crtc->fb) + return; + + intel_fb = to_intel_framebuffer(fb); + obj_priv = intel_fb->obj->driver_private; + + /* + * If FBC is already on, we just have to verify that we can + * keep it that way... + * Need to disable if: + * - changing FBC params (stride, fence, mode) + * - new fb is too large to fit in compressed buffer + * - going to an unsupported config (interlace, pixel multiply, etc.) + */ + if (intel_fb->obj->size > dev_priv->cfb_size) { + DRM_DEBUG("framebuffer too large, disabling compression\n"); + goto out_disable; + } + if ((mode->flags & DRM_MODE_FLAG_INTERLACE) || + (mode->flags & DRM_MODE_FLAG_DBLSCAN)) { + DRM_DEBUG("mode incompatible with compression, disabling\n"); + goto out_disable; + } + if ((mode->hdisplay > 2048) || + (mode->vdisplay > 1536)) { + DRM_DEBUG("mode too large for compression, disabling\n"); + goto out_disable; + } + if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) { + DRM_DEBUG("plane not 0, disabling compression\n"); + goto out_disable; + } + if (obj_priv->tiling_mode != I915_TILING_X) { + DRM_DEBUG("framebuffer not tiled, disabling compression\n"); + goto out_disable; + } + + if (dev_priv->display.fbc_enabled(crtc)) { + /* We can re-enable it in this case, but need to update pitch */ + if (fb->pitch > dev_priv->cfb_pitch) + dev_priv->display.disable_fbc(dev); + if (obj_priv->fence_reg != dev_priv->cfb_fence) + dev_priv->display.disable_fbc(dev); + if (plane != dev_priv->cfb_plane) + dev_priv->display.disable_fbc(dev); + } + + if (!dev_priv->display.fbc_enabled(crtc)) { + /* Now try to turn it back on if possible */ + dev_priv->display.enable_fbc(crtc, 500); + } + + return; + +out_disable: + DRM_DEBUG("unsupported config, disabling FBC\n"); + /* Multiple disables should be harmless */ + if (dev_priv->display.fbc_enabled(crtc)) + dev_priv->display.disable_fbc(dev); +} + static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) @@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_i915_gem_object *obj_priv; struct drm_gem_object *obj; int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; unsigned long Start, Offset; - int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR); - int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF); - int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE; - int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF); - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; + int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR); + int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF); + int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE; + int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF); + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; u32 dspcntr, alignment; int ret; @@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, return 0; } - switch (pipe) { + switch (plane) { case 0: case 1: break; default: - DRM_ERROR("Can't update pipe %d in SAREA\n", pipe); + DRM_ERROR("Can't update plane %d in SAREA\n", plane); return -EINVAL; } @@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, I915_READ(dspbase); } + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + intel_wait_for_vblank(dev); if (old_fb) { @@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1; int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; + int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS; int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B; @@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) } } + /* Enable panel fitting for LVDS */ + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + temp = I915_READ(pf_ctl_reg); + I915_WRITE(pf_ctl_reg, temp | PF_ENABLE); + + /* currently full aspect */ + I915_WRITE(pf_win_pos, 0); + + I915_WRITE(pf_win_size, + (dev_priv->panel_fixed_mode->hdisplay << 16) | + (dev_priv->panel_fixed_mode->vdisplay)); + } + /* Enable CPU pipe */ temp = I915_READ(pipeconf_reg); if ((temp & PIPEACONF_ENABLE) == 0) { @@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; - int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR; + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; + int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR; int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; u32 temp; @@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) intel_crtc_load_lut(crtc); + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + /* Give the overlay scaler a chance to enable if it's on this pipe */ //intel_crtc_dpms_video(crtc, true); TODO intel_update_watermarks(dev); @@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) /* Give the overlay scaler a chance to disable if it's on this pipe */ //intel_crtc_dpms_video(crtc, FALSE); TODO + if (dev_priv->cfb_plane == plane && + dev_priv->display.disable_fbc) + dev_priv->display.disable_fbc(dev); + /* Disable the VGA plane that we never use */ i915_disable_vga(dev); @@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) static void intel_crtc_dpms(struct drm_crtc *crtc, int mode) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_master_private *master_priv; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; bool enabled; - if (IS_IGDNG(dev)) - igdng_crtc_dpms(crtc, mode); - else - i9xx_crtc_dpms(crtc, mode); + dev_priv->display.dpms(crtc, mode); intel_crtc->dpms_mode = mode; @@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, return true; } +static int i945_get_display_clock_speed(struct drm_device *dev) +{ + return 400000; +} -/** Returns the core display clock speed for i830 - i945 */ -static int intel_get_core_clock_speed(struct drm_device *dev) +static int i915_get_display_clock_speed(struct drm_device *dev) { + return 333000; +} - /* Core clock values taken from the published datasheets. - * The 830 may go up to 166 Mhz, which we should check. - */ - if (IS_I945G(dev)) - return 400000; - else if (IS_I915G(dev)) - return 333000; - else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev)) - return 200000; - else if (IS_I915GM(dev)) { - u16 gcfgc = 0; +static int i9xx_misc_get_display_clock_speed(struct drm_device *dev) +{ + return 200000; +} - pci_read_config_word(dev->pdev, GCFGC, &gcfgc); +static int i915gm_get_display_clock_speed(struct drm_device *dev) +{ + u16 gcfgc = 0; - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133000; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: - return 333000; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } - } else if (IS_I865G(dev)) - return 266000; - else if (IS_I855(dev)) { - u16 hpllcc = 0; - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133000; + pci_read_config_word(dev->pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133000; + else { + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_MHZ: + return 333000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 190000; } - } else /* 852, 830 */ + } +} + +static int i865_get_display_clock_speed(struct drm_device *dev) +{ + return 266000; +} + +static int i855_get_display_clock_speed(struct drm_device *dev) +{ + u16 hpllcc = 0; + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_100_200: + return 200000; + case GC_CLOCK_166_250: + return 250000; + case GC_CLOCK_100_133: return 133000; + } + + /* Shouldn't happen */ + return 0; +} - return 0; /* Silence gcc warning */ +static int i830_get_display_clock_speed(struct drm_device *dev) +{ + return 133000; } /** @@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, { long entries_required, wm_size; - entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; + /* + * Note: we need to make sure we don't overflow for various clock & + * latency values. + * clocks go from a few thousand to several hundred thousand. + * latency is usually a few thousand + */ + entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) / + 1000; entries_required /= wm->cacheline_size; DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required); @@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb, for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { latency = &cxsr_latency_table[i]; if (is_desktop == latency->is_desktop && - fsb == latency->fsb_freq && mem == latency->mem_freq) - break; + fsb == latency->fsb_freq && mem == latency->mem_freq) + return latency; } - if (i >= ARRAY_SIZE(cxsr_latency_table)) { - DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); - return NULL; - } - return latency; + + DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); + + return NULL; } static void igd_disable_cxsr(struct drm_device *dev) @@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock, */ const static int latency_ns = 5000; -static int intel_get_fifo_size(struct drm_device *dev, int plane) +static int i9xx_get_fifo_size(struct drm_device *dev, int plane) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t dsparb = I915_READ(DSPARB); int size; - if (IS_I9XX(dev)) { - if (plane == 0) - size = dsparb & 0x7f; - else - size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - - (dsparb & 0x7f); - } else if (IS_I85X(dev)) { - if (plane == 0) - size = dsparb & 0x1ff; - else - size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - - (dsparb & 0x1ff); - size >>= 1; /* Convert to cachelines */ - } else if (IS_845G(dev)) { + if (plane == 0) size = dsparb & 0x7f; - size >>= 2; /* Convert to cachelines */ - } else { - size = dsparb & 0x7f; - size >>= 1; /* Convert to cachelines */ - } + else + size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - + (dsparb & 0x7f); + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static int i85x_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + if (plane == 0) + size = dsparb & 0x1ff; + else + size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - + (dsparb & 0x1ff); + size >>= 1; /* Convert to cachelines */ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", size); @@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane) return size; } -static void g4x_update_wm(struct drm_device *dev) +static int i845_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + size = dsparb & 0x7f; + size >>= 2; /* Convert to cachelines */ + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static int i830_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + size = dsparb & 0x7f; + size >>= 1; /* Convert to cachelines */ + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static void g4x_update_wm(struct drm_device *dev, int unused, int unused2, + int unused3, int unused4) { struct drm_i915_private *dev_priv = dev->dev_private; u32 fw_blc_self = I915_READ(FW_BLC_SELF); @@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev) I915_WRITE(FW_BLC_SELF, fw_blc_self); } -static void i965_update_wm(struct drm_device *dev) +static void i965_update_wm(struct drm_device *dev, int unused, int unused2, + int unused3, int unused4) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, cacheline_size = planea_params.cacheline_size; /* Update per-plane FIFO sizes */ - planea_params.fifo_size = intel_get_fifo_size(dev, 0); - planeb_params.fifo_size = intel_get_fifo_size(dev, 1); + planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0); + planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1); planea_wm = intel_calculate_wm(planea_clock, &planea_params, pixel_size, latency_ns); @@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, I915_WRITE(FW_BLC2, fwater_hi); } -static void i830_update_wm(struct drm_device *dev, int planea_clock, - int pixel_size) +static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused, + int unused2, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff; int planea_wm; - i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0); + i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0); planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info, pixel_size, latency_ns); @@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock, */ static void intel_update_watermarks(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; int sr_hdisplay = 0; @@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev) else if (IS_IGD(dev)) igd_disable_cxsr(dev); - if (IS_G4X(dev)) - g4x_update_wm(dev); - else if (IS_I965G(dev)) - i965_update_wm(dev); - else if (IS_I9XX(dev) || IS_MOBILE(dev)) - i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay, - pixel_size); - else - i830_update_wm(dev, planea_clock, pixel_size); + dev_priv->display.update_wm(dev, planea_clock, planeb_clock, + sr_hdisplay, pixel_size); } static int intel_crtc_mode_set(struct drm_crtc *crtc, @@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; int fp_reg = (pipe == 0) ? FPA0 : FPB0; int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD; - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; @@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B; int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B; int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B; - int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE; - int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS; + int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE; + int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS; int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC; int refclk, num_outputs = 0; intel_clock_t clock, reduced_clock; @@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, enable color space conversion */ if (!IS_IGDNG(dev)) { if (pipe == 0) - dspcntr |= DISPPLANE_SEL_PIPE_A; + dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; else dspcntr |= DISPPLANE_SEL_PIPE_B; } @@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, * XXX: No double-wide on 915GM pipe B. Is that the only reason for the * pipe == 0 check? */ - if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10) + if (mode->clock > + dev_priv->display.get_display_clock_speed(dev) * 9 / 10) pipeconf |= PIPEACONF_DOUBLE_WIDE; else pipeconf &= ~PIPEACONF_DOUBLE_WIDE; @@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, udelay(150); if (IS_I965G(dev) && !IS_IGDNG(dev)) { - sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; - I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | + if (is_sdvo) { + sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; + I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT)); + } else + I915_WRITE(dpll_md_reg, 0); } else { /* write it again -- the BIOS does, after all */ I915_WRITE(dpll_reg, dpll); @@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Flush the plane changes */ ret = intel_pipe_set_base(crtc, x, y, old_fb); + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + intel_update_watermarks(dev); drm_vblank_post_modeset(dev, pipe); @@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, struct drm_gem_object *bo; struct drm_i915_gem_object *obj_priv; int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; uint32_t temp = I915_READ(control); @@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, i915_gem_object_unpin(intel_crtc->cursor_bo); drm_gem_object_unreference(intel_crtc->cursor_bo); } + + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + mutex_unlock(&dev->struct_mutex); intel_crtc->cursor_addr = addr; @@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->lut_b[i] = i; } + /* Swap pipes & planes for FBC on pre-965 */ + intel_crtc->pipe = pipe; + intel_crtc->plane = pipe; + if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) { + DRM_DEBUG("swapping pipes & planes for FBC\n"); + intel_crtc->plane = ((pipe == 0) ? 1 : 0); + } + intel_crtc->cursor_addr = 0; intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev) } } +/* Set up chip specific display functions */ +static void intel_init_display(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* We always want a DPMS function */ + if (IS_IGDNG(dev)) + dev_priv->display.dpms = igdng_crtc_dpms; + else + dev_priv->display.dpms = i9xx_crtc_dpms; + + /* Only mobile has FBC, leave pointers NULL for other chips */ + if (IS_MOBILE(dev)) { + if (IS_GM45(dev)) { + dev_priv->display.fbc_enabled = g4x_fbc_enabled; + dev_priv->display.enable_fbc = g4x_enable_fbc; + dev_priv->display.disable_fbc = g4x_disable_fbc; + } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) { + dev_priv->display.fbc_enabled = i8xx_fbc_enabled; + dev_priv->display.enable_fbc = i8xx_enable_fbc; + dev_priv->display.disable_fbc = i8xx_disable_fbc; + } + /* 855GM needs testing */ + } + + /* Returns the core display clock speed */ + if (IS_I945G(dev)) + dev_priv->display.get_display_clock_speed = + i945_get_display_clock_speed; + else if (IS_I915G(dev)) + dev_priv->display.get_display_clock_speed = + i915_get_display_clock_speed; + else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev)) + dev_priv->display.get_display_clock_speed = + i9xx_misc_get_display_clock_speed; + else if (IS_I915GM(dev)) + dev_priv->display.get_display_clock_speed = + i915gm_get_display_clock_speed; + else if (IS_I865G(dev)) + dev_priv->display.get_display_clock_speed = + i865_get_display_clock_speed; + else if (IS_I855(dev)) + dev_priv->display.get_display_clock_speed = + i855_get_display_clock_speed; + else /* 852, 830 */ + dev_priv->display.get_display_clock_speed = + i830_get_display_clock_speed; + + /* For FIFO watermark updates */ + if (IS_G4X(dev)) + dev_priv->display.update_wm = g4x_update_wm; + else if (IS_I965G(dev)) + dev_priv->display.update_wm = i965_update_wm; + else if (IS_I9XX(dev) || IS_MOBILE(dev)) { + dev_priv->display.update_wm = i9xx_update_wm; + dev_priv->display.get_fifo_size = i9xx_get_fifo_size; + } else { + if (IS_I85X(dev)) + dev_priv->display.get_fifo_size = i85x_get_fifo_size; + else if (IS_845G(dev)) + dev_priv->display.get_fifo_size = i845_get_fifo_size; + else + dev_priv->display.get_fifo_size = i830_get_fifo_size; + dev_priv->display.update_wm = i830_update_wm; + } +} + void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = (void *)&intel_mode_funcs; + intel_init_display(dev); + if (IS_I965G(dev)) { dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; @@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); + if (dev_priv->display.disable_fbc) + dev_priv->display.disable_fbc(dev); + drm_mode_config_cleanup(dev); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3ebbbabfe59..8aa4b7f30da 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include <linux/i2c.h> #include <linux/i2c-id.h> #include <linux/i2c-algo-bit.h> +#include "i915_drv.h" #include "drm_crtc.h" #include "drm_crtc_helper.h" @@ -111,8 +112,8 @@ struct intel_output { struct intel_crtc { struct drm_crtc base; - int pipe; - int plane; + enum pipe pipe; + enum plane plane; struct drm_gem_object *cursor_bo; uint32_t cursor_addr; u8 lut_r[256], lut_g[256], lut_b[256]; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index dafc0da1c25..98ae3d73577 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -27,6 +27,7 @@ * Jesse Barnes <jesse.barnes@intel.com> */ +#include <acpi/button.h> #include <linux/dmi.h> #include <linux/i2c.h> #include "drmP.h" @@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, goto out; } + /* full screen scale for now */ + if (IS_IGDNG(dev)) + goto out; + /* 965+ wants fuzzy fitting */ if (IS_I965G(dev)) pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) | @@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, * to register description and PRM. * Change the value here to see the borders for debugging */ - I915_WRITE(BCLRPAT_A, 0); - I915_WRITE(BCLRPAT_B, 0); + if (!IS_IGDNG(dev)) { + I915_WRITE(BCLRPAT_A, 0); + I915_WRITE(BCLRPAT_B, 0); + } switch (lvds_priv->fitting_mode) { case DRM_MODE_SCALE_CENTER: @@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * settings. */ - /* No panel fitting yet, fixme */ if (IS_IGDNG(dev)) return; @@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control); } +/* Some lid devices report incorrect lid status, assume they're connected */ +static const struct dmi_system_id bad_lid_status[] = { + { + .ident = "Aspire One", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"), + }, + }, + { } +}; + /** * Detect the LVDS connection. * - * This always returns CONNECTOR_STATUS_CONNECTED. This connector should only have - * been set up if the LVDS was actually connected anyway. + * Since LVDS doesn't have hotlug, we use the lid as a proxy. Open means + * connected and closed means disconnected. We also send hotplug events as + * needed, using lid status notification from the input layer. */ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) { - return connector_status_connected; + enum drm_connector_status status = connector_status_connected; + + if (!acpi_lid_open() && !dmi_check_system(bad_lid_status)) + status = connector_status_disconnected; + + return status; } /** @@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector) return 0; } +static int intel_lid_notify(struct notifier_block *nb, unsigned long val, + void *unused) +{ + struct drm_i915_private *dev_priv = + container_of(nb, struct drm_i915_private, lid_notifier); + struct drm_device *dev = dev_priv->dev; + + if (acpi_lid_open() && !dev_priv->suspended) { + mutex_lock(&dev->mode_config.mutex); + drm_helper_resume_force_mode(dev); + mutex_unlock(&dev->mode_config.mutex); + } + + drm_sysfs_hotplug_event(dev_priv->dev); + + return NOTIFY_OK; +} + /** * intel_lvds_destroy - unregister and free LVDS structures * @connector: connector to free @@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector) */ static void intel_lvds_destroy(struct drm_connector *connector) { + struct drm_device *dev = connector->dev; struct intel_output *intel_output = to_intel_output(connector); + struct drm_i915_private *dev_priv = dev->dev_private; if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); + if (dev_priv->lid_notifier.notifier_call) + acpi_lid_notifier_unregister(&dev_priv->lid_notifier); drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(connector); @@ -1011,6 +1057,11 @@ out: pwm |= PWM_PCH_ENABLE; I915_WRITE(BLC_PWM_PCH_CTL1, pwm); } + dev_priv->lid_notifier.notifier_call = intel_lid_notify; + if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { + DRM_DEBUG("lid notifier registration failed\n"); + dev_priv->lid_notifier.notifier_call = NULL; + } drm_sysfs_connector_add(connector); return; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 0bf28efcf2c..083bec2e50f 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -135,6 +135,30 @@ struct intel_sdvo_priv { struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2; struct intel_sdvo_dtd save_output_dtd[16]; u32 save_SDVOX; + /* add the property for the SDVO-TV */ + struct drm_property *left_property; + struct drm_property *right_property; + struct drm_property *top_property; + struct drm_property *bottom_property; + struct drm_property *hpos_property; + struct drm_property *vpos_property; + + /* add the property for the SDVO-TV/LVDS */ + struct drm_property *brightness_property; + struct drm_property *contrast_property; + struct drm_property *saturation_property; + struct drm_property *hue_property; + + /* Add variable to record current setting for the above property */ + u32 left_margin, right_margin, top_margin, bottom_margin; + /* this is to get the range of margin.*/ + u32 max_hscan, max_vscan; + u32 max_hpos, cur_hpos; + u32 max_vpos, cur_vpos; + u32 cur_brightness, max_brightness; + u32 cur_contrast, max_contrast; + u32 cur_saturation, max_saturation; + u32 cur_hue, max_hue; }; static bool @@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name { SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS), + /* Add the op code for SDVO enhancements */ + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V), /* HDMI op code */ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE), @@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output) status = intel_sdvo_read_response(output, NULL, 0); if (status != SDVO_CMD_STATUS_SUCCESS) - DRM_DEBUG("%s: Failed to set TV format\n", + DRM_DEBUG_KMS("%s: Failed to set TV format\n", SDVO_NAME(sdvo_priv)); } @@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector) return 1; } +static +void intel_sdvo_destroy_enhance_property(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct drm_device *dev = connector->dev; + + if (sdvo_priv->is_tv) { + if (sdvo_priv->left_property) + drm_property_destroy(dev, sdvo_priv->left_property); + if (sdvo_priv->right_property) + drm_property_destroy(dev, sdvo_priv->right_property); + if (sdvo_priv->top_property) + drm_property_destroy(dev, sdvo_priv->top_property); + if (sdvo_priv->bottom_property) + drm_property_destroy(dev, sdvo_priv->bottom_property); + if (sdvo_priv->hpos_property) + drm_property_destroy(dev, sdvo_priv->hpos_property); + if (sdvo_priv->vpos_property) + drm_property_destroy(dev, sdvo_priv->vpos_property); + } + if (sdvo_priv->is_tv) { + if (sdvo_priv->saturation_property) + drm_property_destroy(dev, + sdvo_priv->saturation_property); + if (sdvo_priv->contrast_property) + drm_property_destroy(dev, + sdvo_priv->contrast_property); + if (sdvo_priv->hue_property) + drm_property_destroy(dev, sdvo_priv->hue_property); + } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + if (sdvo_priv->brightness_property) + drm_property_destroy(dev, + sdvo_priv->brightness_property); + } + return; +} + static void intel_sdvo_destroy(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); @@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector) drm_property_destroy(connector->dev, sdvo_priv->tv_format_property); + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) + intel_sdvo_destroy_enhance_property(connector); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); @@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector, struct drm_crtc *crtc = encoder->crtc; int ret = 0; bool changed = false; + uint8_t cmd, status; + uint16_t temp_value; ret = drm_connector_property_set_value(connector, property, val); if (ret < 0) @@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector, sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val]; changed = true; - } else { - ret = -EINVAL; - goto out; } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + cmd = 0; + temp_value = val; + if (sdvo_priv->left_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->right_property, val); + if (sdvo_priv->left_margin == temp_value) + goto out; + + sdvo_priv->left_margin = temp_value; + sdvo_priv->right_margin = temp_value; + temp_value = sdvo_priv->max_hscan - + sdvo_priv->left_margin; + cmd = SDVO_CMD_SET_OVERSCAN_H; + } else if (sdvo_priv->right_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->left_property, val); + if (sdvo_priv->right_margin == temp_value) + goto out; + + sdvo_priv->left_margin = temp_value; + sdvo_priv->right_margin = temp_value; + temp_value = sdvo_priv->max_hscan - + sdvo_priv->left_margin; + cmd = SDVO_CMD_SET_OVERSCAN_H; + } else if (sdvo_priv->top_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->bottom_property, val); + if (sdvo_priv->top_margin == temp_value) + goto out; + + sdvo_priv->top_margin = temp_value; + sdvo_priv->bottom_margin = temp_value; + temp_value = sdvo_priv->max_vscan - + sdvo_priv->top_margin; + cmd = SDVO_CMD_SET_OVERSCAN_V; + } else if (sdvo_priv->bottom_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->top_property, val); + if (sdvo_priv->bottom_margin == temp_value) + goto out; + sdvo_priv->top_margin = temp_value; + sdvo_priv->bottom_margin = temp_value; + temp_value = sdvo_priv->max_vscan - + sdvo_priv->top_margin; + cmd = SDVO_CMD_SET_OVERSCAN_V; + } else if (sdvo_priv->hpos_property == property) { + if (sdvo_priv->cur_hpos == temp_value) + goto out; + + cmd = SDVO_CMD_SET_POSITION_H; + sdvo_priv->cur_hpos = temp_value; + } else if (sdvo_priv->vpos_property == property) { + if (sdvo_priv->cur_vpos == temp_value) + goto out; + + cmd = SDVO_CMD_SET_POSITION_V; + sdvo_priv->cur_vpos = temp_value; + } else if (sdvo_priv->saturation_property == property) { + if (sdvo_priv->cur_saturation == temp_value) + goto out; + + cmd = SDVO_CMD_SET_SATURATION; + sdvo_priv->cur_saturation = temp_value; + } else if (sdvo_priv->contrast_property == property) { + if (sdvo_priv->cur_contrast == temp_value) + goto out; + + cmd = SDVO_CMD_SET_CONTRAST; + sdvo_priv->cur_contrast = temp_value; + } else if (sdvo_priv->hue_property == property) { + if (sdvo_priv->cur_hue == temp_value) + goto out; + + cmd = SDVO_CMD_SET_HUE; + sdvo_priv->cur_hue = temp_value; + } else if (sdvo_priv->brightness_property == property) { + if (sdvo_priv->cur_brightness == temp_value) + goto out; + + cmd = SDVO_CMD_SET_BRIGHTNESS; + sdvo_priv->cur_brightness = temp_value; + } + if (cmd) { + intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2); + status = intel_sdvo_read_response(intel_output, + NULL, 0); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO command \n"); + return -EINVAL; + } + changed = true; + } + } if (changed && crtc) drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->fb); @@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags) sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; encoder->encoder_type = DRM_MODE_ENCODER_DAC; connector->connector_type = DRM_MODE_CONNECTOR_VGA; + intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) | + (1 << INTEL_ANALOG_CLONE_BIT); } else if (flags & SDVO_OUTPUT_LVDS0) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; @@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector) } +static void intel_sdvo_create_enhance_property(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct intel_sdvo_enhancements_reply sdvo_data; + struct drm_device *dev = connector->dev; + uint8_t status; + uint16_t response, data_value[2]; + + intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, + NULL, 0); + status = intel_sdvo_read_response(intel_output, &sdvo_data, + sizeof(sdvo_data)); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS(" incorrect response is returned\n"); + return; + } + response = *((uint16_t *)&sdvo_data); + if (!response) { + DRM_DEBUG_KMS("No enhancement is supported\n"); + return; + } + if (sdvo_priv->is_tv) { + /* when horizontal overscan is supported, Add the left/right + * property + */ + if (sdvo_data.overscan_h) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO max " + "h_overscan\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_OVERSCAN_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n"); + return; + } + sdvo_priv->max_hscan = data_value[0]; + sdvo_priv->left_margin = data_value[0] - response; + sdvo_priv->right_margin = sdvo_priv->left_margin; + sdvo_priv->left_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "left_margin", 2); + sdvo_priv->left_property->values[0] = 0; + sdvo_priv->left_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->left_property, + sdvo_priv->left_margin); + sdvo_priv->right_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "right_margin", 2); + sdvo_priv->right_property->values[0] = 0; + sdvo_priv->right_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->right_property, + sdvo_priv->right_margin); + DRM_DEBUG_KMS("h_overscan: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.overscan_v) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO max " + "v_overscan\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_OVERSCAN_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n"); + return; + } + sdvo_priv->max_vscan = data_value[0]; + sdvo_priv->top_margin = data_value[0] - response; + sdvo_priv->bottom_margin = sdvo_priv->top_margin; + sdvo_priv->top_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "top_margin", 2); + sdvo_priv->top_property->values[0] = 0; + sdvo_priv->top_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->top_property, + sdvo_priv->top_margin); + sdvo_priv->bottom_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "bottom_margin", 2); + sdvo_priv->bottom_property->values[0] = 0; + sdvo_priv->bottom_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->bottom_property, + sdvo_priv->bottom_margin); + DRM_DEBUG_KMS("v_overscan: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.position_h) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_POSITION_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_POSITION_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get h_postion\n"); + return; + } + sdvo_priv->max_hpos = data_value[0]; + sdvo_priv->cur_hpos = response; + sdvo_priv->hpos_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hpos", 2); + sdvo_priv->hpos_property->values[0] = 0; + sdvo_priv->hpos_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->hpos_property, + sdvo_priv->cur_hpos); + DRM_DEBUG_KMS("h_position: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.position_v) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_POSITION_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_POSITION_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get v_postion\n"); + return; + } + sdvo_priv->max_vpos = data_value[0]; + sdvo_priv->cur_vpos = response; + sdvo_priv->vpos_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "vpos", 2); + sdvo_priv->vpos_property->values[0] = 0; + sdvo_priv->vpos_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->vpos_property, + sdvo_priv->cur_vpos); + DRM_DEBUG_KMS("v_position: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + if (sdvo_priv->is_tv) { + if (sdvo_data.saturation) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_SATURATION, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max sat\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_SATURATION, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get sat\n"); + return; + } + sdvo_priv->max_saturation = data_value[0]; + sdvo_priv->cur_saturation = response; + sdvo_priv->saturation_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "saturation", 2); + sdvo_priv->saturation_property->values[0] = 0; + sdvo_priv->saturation_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->saturation_property, + sdvo_priv->cur_saturation); + DRM_DEBUG_KMS("saturation: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.contrast) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_CONTRAST, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_CONTRAST, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get contrast\n"); + return; + } + sdvo_priv->max_contrast = data_value[0]; + sdvo_priv->cur_contrast = response; + sdvo_priv->contrast_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "contrast", 2); + sdvo_priv->contrast_property->values[0] = 0; + sdvo_priv->contrast_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->contrast_property, + sdvo_priv->cur_contrast); + DRM_DEBUG_KMS("contrast: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.hue) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_HUE, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max hue\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_HUE, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get hue\n"); + return; + } + sdvo_priv->max_hue = data_value[0]; + sdvo_priv->cur_hue = response; + sdvo_priv->hue_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hue", 2); + sdvo_priv->hue_property->values[0] = 0; + sdvo_priv->hue_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->hue_property, + sdvo_priv->cur_hue); + DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + if (sdvo_data.brightness) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max bright\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_BRIGHTNESS, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get brigh\n"); + return; + } + sdvo_priv->max_brightness = data_value[0]; + sdvo_priv->cur_brightness = response; + sdvo_priv->brightness_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "brightness", 2); + sdvo_priv->brightness_property->values[0] = 0; + sdvo_priv->brightness_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->brightness_property, + sdvo_priv->cur_brightness); + DRM_DEBUG_KMS("brightness: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + return; +} + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; @@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); if (sdvo_priv->is_tv) intel_sdvo_tv_create_property(connector); + + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) + intel_sdvo_create_enhance_property(connector); + drm_sysfs_connector_add(connector); intel_sdvo_select_ddc_bus(sdvo_priv); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 6bedd2fcfc1..737335ff2b2 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -477,8 +477,8 @@ config I2C_PNX will be called i2c-pnx. config I2C_PXA - tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)" - depends on EXPERIMENTAL && ARCH_PXA + tristate "Intel PXA2XX I2C adapter" + depends on ARCH_PXA || ARCH_MMP help If you have devices in the PXA I2C bus, say yes to this option. This driver can also be built as a module. If so, the module diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index 949c97ff57e..1f20a042a4f 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -29,8 +29,8 @@ #include <asm/idle.h> -#include "../dma/ioatdma_hw.h" -#include "../dma/ioatdma_registers.h" +#include "../dma/ioat/hw.h" +#include "../dma/ioat/registers.h" #define I7300_IDLE_DRIVER_VERSION "1.55" #define I7300_PRINT "i7300_idle:" @@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void) udelay(10); sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) + if (sts != IOAT_CHANSTS_ACTIVE) break; } @@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl, udelay(1000); chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { + if (chan_sts != IOAT_CHANSTS_DONE) { /* Not complete, reset the channel */ writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET); @@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void) ioat_chanbase + IOAT1_CHANCMD_OFFSET); chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + if (chan_sts != IOAT_CHANSTS_ACTIVE) { writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); break; } @@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void) } chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; /* * We tried to reset multiple times. If IO A/T channel is still active * flag an error and return without cleanup. Memory leak is better * than random corruption in that extreme error situation. */ - if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + if (chan_sts == IOAT_CHANSTS_ACTIVE) { printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." " Not freeing resources\n"); return; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 57a3c6f947b..4e0f2829e0e 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -37,7 +37,8 @@ enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE + RMPP_STATE_COMPLETE, + RMPP_STATE_CANCELING }; struct mad_rmpp_recv { @@ -87,18 +88,22 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) spin_lock_irqsave(&agent->lock, flags); list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); + rmpp_recv->state = RMPP_STATE_CANCELING; + } + spin_unlock_irqrestore(&agent->lock, flags); + + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } - spin_unlock_irqrestore(&agent->lock, flags); flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); destroy_rmpp_recv(rmpp_recv); } } @@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); + if (rmpp_recv->state == RMPP_STATE_CANCELING) { + spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); + return; + } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 056b2a4c697..0aa0110e4b6 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + struct pci_dev *pdev = dev->pdev; ret = __mthca_restart_one(dev->pdev); + /* 'dev' now is not valid */ if (ret) - mthca_err(dev, "Reset failed (%d)\n", ret); - else - mthca_dbg(dev, "Reset succeeded\n"); + printk(KERN_ERR "mthca %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else { + struct mthca_dev *d = pci_get_drvdata(pdev); + mthca_dbg(d, "Reset succeeded\n"); + } } mutex_unlock(&mthca_device_mutex); diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 538e409d451..e593af3354b 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = { .ndo_set_mac_address = nes_netdev_set_mac_address, .ndo_set_multicast_list = nes_netdev_set_multicast_list, .ndo_change_mtu = nes_netdev_change_mtu, - .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_register = nes_netdev_vlan_rx_register, }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 25874fc680c..8763c1ea5eb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -362,12 +362,19 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, carrier_on_task); + struct ib_port_attr attr; /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being * removed. */ + if (ib_query_port(priv->ca, priv->port, &attr) || + attr.state != IB_PORT_ACTIVE) { + ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); + return; + } + rtnl_lock(); netif_carrier_on(priv->dev); rtnl_unlock(); diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 76d6751f89a..02f4f8f1db6 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -225,6 +225,7 @@ config INPUT_SGI_BTNS config INPUT_WINBOND_CIR tristate "Winbond IR remote control" depends on X86 && PNP + select NEW_LEDS select LEDS_CLASS select BITREVERSE help diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 020f9573fd8..2158377a135 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -124,6 +124,8 @@ config MD_RAID456 select MD_RAID6_PQ select ASYNC_MEMCPY select ASYNC_XOR + select ASYNC_PQ + select ASYNC_RAID6_RECOV ---help--- A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure @@ -152,9 +154,33 @@ config MD_RAID456 If unsure, say Y. +config MULTICORE_RAID456 + bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" + depends on MD_RAID456 + depends on SMP + depends on EXPERIMENTAL + ---help--- + Enable the raid456 module to dispatch per-stripe raid operations to a + thread pool. + + If unsure, say N. + config MD_RAID6_PQ tristate +config ASYNC_RAID6_TEST + tristate "Self test for hardware accelerated raid6 recovery" + depends on MD_RAID6_PQ + select ASYNC_RAID6_RECOV + ---help--- + This is a one-shot self test that permutes through the + recovery of all the possible two disk failure scenarios for a + N-disk array. Recovery is performed with the asynchronous + raid6 recovery routines, and will optionally use an offload + engine if one is available. + + If unsure, say N. + config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 3319c2fec28..6986b0059d2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) * allocated while we're using it */ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) +__releases(bitmap->lock) +__acquires(bitmap->lock) { unsigned char *mappage; @@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) return 0; bad_alignment: - rcu_read_unlock(); return -EINVAL; } @@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, sector_t offset, int *blocks, int create) +__releases(bitmap->lock) +__acquires(bitmap->lock) { /* If 'create', we might release the lock and reclaim it. * The lock must have been taken with interrupts enabled. diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ea484290544..1ceceb334d5 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits) linear_conf_t *conf; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); conf = rcu_dereference(mddev->private); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6aa497e4baf..26ba42a7912 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev) mddev->pers->quiesce(mddev, 0); } +int mddev_congested(mddev_t *mddev, int bits) +{ + return mddev->suspended; +} +EXPORT_SYMBOL(mddev_congested); + static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_resync"); + "resync"); if (!mddev->sync_thread) { printk(KERN_ERR "%s: could not start resync" " thread...\n", @@ -4575,10 +4581,10 @@ static int get_version(void __user * arg) static int get_array_info(mddev_t * mddev, void __user * arg) { mdu_array_info_t info; - int nr,working,active,failed,spare; + int nr,working,insync,failed,spare; mdk_rdev_t *rdev; - nr=working=active=failed=spare=0; + nr=working=insync=failed=spare=0; list_for_each_entry(rdev, &mddev->disks, same_set) { nr++; if (test_bit(Faulty, &rdev->flags)) @@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) else { working++; if (test_bit(In_sync, &rdev->flags)) - active++; + insync++; else spare++; } @@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.state = (1<<MD_SB_CLEAN); if (mddev->bitmap && mddev->bitmap_offset) info.state = (1<<MD_SB_BITMAP_PRESENT); - info.active_disks = active; + info.active_disks = insync; info.working_disks = working; info.failed_disks = failed; info.spare_disks = spare; @@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (!list_empty(&mddev->disks)) { mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, mdk_rdev_t, same_set); - int err = super_types[mddev->major_version] + err = super_types[mddev->major_version] .load_super(rdev, rdev0, mddev->minor_version); if (err < 0) { printk(KERN_WARNING @@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, thread->run = run; thread->mddev = mddev; thread->timeout = MAX_SCHEDULE_TIMEOUT; - thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); + thread->tsk = kthread_run(md_thread, thread, + "%s_%s", + mdname(thread->mddev), + name ?: mddev->pers->name); if (IS_ERR(thread->tsk)) { kfree(thread); return NULL; @@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev) } mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_resync"); + "resync"); if (!mddev->sync_thread) { printk(KERN_ERR "%s: could not start resync" " thread...\n", diff --git a/drivers/md/md.h b/drivers/md/md.h index f55d2ff9513..f184b69ef33 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); +extern int mddev_congested(mddev_t *mddev, int bits); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(mddev_t *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d2d3fd54cc6..ee7646f974a 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) } mp_bh = mempool_alloc(conf->pool, GFP_NOIO); - memset(mp_bh, 0, sizeof(*mp_bh)); mp_bh->master_bio = bio; mp_bh->mddev = mddev; @@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits) multipath_conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); for (i = 0; i < mddev->raid_disks ; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); @@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev) } { - mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath"); + mddev->thread = md_register_thread(multipathd, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "multipath: couldn't allocate thread" " for %s\n", mdname(mddev)); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f845ed98fec..d3a4ce06015 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits) mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(devlist[i]->bdev); @@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev) static int create_strip_zones(mddev_t *mddev) { - int i, c, j, err; + int i, c, err; sector_t curr_zone_end, sectors; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; @@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev) /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) { + int j; + zone = conf->strip_zone + i; dev = conf->devlist + i * mddev->raid_disks; @@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev) c = 0; for (j=0; j<cnt; j++) { - char b[BDEVNAME_SIZE]; rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff7ed333599..d1b9bd5fd4f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); for (i = 0; i < mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio) read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_barriers | do_sync; + mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | + (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r1_bio; if (behind_pages) { @@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev) conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | do_sync; + bio->bi_rw = WRITE | + (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; r1_bio->bios[i] = bio; generic_make_request(bio); @@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev) bio->bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; unplug = 1; generic_make_request(bio); @@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev) conf->last_used = j; - mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); + mddev->thread = md_register_thread(raid1d, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "raid1: couldn't allocate thread for %s\n", diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d0a2152e064..51c4c5c4d87 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; rcu_read_lock(); for (i = 0; i < mddev->raid_disks && ret == 0; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r10_bio; generic_make_request(read_bio); @@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio) conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync; + mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev) bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; unplug = 1; @@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ - int i, j, k; + int j, k; r10_bio = NULL; for (i=0 ; i<conf->raid_disks; i++) @@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev) } - mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10"); + mddev->thread = md_register_thread(raid10d, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "raid10: couldn't allocate thread for %s\n", diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 826eb346735..94829804ab7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -47,7 +47,9 @@ #include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/async_tx.h> +#include <linux/async.h> #include <linux/seq_file.h> +#include <linux/cpu.h> #include "md.h" #include "raid5.h" #include "bitmap.h" @@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; + enum async_tx_flags flags = 0; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + if (frombio) + flags |= ASYNC_TX_FENCE; + init_async_submit(&submit, flags, tx, NULL, NULL, NULL); + bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } -static void ops_complete_compute5(void *stripe_head_ref) +static void mark_target_uptodate(struct stripe_head *sh, int target) { - struct stripe_head *sh = stripe_head_ref; - int target = sh->ops.target; - struct r5dev *tgt = &sh->dev[target]; + struct r5dev *tgt; - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); + if (target < 0) + return; + tgt = &sh->dev[target]; set_bit(R5_UPTODATE, &tgt->flags); BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); clear_bit(R5_Wantcompute, &tgt->flags); +} + +static void ops_complete_compute(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + + pr_debug("%s: stripe %llu\n", __func__, + (unsigned long long)sh->sector); + + /* mark the computed target(s) as uptodate */ + mark_target_uptodate(sh, sh->ops.target); + mark_target_uptodate(sh, sh->ops.target2); + clear_bit(STRIPE_COMPUTE_RUN, &sh->state); if (sh->check_state == check_state_compute_run) sh->check_state = check_state_compute_result; @@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref) release_stripe(sh); } -static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) +/* return a pointer to the address conversion region of the scribble buffer */ +static addr_conv_t *to_addr_conv(struct stripe_head *sh, + struct raid5_percpu *percpu) +{ + return percpu->scribble + sizeof(struct page *) * (sh->disks + 2); +} + +static struct dma_async_tx_descriptor * +ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + struct page **xor_srcs = percpu->scribble; int target = sh->ops.target; struct r5dev *tgt = &sh->dev[target]; struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute, sh, to_addr_conv(sh, percpu)); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } +/* set_syndrome_sources - populate source buffers for gen_syndrome + * @srcs - (struct page *) array of size sh->disks + * @sh - stripe_head to parse + * + * Populates srcs in proper layout order for the stripe and returns the + * 'count' of sources to be used in a call to async_gen_syndrome. The P + * destination buffer is recorded in srcs[count] and the Q destination + * is recorded in srcs[count+1]]. + */ +static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) +{ + int disks = sh->disks; + int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); + int d0_idx = raid6_d0(sh); + int count; + int i; + + for (i = 0; i < disks; i++) + srcs[i] = (void *)raid6_empty_zero_page; + + count = 0; + i = d0_idx; + do { + int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + + srcs[slot] = sh->dev[i].page; + i = raid6_next_disk(i, disks); + } while (i != d0_idx); + BUG_ON(count != syndrome_disks); + + return count; +} + +static struct dma_async_tx_descriptor * +ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + int disks = sh->disks; + struct page **blocks = percpu->scribble; + int target; + int qd_idx = sh->qd_idx; + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct r5dev *tgt; + struct page *dest; + int i; + int count; + + if (sh->ops.target < 0) + target = sh->ops.target2; + else if (sh->ops.target2 < 0) + target = sh->ops.target; + else + /* we should only have one valid target */ + BUG(); + BUG_ON(target < 0); + pr_debug("%s: stripe %llu block: %d\n", + __func__, (unsigned long long)sh->sector, target); + + tgt = &sh->dev[target]; + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + dest = tgt->page; + + atomic_inc(&sh->count); + + if (target == qd_idx) { + count = set_syndrome_sources(blocks, sh); + blocks[count] = NULL; /* regenerating p is not necessary */ + BUG_ON(blocks[count+1] != dest); /* q should already be set */ + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + } else { + /* Compute any data- or p-drive using XOR */ + count = 0; + for (i = disks; i-- ; ) { + if (i == target || i == qd_idx) + continue; + blocks[count++] = sh->dev[i].page; + } + + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); + } + + return tx; +} + +static struct dma_async_tx_descriptor * +ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + int i, count, disks = sh->disks; + int syndrome_disks = sh->ddf_layout ? disks : disks-2; + int d0_idx = raid6_d0(sh); + int faila = -1, failb = -1; + int target = sh->ops.target; + int target2 = sh->ops.target2; + struct r5dev *tgt = &sh->dev[target]; + struct r5dev *tgt2 = &sh->dev[target2]; + struct dma_async_tx_descriptor *tx; + struct page **blocks = percpu->scribble; + struct async_submit_ctl submit; + + pr_debug("%s: stripe %llu block1: %d block2: %d\n", + __func__, (unsigned long long)sh->sector, target, target2); + BUG_ON(target < 0 || target2 < 0); + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); + + /* we need to open-code set_syndrome_sources to handle the + * slot number conversion for 'faila' and 'failb' + */ + for (i = 0; i < disks ; i++) + blocks[i] = (void *)raid6_empty_zero_page; + count = 0; + i = d0_idx; + do { + int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + + blocks[slot] = sh->dev[i].page; + + if (i == target) + faila = slot; + if (i == target2) + failb = slot; + i = raid6_next_disk(i, disks); + } while (i != d0_idx); + BUG_ON(count != syndrome_disks); + + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + pr_debug("%s: stripe: %llu faila: %d failb: %d\n", + __func__, (unsigned long long)sh->sector, faila, failb); + + atomic_inc(&sh->count); + + if (failb == syndrome_disks+1) { + /* Q disk is one of the missing disks */ + if (faila == syndrome_disks) { + /* Missing P+Q, just recompute */ + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + return async_gen_syndrome(blocks, 0, count+2, + STRIPE_SIZE, &submit); + } else { + struct page *dest; + int data_target; + int qd_idx = sh->qd_idx; + + /* Missing D+Q: recompute D from P, then recompute Q */ + if (target == qd_idx) + data_target = target2; + else + data_target = target; + + count = 0; + for (i = disks; i-- ; ) { + if (i == data_target || i == qd_idx) + continue; + blocks[count++] = sh->dev[i].page; + } + dest = sh->dev[data_target].page; + init_async_submit(&submit, + ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, NULL, NULL, + to_addr_conv(sh, percpu)); + tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, + &submit); + + count = set_syndrome_sources(blocks, sh); + init_async_submit(&submit, ASYNC_TX_FENCE, tx, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + return async_gen_syndrome(blocks, 0, count+2, + STRIPE_SIZE, &submit); + } + } else { + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + if (failb == syndrome_disks) { + /* We're missing D+P. */ + return async_raid6_datap_recov(syndrome_disks+2, + STRIPE_SIZE, faila, + blocks, &submit); + } else { + /* We're missing D+D. */ + return async_raid6_2data_recov(syndrome_disks+2, + STRIPE_SIZE, faila, failb, + blocks, &submit); + } + } +} + + static void ops_complete_prexor(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; @@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref) } static struct dma_async_tx_descriptor * -ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + struct page **xor_srcs = percpu->scribble; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, to_addr_conv(sh, percpu)); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) return tx; } -static void ops_complete_postxor(void *stripe_head_ref) +static void ops_complete_reconstruct(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; - int disks = sh->disks, i, pd_idx = sh->pd_idx; + int disks = sh->disks; + int pd_idx = sh->pd_idx; + int qd_idx = sh->qd_idx; + int i; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->written || i == pd_idx) + + if (dev->written || i == pd_idx || i == qd_idx) set_bit(R5_UPTODATE, &dev->flags); } @@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref) } static void -ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; - + struct page **xor_srcs = percpu->scribble; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, + to_addr_conv(sh, percpu)); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); +} + +static void +ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) +{ + struct async_submit_ctl submit; + struct page **blocks = percpu->scribble; + int count; + + pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + + count = set_syndrome_sources(blocks, sh); + + atomic_inc(&sh->count); + + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct, + sh, to_addr_conv(sh, percpu)); + async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref) release_stripe(sh); } -static void ops_run_check(struct stripe_head *sh) +static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + int pd_idx = sh->pd_idx; + int qd_idx = sh->qd_idx; + struct page *xor_dest; + struct page **xor_srcs = percpu->scribble; struct dma_async_tx_descriptor *tx; - - int count = 0, pd_idx = sh->pd_idx, i; - struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; + struct async_submit_ctl submit; + int count; + int i; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + count = 0; + xor_dest = sh->dev[pd_idx].page; + xor_srcs[count++] = xor_dest; for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (i != pd_idx) - xor_srcs[count++] = dev->page; + if (i == pd_idx || i == qd_idx) + continue; + xor_srcs[count++] = sh->dev[i].page; } - tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + init_async_submit(&submit, 0, NULL, NULL, NULL, + to_addr_conv(sh, percpu)); + tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + &sh->ops.zero_sum_result, &submit); + + atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); +} + +static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) +{ + struct page **srcs = percpu->scribble; + struct async_submit_ctl submit; + int count; + + pr_debug("%s: stripe %llu checkp: %d\n", __func__, + (unsigned long long)sh->sector, checkp); + + count = set_syndrome_sources(srcs, sh); + if (!checkp) + srcs[count] = NULL; atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, + sh, to_addr_conv(sh, percpu)); + async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, + &sh->ops.zero_sum_result, percpu->spare_page, &submit); } -static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) { int overlap_clear = 0, i, disks = sh->disks; struct dma_async_tx_descriptor *tx = NULL; + raid5_conf_t *conf = sh->raid_conf; + int level = conf->level; + struct raid5_percpu *percpu; + unsigned long cpu; + cpu = get_cpu(); + percpu = per_cpu_ptr(conf->percpu, cpu); if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; } if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { - tx = ops_run_compute5(sh); - /* terminate the chain if postxor is not set to be run */ - if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) + if (level < 6) + tx = ops_run_compute5(sh, percpu); + else { + if (sh->ops.target2 < 0 || sh->ops.target < 0) + tx = ops_run_compute6_1(sh, percpu); + else + tx = ops_run_compute6_2(sh, percpu); + } + /* terminate the chain if reconstruct is not set to be run */ + if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) async_tx_ack(tx); } if (test_bit(STRIPE_OP_PREXOR, &ops_request)) - tx = ops_run_prexor(sh, tx); + tx = ops_run_prexor(sh, percpu, tx); if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { tx = ops_run_biodrain(sh, tx); overlap_clear++; } - if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) - ops_run_postxor(sh, tx); + if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { + if (level < 6) + ops_run_reconstruct5(sh, percpu, tx); + else + ops_run_reconstruct6(sh, percpu, tx); + } - if (test_bit(STRIPE_OP_CHECK, &ops_request)) - ops_run_check(sh); + if (test_bit(STRIPE_OP_CHECK, &ops_request)) { + if (sh->check_state == check_state_run) + ops_run_check_p(sh, percpu); + else if (sh->check_state == check_state_run_q) + ops_run_check_pq(sh, percpu, 0); + else if (sh->check_state == check_state_run_pq) + ops_run_check_pq(sh, percpu, 1); + else + BUG(); + } if (overlap_clear) for (i = disks; i--; ) { @@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } + put_cpu(); } static int grow_one_stripe(raid5_conf_t *conf) @@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num) return 0; } +/** + * scribble_len - return the required size of the scribble region + * @num - total number of disks in the array + * + * The size must be enough to contain: + * 1/ a struct page pointer for each device in the array +2 + * 2/ room to convert each entry in (1) to its corresponding dma + * (dma_map_page()) or page (page_address()) address. + * + * Note: the +2 is for the destination buffers of the ddf/raid6 case where we + * calculate over all devices (not just the data blocks), using zeros in place + * of the P and Q blocks. + */ +static size_t scribble_len(int num) +{ + size_t len; + + len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); + + return len; +} + static int resize_stripes(raid5_conf_t *conf, int newsize) { /* Make all the stripes able to hold 'newsize' devices. @@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) struct stripe_head *osh, *nsh; LIST_HEAD(newstripes); struct disk_info *ndisks; + unsigned long cpu; int err; struct kmem_cache *sc; int i; @@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) /* Step 3. * At this point, we are holding all the stripes so the array * is completely stalled, so now is a good time to resize - * conf->disks. + * conf->disks and the scribble region */ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); if (ndisks) { @@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) } else err = -ENOMEM; + get_online_cpus(); + conf->scribble_len = scribble_len(newsize); + for_each_present_cpu(cpu) { + struct raid5_percpu *percpu; + void *scribble; + + percpu = per_cpu_ptr(conf->percpu, cpu); + scribble = kmalloc(conf->scribble_len, GFP_NOIO); + + if (scribble) { + kfree(percpu->scribble); + percpu->scribble = scribble; + } else { + err = -ENOMEM; + break; + } + } + put_online_cpus(); + /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del_init(&nsh->lru); + for (i=conf->raid_disks; i < newsize; i++) if (nsh->dev[i].page == NULL) { struct page *p = alloc_page(GFP_NOIO); @@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) } - -/* - * Copy data between a page in the stripe cache, and one or more bion - * The page could align with the middle of the bio, or there could be - * several bion, each with several bio_vecs, which cover part of the page - * Multiple bion are linked together on bi_next. There may be extras - * at the end of this list. We ignore them. - */ -static void copy_data(int frombio, struct bio *bio, - struct page *page, - sector_t sector) -{ - char *pa = page_address(page); - struct bio_vec *bvl; - int i; - int page_offset; - - if (bio->bi_sector >= sector) - page_offset = (signed)(bio->bi_sector - sector) * 512; - else - page_offset = (signed)(sector - bio->bi_sector) * -512; - bio_for_each_segment(bvl, bio, i) { - int len = bio_iovec_idx(bio,i)->bv_len; - int clen; - int b_offset = 0; - - if (page_offset < 0) { - b_offset = -page_offset; - page_offset += b_offset; - len -= b_offset; - } - - if (len > 0 && page_offset + len > STRIPE_SIZE) - clen = STRIPE_SIZE - page_offset; - else clen = len; - - if (clen > 0) { - char *ba = __bio_kmap_atomic(bio, i, KM_USER0); - if (frombio) - memcpy(pa+page_offset, ba+b_offset, clen); - else - memcpy(ba+b_offset, pa+page_offset, clen); - __bio_kunmap_atomic(ba, KM_USER0); - } - if (clen < len) /* hit end of page */ - break; - page_offset += len; - } -} - -#define check_xor() do { \ - if (count == MAX_XOR_BLOCKS) { \ - xor_blocks(count, STRIPE_SIZE, dest, ptr);\ - count = 0; \ - } \ - } while(0) - -static void compute_parity6(struct stripe_head *sh, int method) -{ - raid5_conf_t *conf = sh->raid_conf; - int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; - int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); - struct bio *chosen; - /**** FIX THIS: This could be very bad if disks is close to 256 ****/ - void *ptrs[syndrome_disks+2]; - - pd_idx = sh->pd_idx; - qd_idx = sh->qd_idx; - d0_idx = raid6_d0(sh); - - pr_debug("compute_parity, stripe %llu, method %d\n", - (unsigned long long)sh->sector, method); - - switch(method) { - case READ_MODIFY_WRITE: - BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */ - case RECONSTRUCT_WRITE: - for (i= disks; i-- ;) - if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - } - break; - case CHECK_PARITY: - BUG(); /* Not implemented yet */ - } - - for (i = disks; i--;) - if (sh->dev[i].written) { - sector_t sector = sh->dev[i].sector; - struct bio *wbi = sh->dev[i].written; - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { - copy_data(1, wbi, sh->dev[i].page, sector); - wbi = r5_next_bio(wbi, sector); - } - - set_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(R5_UPTODATE, &sh->dev[i].flags); - } - - /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ - - for (i = 0; i < disks; i++) - ptrs[i] = (void *)raid6_empty_zero_page; - - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - ptrs[slot] = page_address(sh->dev[i].page); - if (slot < syndrome_disks && - !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { - printk(KERN_ERR "block %d/%d not uptodate " - "on parity calc\n", i, count); - BUG(); - } - - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - BUG_ON(count != syndrome_disks); - - raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); - - switch(method) { - case RECONSTRUCT_WRITE: - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[qd_idx].flags); - break; - case UPDATE_PARITY: - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); - break; - } -} - - -/* Compute one missing block */ -static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) -{ - int i, count, disks = sh->disks; - void *ptr[MAX_XOR_BLOCKS], *dest, *p; - int qd_idx = sh->qd_idx; - - pr_debug("compute_block_1, stripe %llu, idx %d\n", - (unsigned long long)sh->sector, dd_idx); - - if ( dd_idx == qd_idx ) { - /* We're actually computing the Q drive */ - compute_parity6(sh, UPDATE_PARITY); - } else { - dest = page_address(sh->dev[dd_idx].page); - if (!nozero) memset(dest, 0, STRIPE_SIZE); - count = 0; - for (i = disks ; i--; ) { - if (i == dd_idx || i == qd_idx) - continue; - p = page_address(sh->dev[i].page); - if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) - ptr[count++] = p; - else - printk("compute_block() %d, stripe %llu, %d" - " not present\n", dd_idx, - (unsigned long long)sh->sector, i); - - check_xor(); - } - if (count) - xor_blocks(count, STRIPE_SIZE, dest, ptr); - if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); - else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); - } -} - -/* Compute two missing blocks */ -static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) -{ - int i, count, disks = sh->disks; - int syndrome_disks = sh->ddf_layout ? disks : disks-2; - int d0_idx = raid6_d0(sh); - int faila = -1, failb = -1; - /**** FIX THIS: This could be very bad if disks is close to 256 ****/ - void *ptrs[syndrome_disks+2]; - - for (i = 0; i < disks ; i++) - ptrs[i] = (void *)raid6_empty_zero_page; - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - ptrs[slot] = page_address(sh->dev[i].page); - - if (i == dd_idx1) - faila = slot; - if (i == dd_idx2) - failb = slot; - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - BUG_ON(count != syndrome_disks); - - BUG_ON(faila == failb); - if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } - - pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", - (unsigned long long)sh->sector, dd_idx1, dd_idx2, - faila, failb); - - if (failb == syndrome_disks+1) { - /* Q disk is one of the missing disks */ - if (faila == syndrome_disks) { - /* Missing P+Q, just recompute */ - compute_parity6(sh, UPDATE_PARITY); - return; - } else { - /* We're missing D+Q; recompute D from P */ - compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? - dd_idx2 : dd_idx1), - 0); - compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ - return; - } - } - - /* We're missing D+P or D+D; */ - if (failb == syndrome_disks) { - /* We're missing D+P. */ - raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); - } else { - /* We're missing D+D. */ - raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, - ptrs); - } - - /* Both the above update both missing blocks */ - set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); - set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); -} - static void -schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, +schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int rcw, int expand) { int i, pd_idx = sh->pd_idx, disks = sh->disks; + raid5_conf_t *conf = sh->raid_conf; + int level = conf->level; if (rcw) { /* if we are not expanding this is a proper write request, and @@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, } else sh->reconstruct_state = reconstruct_state_run; - set_bit(STRIPE_OP_POSTXOR, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } - if (s->locked + 1 == disks) + if (s->locked + conf->max_degraded == disks) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) - atomic_inc(&sh->raid_conf->pending_full_writes); + atomic_inc(&conf->pending_full_writes); } else { + BUG_ON(level == 6); BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); sh->reconstruct_state = reconstruct_state_prexor_drain_run; set_bit(STRIPE_OP_PREXOR, &s->ops_request); set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_POSTXOR, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, } } - /* keep the parity disk locked while asynchronous operations + /* keep the parity disk(s) locked while asynchronous operations * are in flight */ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); s->locked++; + if (level == 6) { + int qd_idx = sh->qd_idx; + struct r5dev *dev = &sh->dev[qd_idx]; + + set_bit(R5_LOCKED, &dev->flags); + clear_bit(R5_UPTODATE, &dev->flags); + s->locked++; + } + pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", __func__, (unsigned long long)sh->sector, s->locked, s->ops_request); @@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in static void end_reshape(raid5_conf_t *conf); -static int page_is_zero(struct page *p) -{ - char *a = page_address(p); - return ((*(u32*)a) == 0 && - memcmp(a, a+4, STRIPE_SIZE-4)==0); -} - static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, struct stripe_head *sh) { @@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); set_bit(R5_Wantcompute, &dev->flags); sh->ops.target = disk_idx; + sh->ops.target2 = -1; s->req_compute = 1; /* Careful: from this point on 'uptodate' is in the eye - * of raid5_run_ops which services 'compute' operations + * of raid_run_ops which services 'compute' operations * before writes. R5_Wantcompute flags a block that will * be R5_UPTODATE by the time it is needed for a * subsequent operation. @@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } -static void handle_stripe_fill6(struct stripe_head *sh, - struct stripe_head_state *s, struct r6_state *r6s, - int disks) +/* fetch_block6 - checks the given member device to see if its data needs + * to be read or computed to satisfy a request. + * + * Returns 1 when no more member devices need to be checked, otherwise returns + * 0 to tell the loop in handle_stripe_fill6 to continue + */ +static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, + struct r6_state *r6s, int disk_idx, int disks) { - int i; - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || (dev->towrite && - !test_bit(R5_OVERWRITE, &dev->flags)) || - s->syncing || s->expanding || - (s->failed >= 1 && - (sh->dev[r6s->failed_num[0]].toread || - s->to_write)) || - (s->failed >= 2 && - (sh->dev[r6s->failed_num[1]].toread || - s->to_write)))) { - /* we would like to get this block, possibly - * by computing it, but we might not be able to + struct r5dev *dev = &sh->dev[disk_idx]; + struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], + &sh->dev[r6s->failed_num[1]] }; + + if (!test_bit(R5_LOCKED, &dev->flags) && + !test_bit(R5_UPTODATE, &dev->flags) && + (dev->toread || + (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || + s->syncing || s->expanding || + (s->failed >= 1 && + (fdev[0]->toread || s->to_write)) || + (s->failed >= 2 && + (fdev[1]->toread || s->to_write)))) { + /* we would like to get this block, possibly by computing it, + * otherwise read it if the backing disk is insync + */ + BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); + BUG_ON(test_bit(R5_Wantread, &dev->flags)); + if ((s->uptodate == disks - 1) && + (s->failed && (disk_idx == r6s->failed_num[0] || + disk_idx == r6s->failed_num[1]))) { + /* have disk failed, and we're requested to fetch it; + * do compute it */ - if ((s->uptodate == disks - 1) && - (s->failed && (i == r6s->failed_num[0] || - i == r6s->failed_num[1]))) { - pr_debug("Computing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - compute_block_1(sh, i, 0); - s->uptodate++; - } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { - /* Computing 2-failure is *very* expensive; only - * do it if failed >= 2 - */ - int other; - for (other = disks; other--; ) { - if (other == i) - continue; - if (!test_bit(R5_UPTODATE, - &sh->dev[other].flags)) - break; - } - BUG_ON(other < 0); - pr_debug("Computing stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, - i, other); - compute_block_2(sh, i, other); - s->uptodate += 2; - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - pr_debug("Reading block %d (sync=%d)\n", - i, s->syncing); + pr_debug("Computing stripe %llu block %d\n", + (unsigned long long)sh->sector, disk_idx); + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + set_bit(R5_Wantcompute, &dev->flags); + sh->ops.target = disk_idx; + sh->ops.target2 = -1; /* no 2nd target */ + s->req_compute = 1; + s->uptodate++; + return 1; + } else if (s->uptodate == disks-2 && s->failed >= 2) { + /* Computing 2-failure is *very* expensive; only + * do it if failed >= 2 + */ + int other; + for (other = disks; other--; ) { + if (other == disk_idx) + continue; + if (!test_bit(R5_UPTODATE, + &sh->dev[other].flags)) + break; } + BUG_ON(other < 0); + pr_debug("Computing stripe %llu blocks %d,%d\n", + (unsigned long long)sh->sector, + disk_idx, other); + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); + set_bit(R5_Wantcompute, &sh->dev[other].flags); + sh->ops.target = disk_idx; + sh->ops.target2 = other; + s->uptodate += 2; + s->req_compute = 1; + return 1; + } else if (test_bit(R5_Insync, &dev->flags)) { + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; + pr_debug("Reading block %d (sync=%d)\n", + disk_idx, s->syncing); } } + + return 0; +} + +/** + * handle_stripe_fill6 - read or compute data to satisfy pending requests. + */ +static void handle_stripe_fill6(struct stripe_head *sh, + struct stripe_head_state *s, struct r6_state *r6s, + int disks) +{ + int i; + + /* look for blocks to read/compute, skip this if a compute + * is already in flight, or if the stripe contents are in the + * midst of changing due to a write + */ + if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && + !sh->reconstruct_state) + for (i = disks; i--; ) + if (fetch_block6(sh, s, r6s, i, disks)) + break; set_bit(STRIPE_HANDLE, &sh->state); } @@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, */ /* since handle_stripe can be called at any time we need to handle the * case where a compute block operation has been submitted and then a - * subsequent call wants to start a write request. raid5_run_ops only - * handles the case where compute block and postxor are requested + * subsequent call wants to start a write request. raid_run_ops only + * handles the case where compute block and reconstruct are requested * simultaneously. If this is not the case then new writes need to be * held off until the compute completes. */ if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && (s->locked == 0 && (rcw == 0 || rmw == 0) && !test_bit(STRIPE_BIT_DELAY, &sh->state))) - schedule_reconstruction5(sh, s, rcw == 0, 0); + schedule_reconstruction(sh, s, rcw == 0, 0); } static void handle_stripe_dirtying6(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, struct r6_state *r6s, int disks) { - int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; + int rcw = 0, pd_idx = sh->pd_idx, i; int qd_idx = sh->qd_idx; + + set_bit(STRIPE_HANDLE, &sh->state); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - /* Would I have to read this buffer for reconstruct_write */ - if (!test_bit(R5_OVERWRITE, &dev->flags) - && i != pd_idx && i != qd_idx - && (!test_bit(R5_LOCKED, &dev->flags) - ) && - !test_bit(R5_UPTODATE, &dev->flags)) { - if (test_bit(R5_Insync, &dev->flags)) rcw++; - else { - pr_debug("raid6: must_compute: " - "disk %d flags=%#lx\n", i, dev->flags); - must_compute++; + /* check if we haven't enough data */ + if (!test_bit(R5_OVERWRITE, &dev->flags) && + i != pd_idx && i != qd_idx && + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags))) { + rcw++; + if (!test_bit(R5_Insync, &dev->flags)) + continue; /* it's a failed drive */ + + if ( + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + pr_debug("Read_old stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; + } else { + pr_debug("Request delayed stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); } } } - pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", - (unsigned long long)sh->sector, rcw, must_compute); - set_bit(STRIPE_HANDLE, &sh->state); - - if (rcw > 0) - /* want reconstruct write, but need to get some data */ - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (!test_bit(R5_OVERWRITE, &dev->flags) - && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) - && !test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - test_bit(R5_Insync, &dev->flags)) { - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - pr_debug("Read_old stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - pr_debug("Request delayed stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } /* now if nothing is locked, and if we have enough data, we can start a * write request */ - if (s->locked == 0 && rcw == 0 && + if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && + s->locked == 0 && rcw == 0 && !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - if (must_compute > 0) { - /* We have failed blocks and need to compute them */ - switch (s->failed) { - case 0: - BUG(); - case 1: - compute_block_1(sh, r6s->failed_num[0], 0); - break; - case 2: - compute_block_2(sh, r6s->failed_num[0], - r6s->failed_num[1]); - break; - default: /* This request should have been failed? */ - BUG(); - } - } - - pr_debug("Computing parity for stripe %llu\n", - (unsigned long long)sh->sector); - compute_parity6(sh, RECONSTRUCT_WRITE); - /* now every locked buffer is ready to be written */ - for (i = disks; i--; ) - if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { - pr_debug("Writing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - s->locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); - } - if (s->locked == disks) - if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) - atomic_inc(&conf->pending_full_writes); - /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ - set_bit(STRIPE_INSYNC, &sh->state); - - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < - IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - } + schedule_reconstruction(sh, s, 1, 0); } } @@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ @@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_Wantcompute, &sh->dev[sh->pd_idx].flags); sh->ops.target = sh->pd_idx; + sh->ops.target2 = -1; s->uptodate++; } } @@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, - struct stripe_head_state *s, - struct r6_state *r6s, struct page *tmp_page, - int disks) + struct stripe_head_state *s, + struct r6_state *r6s, int disks) { - int update_p = 0, update_q = 0; - struct r5dev *dev; int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; + struct r5dev *dev; set_bit(STRIPE_HANDLE, &sh->state); BUG_ON(s->failed > 2); - BUG_ON(s->uptodate < disks); + /* Want to check and possibly repair P and Q. * However there could be one 'failed' device, in which * case we can only check one of them, possibly using the * other to generate missing data */ - /* If !tmp_page, we cannot do the calculations, - * but as we have set STRIPE_HANDLE, we will soon be called - * by stripe_handle with a tmp_page - just wait until then. - */ - if (tmp_page) { + switch (sh->check_state) { + case check_state_idle: + /* start a new check operation if there are < 2 failures */ if (s->failed == r6s->q_failed) { - /* The only possible failed device holds 'Q', so it + /* The only possible failed device holds Q, so it * makes sense to check P (If anything else were failed, * we would have used P to recreate it). */ - compute_block_1(sh, pd_idx, 1); - if (!page_is_zero(sh->dev[pd_idx].page)) { - compute_block_1(sh, pd_idx, 0); - update_p = 1; - } + sh->check_state = check_state_run; } if (!r6s->q_failed && s->failed < 2) { - /* q is not failed, and we didn't use it to generate + /* Q is not failed, and we didn't use it to generate * anything, so it makes sense to check it */ - memcpy(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE); - compute_parity6(sh, UPDATE_PARITY); - if (memcmp(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE) != 0) { - clear_bit(STRIPE_INSYNC, &sh->state); - update_q = 1; - } + if (sh->check_state == check_state_run) + sh->check_state = check_state_run_pq; + else + sh->check_state = check_state_run_q; } - if (update_p || update_q) { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! */ - update_p = update_q = 0; + + /* discard potentially stale zero_sum_result */ + sh->ops.zero_sum_result = 0; + + if (sh->check_state == check_state_run) { + /* async_xor_zero_sum destroys the contents of P */ + clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); + s->uptodate--; + } + if (sh->check_state >= check_state_run && + sh->check_state <= check_state_run_pq) { + /* async_syndrome_zero_sum preserves P and Q, so + * no need to mark them !uptodate here + */ + set_bit(STRIPE_OP_CHECK, &s->ops_request); + break; } + /* we have 2-disk failure */ + BUG_ON(s->failed != 2); + /* fall through */ + case check_state_compute_result: + sh->check_state = check_state_idle; + + /* check that a write has not made the stripe insync */ + if (test_bit(STRIPE_INSYNC, &sh->state)) + break; + /* now write out any block on a failed drive, - * or P or Q if they need it + * or P or Q if they were recomputed */ - + BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ if (s->failed == 2) { dev = &sh->dev[r6s->failed_num[1]]; s->locked++; @@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } - - if (update_p) { + if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { dev = &sh->dev[pd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } - if (update_q) { + if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { dev = &sh->dev[qd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); @@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, clear_bit(STRIPE_DEGRADED, &sh->state); set_bit(STRIPE_INSYNC, &sh->state); + break; + case check_state_run: + case check_state_run_q: + case check_state_run_pq: + break; /* we will be called again upon completion */ + case check_state_check_result: + sh->check_state = check_state_idle; + + /* handle a successful check operation, if parity is correct + * we are done. Otherwise update the mismatch count and repair + * parity if !MD_RECOVERY_CHECK + */ + if (sh->ops.zero_sum_result == 0) { + /* both parities are correct */ + if (!s->failed) + set_bit(STRIPE_INSYNC, &sh->state); + else { + /* in contrast to the raid5 case we can validate + * parity, but still have a failure to write + * back + */ + sh->check_state = check_state_compute_result; + /* Returning at this point means that we may go + * off and bring p and/or q uptodate again so + * we make sure to check zero_sum_result again + * to verify if p or q need writeback + */ + } + } else { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! */ + set_bit(STRIPE_INSYNC, &sh->state); + else { + int *target = &sh->ops.target; + + sh->ops.target = -1; + sh->ops.target2 = -1; + sh->check_state = check_state_compute_run; + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { + set_bit(R5_Wantcompute, + &sh->dev[pd_idx].flags); + *target = pd_idx; + target = &sh->ops.target2; + s->uptodate++; + } + if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { + set_bit(R5_Wantcompute, + &sh->dev[qd_idx].flags); + *target = qd_idx; + s->uptodate++; + } + } + } + break; + case check_state_compute_run: + break; + default: + printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", + __func__, sh->check_state, + (unsigned long long) sh->sector); + BUG(); } } @@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); @@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh) rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; - struct r5dev *dev = &sh->dev[i]; + + dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); pr_debug("check %d: state 0x%lx toread %p read %p write %p " @@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh) /* Need to write out all blocks after computing parity */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); - schedule_reconstruction5(sh, &s, 1, 1); + schedule_reconstruction(sh, &s, 1, 1); } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); @@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh) md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); if (s.ops_request) - raid5_run_ops(sh, s.ops_request); + raid_run_ops(sh, s.ops_request); ops_run_io(sh, &s); @@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh) return blocked_rdev == NULL; } -static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +static bool handle_stripe6(struct stripe_head *sh) { raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks; @@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) mdk_rdev_t *blocked_rdev = NULL; pr_debug("handling stripe %llu, state=%#lx cnt=%d, " - "pd_idx=%d, qd_idx=%d\n", + "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), pd_idx, qd_idx); + atomic_read(&sh->count), pd_idx, qd_idx, + sh->check_state, sh->reconstruct_state); memset(&s, 0, sizeof(s)); spin_lock(&sh->lock); @@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) pr_debug("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { - struct bio *rbi, *rbi2; - pr_debug("Return read for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - rbi = dev->toread; - dev->toread = NULL; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { - copy_data(0, rbi, dev->page, dev->sector); - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if (!raid5_dec_bi_phys_segments(rbi)) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } + /* maybe we can reply to a read + * + * new wantfill requests are only permitted while + * ops_complete_biofill is guaranteed to be inactive + */ + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && + !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) + set_bit(R5_Wantfill, &dev->flags); /* now count some things */ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; + if (test_bit(R5_Wantcompute, &dev->flags)) { + s.compute++; + BUG_ON(s.compute > 2); + } - - if (dev->toread) + if (test_bit(R5_Wantfill, &dev->flags)) { + s.to_fill++; + } else if (dev->toread) s.to_read++; if (dev->towrite) { s.to_write++; @@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) blocked_rdev = NULL; } + if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { + set_bit(STRIPE_OP_BIOFILL, &s.ops_request); + set_bit(STRIPE_BIOFILL_RUN, &sh->state); + } + pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, @@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) * or to load a block that is being partially written. */ if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || - (s.syncing && (s.uptodate < disks)) || s.expanding) + (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) handle_stripe_fill6(sh, &s, &r6s, disks); - /* now to consider writing and what else, if anything should be read */ - if (s.to_write) + /* Now we check to see if any write operations have recently + * completed + */ + if (sh->reconstruct_state == reconstruct_state_drain_result) { + int qd_idx = sh->qd_idx; + + sh->reconstruct_state = reconstruct_state_idle; + /* All the 'written' buffers and the parity blocks are ready to + * be written back to disk + */ + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); + for (i = disks; i--; ) { + dev = &sh->dev[i]; + if (test_bit(R5_LOCKED, &dev->flags) && + (i == sh->pd_idx || i == qd_idx || + dev->written)) { + pr_debug("Writing block %d\n", i); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); + set_bit(R5_Wantwrite, &dev->flags); + if (!test_bit(R5_Insync, &dev->flags) || + ((i == sh->pd_idx || i == qd_idx) && + s.failed == 0)) + set_bit(STRIPE_INSYNC, &sh->state); + } + } + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + atomic_dec(&conf->preread_active_stripes); + if (atomic_read(&conf->preread_active_stripes) < + IO_THRESHOLD) + md_wakeup_thread(conf->mddev->thread); + } + } + + /* Now to consider new write requests and what else, if anything + * should be read. We do not handle new writes when: + * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. + * 2/ A 'check' operation is in flight, as it may clobber the parity + * block. + */ + if (s.to_write && !sh->reconstruct_state && !sh->check_state) handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); /* maybe we need to check and possibly fix the parity for this stripe * Any reads will already have been scheduled, so we just see if enough - * data is available + * data is available. The parity check is held off while parity + * dependent operations are in flight. */ - if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) - handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); + if (sh->check_state || + (s.syncing && s.locked == 0 && + !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && + !test_bit(STRIPE_INSYNC, &sh->state))) + handle_parity_checks6(conf, sh, &s, &r6s, disks); if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); @@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) set_bit(R5_Wantwrite, &dev->flags); set_bit(R5_ReWrite, &dev->flags); set_bit(R5_LOCKED, &dev->flags); + s.locked++; } else { /* let's read it back */ set_bit(R5_Wantread, &dev->flags); set_bit(R5_LOCKED, &dev->flags); + s.locked++; } } } - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { + /* Finish reconstruct operations initiated by the expansion process */ + if (sh->reconstruct_state == reconstruct_state_result) { + sh->reconstruct_state = reconstruct_state_idle; + clear_bit(STRIPE_EXPANDING, &sh->state); + for (i = conf->raid_disks; i--; ) { + set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &sh->dev[i].flags); + s.locked++; + } + } + + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && + !sh->reconstruct_state) { struct stripe_head *sh2 = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { @@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) /* Need to write out all blocks after computing P&Q */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); - compute_parity6(sh, RECONSTRUCT_WRITE); - for (i = conf->raid_disks ; i-- ; ) { - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); - } - clear_bit(STRIPE_EXPANDING, &sh->state); - } else if (s.expanded) { + schedule_reconstruction(sh, &s, 1, 1); + } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); @@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (unlikely(blocked_rdev)) md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + if (s.ops_request) + raid_run_ops(sh, s.ops_request); + ops_run_io(sh, &s); return_io(return_bi); @@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) } /* returns true if the stripe was handled */ -static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) +static bool handle_stripe(struct stripe_head *sh) { if (sh->raid_conf->level == 6) - return handle_stripe6(sh, tmp_page); + return handle_stripe6(sh); else return handle_stripe5(sh); } - - static void raid5_activate_delayed(raid5_conf_t *conf) { if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { @@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits) /* No difference between reads and writes. Just check * how busy the stripe_cache is */ + + if (mddev_congested(mddev, bits)) + return 1; if (conf->inactive_blocked) return 1; if (conf->quiesce) @@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped INIT_LIST_HEAD(&stripes); for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; - int skipped = 0; + int skipped_disk = 0; sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); @@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped continue; s = compute_blocknr(sh, j, 0); if (s < raid5_size(mddev, 0, 0)) { - skipped = 1; + skipped_disk = 1; continue; } memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); set_bit(R5_Expanded, &sh->dev[j].flags); set_bit(R5_UPTODATE, &sh->dev[j].flags); } - if (!skipped) { + if (!skipped_disk) { set_bit(STRIPE_EXPAND_READY, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); } @@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski spin_unlock(&sh->lock); /* wait for any blocked device to be handled */ - while(unlikely(!handle_stripe(sh, NULL))) + while (unlikely(!handle_stripe(sh))) ; release_stripe(sh); @@ -4114,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) return handled; } - handle_stripe(sh, NULL); + handle_stripe(sh); release_stripe(sh); handled++; } @@ -4128,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) return handled; } +#ifdef CONFIG_MULTICORE_RAID456 +static void __process_stripe(void *param, async_cookie_t cookie) +{ + struct stripe_head *sh = param; + + handle_stripe(sh); + release_stripe(sh); +} + +static void process_stripe(struct stripe_head *sh, struct list_head *domain) +{ + async_schedule_domain(__process_stripe, sh, domain); +} + +static void synchronize_stripe_processing(struct list_head *domain) +{ + async_synchronize_full_domain(domain); +} +#else +static void process_stripe(struct stripe_head *sh, struct list_head *domain) +{ + handle_stripe(sh); + release_stripe(sh); + cond_resched(); +} + +static void synchronize_stripe_processing(struct list_head *domain) +{ +} +#endif /* @@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev) struct stripe_head *sh; raid5_conf_t *conf = mddev->private; int handled; + LIST_HEAD(raid_domain); pr_debug("+++ raid5d active\n"); @@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); handled++; - handle_stripe(sh, conf->spare_page); - release_stripe(sh); + process_stripe(sh, &raid_domain); spin_lock_irq(&conf->device_lock); } @@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); + synchronize_stripe_processing(&raid_domain); async_tx_issue_pending_all(); unplug_slaves(mddev); @@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) return sectors * (raid_disks - conf->max_degraded); } +static void raid5_free_percpu(raid5_conf_t *conf) +{ + struct raid5_percpu *percpu; + unsigned long cpu; + + if (!conf->percpu) + return; + + get_online_cpus(); + for_each_possible_cpu(cpu) { + percpu = per_cpu_ptr(conf->percpu, cpu); + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + } +#ifdef CONFIG_HOTPLUG_CPU + unregister_cpu_notifier(&conf->cpu_notify); +#endif + put_online_cpus(); + + free_percpu(conf->percpu); +} + static void free_conf(raid5_conf_t *conf) { shrink_stripes(conf); - safe_put_page(conf->spare_page); + raid5_free_percpu(conf); kfree(conf->disks); kfree(conf->stripe_hashtbl); kfree(conf); } +#ifdef CONFIG_HOTPLUG_CPU +static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify); + long cpu = (long)hcpu; + struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + if (conf->level == 6 && !percpu->spare_page) + percpu->spare_page = alloc_page(GFP_KERNEL); + if (!percpu->scribble) + percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL); + + if (!percpu->scribble || + (conf->level == 6 && !percpu->spare_page)) { + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + pr_err("%s: failed memory allocation for cpu%ld\n", + __func__, cpu); + return NOTIFY_BAD; + } + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + percpu->spare_page = NULL; + percpu->scribble = NULL; + break; + default: + break; + } + return NOTIFY_OK; +} +#endif + +static int raid5_alloc_percpu(raid5_conf_t *conf) +{ + unsigned long cpu; + struct page *spare_page; + struct raid5_percpu *allcpus; + void *scribble; + int err; + + allcpus = alloc_percpu(struct raid5_percpu); + if (!allcpus) + return -ENOMEM; + conf->percpu = allcpus; + + get_online_cpus(); + err = 0; + for_each_present_cpu(cpu) { + if (conf->level == 6) { + spare_page = alloc_page(GFP_KERNEL); + if (!spare_page) { + err = -ENOMEM; + break; + } + per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; + } + scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); + if (!scribble) { + err = -ENOMEM; + break; + } + per_cpu_ptr(conf->percpu, cpu)->scribble = scribble; + } +#ifdef CONFIG_HOTPLUG_CPU + conf->cpu_notify.notifier_call = raid456_cpu_notify; + conf->cpu_notify.priority = 0; + if (err == 0) + err = register_cpu_notifier(&conf->cpu_notify); +#endif + put_online_cpus(); + + return err; +} + static raid5_conf_t *setup_conf(mddev_t *mddev) { raid5_conf_t *conf; @@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) goto abort; conf->raid_disks = mddev->raid_disks; + conf->scribble_len = scribble_len(conf->raid_disks); if (mddev->reshape_position == MaxSector) conf->previous_raid_disks = mddev->raid_disks; else @@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - if (mddev->new_level == 6) { - conf->spare_page = alloc_page(GFP_KERNEL); - if (!conf->spare_page) - goto abort; - } + conf->level = mddev->new_level; + if (raid5_alloc_percpu(conf) != 0) + goto abort; + spin_lock_init(&conf->device_lock); init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); @@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) printk(KERN_INFO "raid5: allocated %dkB for %s\n", memory, mdname(mddev)); - conf->thread = md_register_thread(raid5d, mddev, "%s_raid5"); + conf->thread = md_register_thread(raid5d, mddev, NULL); if (!conf->thread) { printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", @@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_reshape"); + "reshape"); } /* read-ahead size must cover two whole stripes, which is @@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_reshape"); + "reshape"); if (!mddev->sync_thread) { mddev->recovery = 0; spin_lock_irq(&conf->device_lock); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 9459689c4ea..2390e0e83da 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include <linux/raid/xor.h> +#include <linux/dmaengine.h> /* * @@ -175,7 +176,9 @@ */ enum check_states { check_state_idle = 0, - check_state_run, /* parity check */ + check_state_run, /* xor parity check */ + check_state_run_q, /* q-parity check */ + check_state_run_pq, /* pq dual parity check */ check_state_check_result, check_state_compute_run, /* parity repair */ check_state_compute_result, @@ -215,8 +218,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target, target2; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; @@ -298,7 +301,7 @@ struct r6_state { #define STRIPE_OP_COMPUTE_BLK 1 #define STRIPE_OP_PREXOR 2 #define STRIPE_OP_BIODRAIN 3 -#define STRIPE_OP_POSTXOR 4 +#define STRIPE_OP_RECONSTRUCT 4 #define STRIPE_OP_CHECK 5 /* @@ -385,8 +388,21 @@ struct raid5_private_data { * (fresh device added). * Cleared when a sync completes. */ - - struct page *spare_page; /* Used when checking P/Q in raid6 */ + /* per cpu variables */ + struct raid5_percpu { + struct page *spare_page; /* Used when checking P/Q in raid6 */ + void *scribble; /* space for constructing buffer + * lists and performing address + * conversions + */ + } *percpu; + size_t scribble_len; /* size of scribble region must be + * associated with conf to handle + * cpu hotplug while reshaping + */ +#ifdef CONFIG_HOTPLUG_CPU + struct notifier_block cpu_notify; +#endif /* * Free stripes pool diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 895e2efca8a..01fc7048474 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -31,10 +31,9 @@ #define DVB_MAJOR 212 #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0 -#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS + #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS #else -#warning invalid CONFIG_DVB_MAX_ADAPTERS value -#define DVB_MAX_ADAPTERS 8 + #define DVB_MAX_ADAPTERS 8 #endif #define DVB_UNSET (-1) diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig index 0e4b97fba38..9744b069241 100644 --- a/drivers/media/dvb/dvb-usb/Kconfig +++ b/drivers/media/dvb/dvb-usb/Kconfig @@ -75,7 +75,7 @@ config DVB_USB_DIB0700 select DVB_DIB3000MC if !DVB_FE_CUSTOMISE select DVB_S5H1411 if !DVB_FE_CUSTOMISE select DVB_LGDT3305 if !DVB_FE_CUSTOMISE - select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE + select DVB_TUNER_DIB0070 select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c index bb6df1b276b..6f094a96ac8 100644 --- a/drivers/media/video/saa7164/saa7164-api.c +++ b/drivers/media/video/saa7164/saa7164-api.c @@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev) goto out; } - if (debug & DBGLVL_API) + if (saa_debug & DBGLVL_API) saa7164_dumphex16(dev, buf, (buflen/16)*16); saa7164_api_dump_subdevs(dev, buf, buflen); @@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len); - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, 2 * 16); ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR, @@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, if (ret != SAA_OK) printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret); else { - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, sizeof(buf)); memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen); } @@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen, *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen; memcpy((buf + 2 * sizeof(u32)), data, datalen); - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, sizeof(buf)); ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR, diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c index e097f1a0969..c45966edc0c 100644 --- a/drivers/media/video/saa7164/saa7164-cmd.c +++ b/drivers/media/video/saa7164/saa7164-cmd.c @@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno) unsigned long stamp; int r; - if (debug >= 4) + if (saa_debug >= 4) saa7164_bus_dump(dev); dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno); diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c index f0dbead188c..709affc3104 100644 --- a/drivers/media/video/saa7164/saa7164-core.c +++ b/drivers/media/video/saa7164/saa7164-core.c @@ -45,8 +45,8 @@ MODULE_LICENSE("GPL"); 32 bus */ -unsigned int debug; -module_param(debug, int, 0644); +unsigned int saa_debug; +module_param_named(debug, saa_debug, int, 0644); MODULE_PARM_DESC(debug, "enable debug messages"); unsigned int waitsecs = 10; @@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev, printk(KERN_ERR "%s() Unsupported board detected, " "registering without firmware\n", __func__); - dprintk(1, "%s() parameter debug = %d\n", __func__, debug); + dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug); dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs); fail_fw: diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h index 6753008a9c9..42660b546f0 100644 --- a/drivers/media/video/saa7164/saa7164.h +++ b/drivers/media/video/saa7164/saa7164.h @@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port, /* ----------------------------------------------------------- */ -extern unsigned int debug; +extern unsigned int saa_debug; #define dprintk(level, fmt, arg...)\ - do { if (debug & level)\ + do { if (saa_debug & level)\ printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\ } while (0) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index a5b448ea4ea..b3bf1c44d74 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card, card->id.type = id_reg.type; card->id.category = id_reg.category; card->id.class = id_reg.class; + dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); } complete(&card->mrq_complete); - dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); return -EAGAIN; } } diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 79689b10f93..766e21e1557 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg) /* Need 1K cacheline aligned that does not cross page boundary */ p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; mq = ALIGNUP(p, 1024); memset(mes, 0xee, sizeof(mes)); dw = mq; diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 9cbf95bedce..ccd4408a26c 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly; static int create_proc_file(struct proc_entry *p) { - p->entry = create_proc_entry(p->name, p->mode, proc_gru); + p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); if (!p->entry) return -1; - p->entry->proc_fops = p->fops; return 0; } diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 065fa818be5..fc25586b7ee 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) struct scatterlist *sg; unsigned int i; enum dma_data_direction direction; + unsigned int sglen; /* * We don't do DMA on "complex" transfers, i.e. with @@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) else direction = DMA_TO_DEVICE; + sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction); + if (sglen != data->sg_len) + goto unmap_exit; desc = chan->device->device_prep_slave_sg(chan, data->sg, data->sg_len, direction, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) - return -ENOMEM; + goto unmap_exit; host->dma.data_desc = desc; desc->callback = atmci_dma_complete; @@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) chan->device->device_issue_pending(chan); return 0; +unmap_exit: + dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction); + return -ENOMEM; } #else /* CONFIG_MMC_ATMELMCI_DMA */ diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 8741d0f5146..3d1e5329da1 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -22,12 +22,13 @@ #include <linux/clk.h> #include <linux/scatterlist.h> #include <linux/gpio.h> +#include <linux/amba/mmci.h> +#include <linux/regulator/consumer.h> #include <asm/cacheflush.h> #include <asm/div64.h> #include <asm/io.h> #include <asm/sizes.h> -#include <asm/mach/mmc.h> #include "mmci.h" @@ -38,6 +39,36 @@ static unsigned int fmax = 515633; +/* + * This must be called with host->lock held + */ +static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired) +{ + u32 clk = 0; + + if (desired) { + if (desired >= host->mclk) { + clk = MCI_CLK_BYPASS; + host->cclk = host->mclk; + } else { + clk = host->mclk / (2 * desired) - 1; + if (clk >= 256) + clk = 255; + host->cclk = host->mclk / (2 * (clk + 1)); + } + if (host->hw_designer == 0x80) + clk |= MCI_FCEN; /* Bug fix in ST IP block */ + clk |= MCI_CLK_ENABLE; + /* This hasn't proven to be worthwhile */ + /* clk |= MCI_CLK_PWRSAVE; */ + } + + if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) + clk |= MCI_WIDE_BUS; + + writel(clk, host->base + MMCICLOCK); +} + static void mmci_request_end(struct mmci_host *host, struct mmc_request *mrq) { @@ -419,30 +450,31 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq) static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct mmci_host *host = mmc_priv(mmc); - u32 clk = 0, pwr = 0; - - if (ios->clock) { - if (ios->clock >= host->mclk) { - clk = MCI_CLK_BYPASS; - host->cclk = host->mclk; - } else { - clk = host->mclk / (2 * ios->clock) - 1; - if (clk >= 256) - clk = 255; - host->cclk = host->mclk / (2 * (clk + 1)); - } - if (host->hw_designer == AMBA_VENDOR_ST) - clk |= MCI_FCEN; /* Bug fix in ST IP block */ - clk |= MCI_CLK_ENABLE; - } - - if (host->plat->translate_vdd) - pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd); + u32 pwr = 0; + unsigned long flags; switch (ios->power_mode) { case MMC_POWER_OFF: + if(host->vcc && + regulator_is_enabled(host->vcc)) + regulator_disable(host->vcc); break; case MMC_POWER_UP: +#ifdef CONFIG_REGULATOR + if (host->vcc) + /* This implicitly enables the regulator */ + mmc_regulator_set_ocr(host->vcc, ios->vdd); +#endif + /* + * The translate_vdd function is not used if you have + * an external regulator, or your design is really weird. + * Using it would mean sending in power control BOTH using + * a regulator AND the 4 MMCIPWR bits. If we don't have + * a regulator, we might have some other platform specific + * power control behind this translate function. + */ + if (!host->vcc && host->plat->translate_vdd) + pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd); /* The ST version does not have this, fall through to POWER_ON */ if (host->hw_designer != AMBA_VENDOR_ST) { pwr |= MCI_PWR_UP; @@ -465,12 +497,16 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } - writel(clk, host->base + MMCICLOCK); + spin_lock_irqsave(&host->lock, flags); + + mmci_set_clkreg(host, ios->clock); if (host->pwr != pwr) { host->pwr = pwr; writel(pwr, host->base + MMCIPOWER); } + + spin_unlock_irqrestore(&host->lock, flags); } static int mmci_get_ro(struct mmc_host *mmc) @@ -517,7 +553,7 @@ static void mmci_check_status(unsigned long data) static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) { - struct mmc_platform_data *plat = dev->dev.platform_data; + struct mmci_platform_data *plat = dev->dev.platform_data; struct mmci_host *host; struct mmc_host *mmc; int ret; @@ -583,7 +619,30 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) mmc->ops = &mmci_ops; mmc->f_min = (host->mclk + 511) / 512; mmc->f_max = min(host->mclk, fmax); - mmc->ocr_avail = plat->ocr_mask; +#ifdef CONFIG_REGULATOR + /* If we're using the regulator framework, try to fetch a regulator */ + host->vcc = regulator_get(&dev->dev, "vmmc"); + if (IS_ERR(host->vcc)) + host->vcc = NULL; + else { + int mask = mmc_regulator_get_ocrmask(host->vcc); + + if (mask < 0) + dev_err(&dev->dev, "error getting OCR mask (%d)\n", + mask); + else { + host->mmc->ocr_avail = (u32) mask; + if (plat->ocr_mask) + dev_warn(&dev->dev, + "Provided ocr_mask/setpower will not be used " + "(using regulator instead)\n"); + } + } +#endif + /* Fall back to platform data if no regulator is found */ + if (host->vcc == NULL) + mmc->ocr_avail = plat->ocr_mask; + mmc->caps = plat->capabilities; /* * We can do SGIO @@ -720,6 +779,10 @@ static int __devexit mmci_remove(struct amba_device *dev) clk_disable(host->clk); clk_put(host->clk); + if (regulator_is_enabled(host->vcc)) + regulator_disable(host->vcc); + regulator_put(host->vcc); + mmc_free_host(mmc); amba_release_regions(dev); diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 839f264c972..1ceb9a90f59 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -161,7 +161,7 @@ struct mmci_host { unsigned int mclk; unsigned int cclk; u32 pwr; - struct mmc_platform_data *plat; + struct mmci_platform_data *plat; u8 hw_designer; u8 hw_revision:4; @@ -175,6 +175,7 @@ struct mmci_host { struct scatterlist *sg_ptr; unsigned int sg_off; unsigned int size; + struct regulator *vcc; }; static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index e55ac792d68..5e0b1529964 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -28,6 +28,7 @@ #include <linux/mmc/host.h> #include <linux/io.h> #include <linux/regulator/consumer.h> +#include <linux/gpio.h> #include <asm/sizes.h> @@ -96,10 +97,18 @@ static inline void pxamci_init_ocr(struct pxamci_host *host) static inline void pxamci_set_power(struct pxamci_host *host, unsigned int vdd) { + int on; + #ifdef CONFIG_REGULATOR if (host->vcc) mmc_regulator_set_ocr(host->vcc, vdd); #endif + if (!host->vcc && host->pdata && + gpio_is_valid(host->pdata->gpio_power)) { + on = ((1 << vdd) & host->pdata->ocr_mask); + gpio_set_value(host->pdata->gpio_power, + !!on ^ host->pdata->gpio_power_invert); + } if (!host->vcc && host->pdata && host->pdata->setpower) host->pdata->setpower(mmc_dev(host->mmc), vdd); } @@ -421,6 +430,12 @@ static int pxamci_get_ro(struct mmc_host *mmc) { struct pxamci_host *host = mmc_priv(mmc); + if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) { + if (host->pdata->gpio_card_ro_invert) + return !gpio_get_value(host->pdata->gpio_card_ro); + else + return gpio_get_value(host->pdata->gpio_card_ro); + } if (host->pdata && host->pdata->get_ro) return !!host->pdata->get_ro(mmc_dev(mmc)); /* @@ -534,7 +549,7 @@ static int pxamci_probe(struct platform_device *pdev) struct mmc_host *mmc; struct pxamci_host *host = NULL; struct resource *r, *dmarx, *dmatx; - int ret, irq; + int ret, irq, gpio_cd = -1, gpio_ro = -1, gpio_power = -1; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); @@ -661,13 +676,63 @@ static int pxamci_probe(struct platform_device *pdev) } host->dma_drcmrtx = dmatx->start; + if (host->pdata) { + gpio_cd = host->pdata->gpio_card_detect; + gpio_ro = host->pdata->gpio_card_ro; + gpio_power = host->pdata->gpio_power; + } + if (gpio_is_valid(gpio_power)) { + ret = gpio_request(gpio_power, "mmc card power"); + if (ret) { + dev_err(&pdev->dev, "Failed requesting gpio_power %d\n", gpio_power); + goto out; + } + gpio_direction_output(gpio_power, + host->pdata->gpio_power_invert); + } + if (gpio_is_valid(gpio_ro)) { + ret = gpio_request(gpio_ro, "mmc card read only"); + if (ret) { + dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_power); + goto err_gpio_ro; + } + gpio_direction_input(gpio_ro); + } + if (gpio_is_valid(gpio_cd)) { + ret = gpio_request(gpio_cd, "mmc card detect"); + if (ret) { + dev_err(&pdev->dev, "Failed requesting gpio_cd %d\n", gpio_power); + goto err_gpio_cd; + } + gpio_direction_input(gpio_cd); + + ret = request_irq(gpio_to_irq(gpio_cd), pxamci_detect_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "mmc card detect", mmc); + if (ret) { + dev_err(&pdev->dev, "failed to request card detect IRQ\n"); + goto err_request_irq; + } + } + if (host->pdata && host->pdata->init) host->pdata->init(&pdev->dev, pxamci_detect_irq, mmc); + if (gpio_is_valid(gpio_power) && host->pdata->setpower) + dev_warn(&pdev->dev, "gpio_power and setpower() both defined\n"); + if (gpio_is_valid(gpio_ro) && host->pdata->get_ro) + dev_warn(&pdev->dev, "gpio_ro and get_ro() both defined\n"); + mmc_add_host(mmc); return 0; +err_request_irq: + gpio_free(gpio_cd); +err_gpio_cd: + gpio_free(gpio_ro); +err_gpio_ro: + gpio_free(gpio_power); out: if (host) { if (host->dma >= 0) @@ -688,12 +753,26 @@ static int pxamci_probe(struct platform_device *pdev) static int pxamci_remove(struct platform_device *pdev) { struct mmc_host *mmc = platform_get_drvdata(pdev); + int gpio_cd = -1, gpio_ro = -1, gpio_power = -1; platform_set_drvdata(pdev, NULL); if (mmc) { struct pxamci_host *host = mmc_priv(mmc); + if (host->pdata) { + gpio_cd = host->pdata->gpio_card_detect; + gpio_ro = host->pdata->gpio_card_ro; + gpio_power = host->pdata->gpio_power; + } + if (gpio_is_valid(gpio_cd)) { + free_irq(gpio_to_irq(gpio_cd), mmc); + gpio_free(gpio_cd); + } + if (gpio_is_valid(gpio_ro)) + gpio_free(gpio_ro); + if (gpio_is_valid(gpio_power)) + gpio_free(gpio_power); if (host->vcc) regulator_put(host->vcc); @@ -725,20 +804,20 @@ static int pxamci_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int pxamci_suspend(struct platform_device *dev, pm_message_t state) +static int pxamci_suspend(struct device *dev) { - struct mmc_host *mmc = platform_get_drvdata(dev); + struct mmc_host *mmc = dev_get_drvdata(dev); int ret = 0; if (mmc) - ret = mmc_suspend_host(mmc, state); + ret = mmc_suspend_host(mmc, PMSG_SUSPEND); return ret; } -static int pxamci_resume(struct platform_device *dev) +static int pxamci_resume(struct device *dev) { - struct mmc_host *mmc = platform_get_drvdata(dev); + struct mmc_host *mmc = dev_get_drvdata(dev); int ret = 0; if (mmc) @@ -746,19 +825,22 @@ static int pxamci_resume(struct platform_device *dev) return ret; } -#else -#define pxamci_suspend NULL -#define pxamci_resume NULL + +static struct dev_pm_ops pxamci_pm_ops = { + .suspend = pxamci_suspend, + .resume = pxamci_resume, +}; #endif static struct platform_driver pxamci_driver = { .probe = pxamci_probe, .remove = pxamci_remove, - .suspend = pxamci_suspend, - .resume = pxamci_resume, .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &pxamci_pm_ops, +#endif }, }; diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index e4ec3659759..ecf90f5c97c 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -159,7 +159,7 @@ config MTD_AFS_PARTS config MTD_OF_PARTS tristate "Flash partition map based on OF description" - depends on PPC_OF && MTD_PARTITIONS + depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS help This provides a partition parsing function which derives the partition map from the children of the flash node, diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 3a9a960644b..841e085ab74 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH config MTD_PHYSMAP_OF tristate "Flash device in physical memory map based on OF description" - depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM) + depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM) help This provides a 'mapping' driver which allows the NOR Flash and ROM driver code to communicate with chips which are mapped diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 15c0195ebd3..a24be34a3f7 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -768,10 +768,24 @@ e100_negotiate(struct net_device* dev) e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE, data); - /* Renegotiate with link partner */ + data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR); if (autoneg_normal) { - data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR); - data |= BMCR_ANENABLE | BMCR_ANRESTART; + /* Renegotiate with link partner */ + data |= BMCR_ANENABLE | BMCR_ANRESTART; + } else { + /* Don't negotiate speed or duplex */ + data &= ~(BMCR_ANENABLE | BMCR_ANRESTART); + + /* Set speed and duplex static */ + if (current_speed_selection == 10) + data &= ~BMCR_SPEED100; + else + data |= BMCR_SPEED100; + + if (current_duplex != full) + data &= ~BMCR_FULLDPLX; + else + data |= BMCR_FULLDPLX; } e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data); } diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 1445e586519..84db145d2b5 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -17,6 +17,7 @@ #include <linux/etherdevice.h> #include <linux/platform_device.h> #include <linux/clk.h> +#include <linux/gpio.h> #include <net/irda/irda.h> #include <net/irda/irmod.h> @@ -163,6 +164,22 @@ inline static void pxa_irda_fir_dma_tx_start(struct pxa_irda *si) } /* + * Set the IrDA communications mode. + */ +static void pxa_irda_set_mode(struct pxa_irda *si, int mode) +{ + if (si->pdata->transceiver_mode) + si->pdata->transceiver_mode(si->dev, mode); + else { + if (gpio_is_valid(si->pdata->gpio_pwdown)) + gpio_set_value(si->pdata->gpio_pwdown, + !(mode & IR_OFF) ^ + !si->pdata->gpio_pwdown_inverted); + pxa2xx_transceiver_mode(si->dev, mode); + } +} + +/* * Set the IrDA communications speed. */ static int pxa_irda_set_speed(struct pxa_irda *si, int speed) @@ -188,7 +205,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed) pxa_irda_disable_clk(si); /* set board transceiver to SIR mode */ - si->pdata->transceiver_mode(si->dev, IR_SIRMODE); + pxa_irda_set_mode(si, IR_SIRMODE); /* enable the STUART clock */ pxa_irda_enable_sirclk(si); @@ -222,7 +239,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed) ICCR0 = 0; /* set board transceiver to FIR mode */ - si->pdata->transceiver_mode(si->dev, IR_FIRMODE); + pxa_irda_set_mode(si, IR_FIRMODE); /* enable the FICP clock */ pxa_irda_enable_firclk(si); @@ -641,7 +658,7 @@ static void pxa_irda_shutdown(struct pxa_irda *si) local_irq_restore(flags); /* power off board transceiver */ - si->pdata->transceiver_mode(si->dev, IR_OFF); + pxa_irda_set_mode(si, IR_OFF); printk(KERN_DEBUG "pxa_ir: irda shutdown\n"); } @@ -849,10 +866,26 @@ static int pxa_irda_probe(struct platform_device *pdev) if (err) goto err_mem_5; - if (si->pdata->startup) + if (gpio_is_valid(si->pdata->gpio_pwdown)) { + err = gpio_request(si->pdata->gpio_pwdown, "IrDA switch"); + if (err) + goto err_startup; + err = gpio_direction_output(si->pdata->gpio_pwdown, + !si->pdata->gpio_pwdown_inverted); + if (err) { + gpio_free(si->pdata->gpio_pwdown); + goto err_startup; + } + } + + if (si->pdata->startup) { err = si->pdata->startup(si->dev); - if (err) - goto err_startup; + if (err) + goto err_startup; + } + + if (gpio_is_valid(si->pdata->gpio_pwdown) && si->pdata->startup) + dev_warn(si->dev, "gpio_pwdown and startup() both defined!\n"); dev->netdev_ops = &pxa_irda_netdev_ops; @@ -903,6 +936,8 @@ static int pxa_irda_remove(struct platform_device *_dev) if (dev) { struct pxa_irda *si = netdev_priv(dev); unregister_netdev(dev); + if (gpio_is_valid(si->pdata->gpio_pwdown)) + gpio_free(si->pdata->gpio_pwdown); if (si->pdata->shutdown) si->pdata->shutdown(si->dev); kfree(si->tx_buff.head); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index cee199ceba2..3c16602172f 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -33,6 +33,7 @@ */ #include <linux/mlx4/cmd.h> +#include <linux/cache.h> #include "fw.h" #include "icm.h" @@ -698,6 +699,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 +#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) @@ -735,6 +737,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; + *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = + (ilog2(cache_line_size()) - 4) << 5; + #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c index 2ab1d59870f..a8b689635a3 100644 --- a/drivers/net/wireless/arlan-proc.c +++ b/drivers/net/wireless/arlan-proc.c @@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev) static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; -static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -629,7 +629,7 @@ final: *lenp = pos; if (!write) - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); else { *lenp = 0; @@ -639,7 +639,7 @@ final: } -static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info161719(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, txBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, rxBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info18(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } @@ -766,7 +766,7 @@ final: static char conf_reset_result[200]; -static int arlan_configure(ctl_table * ctl, int write, struct file *filp, +static int arlan_configure(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp, return -1; *lenp = pos; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } -static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_reset(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, } else return -1; *lenp = pos + 3; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 554e11f9e1c..8eefe56f1cb 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -31,7 +31,7 @@ #define PARPORT_MIN_SPINTIME_VALUE 1 #define PARPORT_MAX_SPINTIME_VALUE 1000 -static int do_active_device(ctl_table *table, int write, struct file *filp, +static int do_active_device(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp, } #ifdef CONFIG_PARPORT_1284 -static int do_autoprobe(ctl_table *table, int write, struct file *filp, +static int do_autoprobe(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport_device_info *info = table->extra2; @@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp, #endif /* IEEE1284.3 support. */ static int do_hardware_base_addr (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write, } static int do_hardware_irq (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write, } static int do_hardware_dma (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write, } static int do_hardware_modes (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 36faa9a8e18..3070f77eb56 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -72,15 +72,9 @@ do { \ #define SLOT_NAME_SIZE 10 struct slot { - u8 bus; - u8 device; u8 state; - u8 hp_slot; - u32 number; struct controller *ctrl; - struct hpc_ops *hpc_ops; struct hotplug_slot *hotplug_slot; - struct list_head slot_list; struct delayed_work work; /* work for button event */ struct mutex lock; }; @@ -92,18 +86,10 @@ struct event_info { }; struct controller { - struct mutex crit_sect; /* critical section mutex */ struct mutex ctrl_lock; /* controller lock */ - int num_slots; /* Number of slots on ctlr */ - int slot_num_inc; /* 1 or -1 */ - struct pci_dev *pci_dev; struct pcie_device *pcie; /* PCI Express port service */ - struct list_head slot_list; - struct hpc_ops *hpc_ops; + struct slot *slot; wait_queue_head_t queue; /* sleep & wake process */ - u8 slot_device_offset; - u32 first_slot; /* First physical slot number */ /* PCIE only has 1 slot */ - u8 slot_bus; /* Bus where the slots handled by this controller sit */ u32 slot_cap; u8 cap_base; struct timer_list poll_timer; @@ -131,40 +117,20 @@ struct controller { #define POWERON_STATE 3 #define POWEROFF_STATE 4 -/* Error messages */ -#define INTERLOCK_OPEN 0x00000002 -#define ADD_NOT_SUPPORTED 0x00000003 -#define CARD_FUNCTIONING 0x00000005 -#define ADAPTER_NOT_SAME 0x00000006 -#define NO_ADAPTER_PRESENT 0x00000009 -#define NOT_ENOUGH_RESOURCES 0x0000000B -#define DEVICE_TYPE_NOT_SUPPORTED 0x0000000C -#define WRONG_BUS_FREQUENCY 0x0000000D -#define POWER_FAILURE 0x0000000E - -/* Field definitions in Slot Capabilities Register */ -#define ATTN_BUTTN_PRSN 0x00000001 -#define PWR_CTRL_PRSN 0x00000002 -#define MRL_SENS_PRSN 0x00000004 -#define ATTN_LED_PRSN 0x00000008 -#define PWR_LED_PRSN 0x00000010 -#define HP_SUPR_RM_SUP 0x00000020 -#define EMI_PRSN 0x00020000 -#define NO_CMD_CMPL_SUP 0x00040000 - -#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & ATTN_BUTTN_PRSN) -#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PWR_CTRL_PRSN) -#define MRL_SENS(ctrl) ((ctrl)->slot_cap & MRL_SENS_PRSN) -#define ATTN_LED(ctrl) ((ctrl)->slot_cap & ATTN_LED_PRSN) -#define PWR_LED(ctrl) ((ctrl)->slot_cap & PWR_LED_PRSN) -#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & HP_SUPR_RM_SUP) -#define EMI(ctrl) ((ctrl)->slot_cap & EMI_PRSN) -#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & NO_CMD_CMPL_SUP) +#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP) +#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP) +#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP) +#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP) +#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP) +#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS) +#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP) +#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS) +#define PSN(ctrl) ((ctrl)->slot_cap >> 19) extern int pciehp_sysfs_enable_slot(struct slot *slot); extern int pciehp_sysfs_disable_slot(struct slot *slot); extern u8 pciehp_handle_attention_button(struct slot *p_slot); - extern u8 pciehp_handle_switch_change(struct slot *p_slot); +extern u8 pciehp_handle_switch_change(struct slot *p_slot); extern u8 pciehp_handle_presence_change(struct slot *p_slot); extern u8 pciehp_handle_power_fault(struct slot *p_slot); extern int pciehp_configure_device(struct slot *p_slot); @@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); int pcie_enable_notification(struct controller *ctrl); +int pciehp_power_on_slot(struct slot *slot); +int pciehp_power_off_slot(struct slot *slot); +int pciehp_get_power_status(struct slot *slot, u8 *status); +int pciehp_get_attention_status(struct slot *slot, u8 *status); + +int pciehp_set_attention_status(struct slot *slot, u8 status); +int pciehp_get_latch_status(struct slot *slot, u8 *status); +int pciehp_get_adapter_status(struct slot *slot, u8 *status); +int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed); +int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val); +int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed); +int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val); +int pciehp_query_power_fault(struct slot *slot); +void pciehp_green_led_on(struct slot *slot); +void pciehp_green_led_off(struct slot *slot); +void pciehp_green_led_blink(struct slot *slot); +int pciehp_check_link_status(struct controller *ctrl); +void pciehp_release_ctrl(struct controller *ctrl); static inline const char *slot_name(struct slot *slot) { return hotplug_slot_name(slot->hotplug_slot); } -static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device) -{ - struct slot *slot; - - list_for_each_entry(slot, &ctrl->slot_list, slot_list) { - if (slot->device == device) - return slot; - } - - ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device); - return NULL; -} - -struct hpc_ops { - int (*power_on_slot)(struct slot *slot); - int (*power_off_slot)(struct slot *slot); - int (*get_power_status)(struct slot *slot, u8 *status); - int (*get_attention_status)(struct slot *slot, u8 *status); - int (*set_attention_status)(struct slot *slot, u8 status); - int (*get_latch_status)(struct slot *slot, u8 *status); - int (*get_adapter_status)(struct slot *slot, u8 *status); - int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val); - int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val); - int (*query_power_fault)(struct slot *slot); - void (*green_led_on)(struct slot *slot); - void (*green_led_off)(struct slot *slot); - void (*green_led_blink)(struct slot *slot); - void (*release_ctlr)(struct controller *ctrl); - int (*check_lnk_status)(struct controller *ctrl); -}; - #ifdef CONFIG_ACPI #include <acpi/acpi.h> #include <acpi/acpi_bus.h> diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 7163e6a6cfa..37c8d3d0323 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -33,6 +33,11 @@ #define PCIEHP_DETECT_AUTO (2) #define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO +struct dummy_slot { + u32 number; + struct list_head list; +}; + static int slot_detection_mode; static char *pciehp_detect_mode; module_param(pciehp_detect_mode, charp, 0444); @@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev) int pos; u32 slot_cap; acpi_handle handle; - struct slot *slot, *tmp; + struct dummy_slot *slot, *tmp; struct pci_dev *pdev = dev->port; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) @@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev) if (!slot) return -ENOMEM; slot->number = slot_cap >> 19; - list_for_each_entry(tmp, &dummy_slots, slot_list) { + list_for_each_entry(tmp, &dummy_slots, list) { if (tmp->number == slot->number) dup_slot_id++; } - list_add_tail(&slot->slot_list, &dummy_slots); + list_add_tail(&slot->list, &dummy_slots); handle = DEVICE_ACPI_HANDLE(&pdev->dev); if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle)) acpi_slot_detected = 1; @@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = { static int __init select_detection_mode(void) { - struct slot *slot, *tmp; + struct dummy_slot *slot, *tmp; pcie_port_service_register(&dummy_driver); pcie_port_service_unregister(&dummy_driver); - list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) { - list_del(&slot->slot_list); + list_for_each_entry_safe(slot, tmp, &dummy_slots, list) { + list_del(&slot->list); kfree(slot); } if (acpi_slot_detected && dup_slot_id) diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 2317557fdee..bc234719b1d 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot) kfree(hotplug_slot); } -static int init_slots(struct controller *ctrl) +static int init_slot(struct controller *ctrl) { - struct slot *slot; - struct hotplug_slot *hotplug_slot; - struct hotplug_slot_info *info; + struct slot *slot = ctrl->slot; + struct hotplug_slot *hotplug = NULL; + struct hotplug_slot_info *info = NULL; char name[SLOT_NAME_SIZE]; int retval = -ENOMEM; - list_for_each_entry(slot, &ctrl->slot_list, slot_list) { - hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL); - if (!hotplug_slot) - goto error; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto error_hpslot; - - /* register this slot with the hotplug pci core */ - hotplug_slot->info = info; - hotplug_slot->private = slot; - hotplug_slot->release = &release_slot; - hotplug_slot->ops = &pciehp_hotplug_slot_ops; - slot->hotplug_slot = hotplug_slot; - snprintf(name, SLOT_NAME_SIZE, "%u", slot->number); - - ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " - "hp_slot=%x sun=%x slot_device_offset=%x\n", - pci_domain_nr(ctrl->pci_dev->subordinate), - slot->bus, slot->device, slot->hp_slot, slot->number, - ctrl->slot_device_offset); - retval = pci_hp_register(hotplug_slot, - ctrl->pci_dev->subordinate, - slot->device, - name); - if (retval) { - ctrl_err(ctrl, "pci_hp_register failed with error %d\n", - retval); - goto error_info; - } - get_power_status(hotplug_slot, &info->power_status); - get_attention_status(hotplug_slot, &info->attention_status); - get_latch_status(hotplug_slot, &info->latch_status); - get_adapter_status(hotplug_slot, &info->adapter_status); + hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL); + if (!hotplug) + goto out; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto out; + + /* register this slot with the hotplug pci core */ + hotplug->info = info; + hotplug->private = slot; + hotplug->release = &release_slot; + hotplug->ops = &pciehp_hotplug_slot_ops; + slot->hotplug_slot = hotplug; + snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl)); + + ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n", + pci_domain_nr(ctrl->pcie->port->subordinate), + ctrl->pcie->port->subordinate->number, PSN(ctrl)); + retval = pci_hp_register(hotplug, + ctrl->pcie->port->subordinate, 0, name); + if (retval) { + ctrl_err(ctrl, + "pci_hp_register failed with error %d\n", retval); + goto out; + } + get_power_status(hotplug, &info->power_status); + get_attention_status(hotplug, &info->attention_status); + get_latch_status(hotplug, &info->latch_status); + get_adapter_status(hotplug, &info->adapter_status); +out: + if (retval) { + kfree(info); + kfree(hotplug); } - - return 0; -error_info: - kfree(info); -error_hpslot: - kfree(hotplug_slot); -error: return retval; } -static void cleanup_slots(struct controller *ctrl) +static void cleanup_slot(struct controller *ctrl) { - struct slot *slot; - list_for_each_entry(slot, &ctrl->slot_list, slot_list) - pci_hp_deregister(slot->hotplug_slot); + pci_hp_deregister(ctrl->slot->hotplug_slot); } /* @@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) hotplug_slot->info->attention_status = status; if (ATTN_LED(slot->ctrl)) - slot->hpc_ops->set_attention_status(slot, status); + pciehp_set_attention_status(slot, status); return 0; } @@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_power_status(slot, value); + retval = pciehp_get_power_status(slot, value); if (retval < 0) *value = hotplug_slot->info->power_status; @@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_attention_status(slot, value); + retval = pciehp_get_attention_status(slot, value); if (retval < 0) *value = hotplug_slot->info->attention_status; @@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_latch_status(slot, value); + retval = pciehp_get_latch_status(slot, value); if (retval < 0) *value = hotplug_slot->info->latch_status; @@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_adapter_status(slot, value); + retval = pciehp_get_adapter_status(slot, value); if (retval < 0) *value = hotplug_slot->info->adapter_status; @@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_max_bus_speed(slot, value); + retval = pciehp_get_max_link_speed(slot, value); if (retval < 0) *value = PCI_SPEED_UNKNOWN; @@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_cur_bus_speed(slot, value); + retval = pciehp_get_cur_link_speed(slot, value); if (retval < 0) *value = PCI_SPEED_UNKNOWN; @@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; - struct slot *t_slot; + struct slot *slot; u8 value; struct pci_dev *pdev = dev->port; @@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev) set_service_data(dev, ctrl); /* Setup the slot information structures */ - rc = init_slots(ctrl); + rc = init_slot(ctrl); if (rc) { if (rc == -EBUSY) ctrl_warn(ctrl, "Slot already registered by another " @@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev) } /* Check if slot is occupied */ - t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - t_slot->hpc_ops->get_adapter_status(t_slot, &value); + slot = ctrl->slot; + pciehp_get_adapter_status(slot, &value); if (value) { if (pciehp_force) - pciehp_enable_slot(t_slot); + pciehp_enable_slot(slot); } else { /* Power off slot if not occupied */ if (POWER_CTRL(ctrl)) { - rc = t_slot->hpc_ops->power_off_slot(t_slot); + rc = pciehp_power_off_slot(slot); if (rc) goto err_out_free_ctrl_slot; } @@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev) return 0; err_out_free_ctrl_slot: - cleanup_slots(ctrl); + cleanup_slot(ctrl); err_out_release_ctlr: - ctrl->hpc_ops->release_ctlr(ctrl); + pciehp_release_ctrl(ctrl); err_out_none: return -ENODEV; } -static void pciehp_remove (struct pcie_device *dev) +static void pciehp_remove(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); - cleanup_slots(ctrl); - ctrl->hpc_ops->release_ctlr(ctrl); + cleanup_slot(ctrl); + pciehp_release_ctrl(ctrl); } #ifdef CONFIG_PM @@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev) dev_info(&dev->device, "%s ENTRY\n", __func__); if (pciehp_force) { struct controller *ctrl = get_service_data(dev); - struct slot *t_slot; + struct slot *slot; u8 status; /* reinitialize the chipset's event detection logic */ pcie_enable_notification(ctrl); - t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); + slot = ctrl->slot; /* Check if slot is occupied */ - t_slot->hpc_ops->get_adapter_status(t_slot, &status); + pciehp_get_adapter_status(slot, &status); if (status) - pciehp_enable_slot(t_slot); + pciehp_enable_slot(slot); else - pciehp_disable_slot(t_slot); + pciehp_disable_slot(slot); } return 0; } diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index b97cb4c3e0f..84487d126e4 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot) /* Switch Change */ ctrl_dbg(ctrl, "Switch interrupt received\n"); - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); if (getstatus) { /* * Switch opened @@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot) /* Switch is open, assume a presence change * Save the presence state */ - p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save); + pciehp_get_adapter_status(p_slot, &presence_save); if (presence_save) { /* * Card Present @@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot) /* power fault */ ctrl_dbg(ctrl, "Power fault interrupt received\n"); - if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) { + if (!pciehp_query_power_fault(p_slot)) { /* * power fault Cleared */ @@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot) { /* turn off slot, turn on Amber LED, turn off Green LED if supported*/ if (POWER_CTRL(ctrl)) { - if (pslot->hpc_ops->power_off_slot(pslot)) { + if (pciehp_power_off_slot(pslot)) { ctrl_err(ctrl, "Issue of Slot Power Off command failed\n"); return; @@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot) } if (PWR_LED(ctrl)) - pslot->hpc_ops->green_led_off(pslot); + pciehp_green_led_off(pslot); if (ATTN_LED(ctrl)) { - if (pslot->hpc_ops->set_attention_status(pslot, 1)) { + if (pciehp_set_attention_status(pslot, 1)) { ctrl_err(ctrl, "Issue of Set Attention Led command failed\n"); return; @@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot) { int retval = 0; struct controller *ctrl = p_slot->ctrl; - struct pci_bus *parent = ctrl->pci_dev->subordinate; - - ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n", - __func__, p_slot->device, ctrl->slot_device_offset, - p_slot->hp_slot); + struct pci_bus *parent = ctrl->pcie->port->subordinate; if (POWER_CTRL(ctrl)) { /* Power on slot */ - retval = p_slot->hpc_ops->power_on_slot(p_slot); + retval = pciehp_power_on_slot(p_slot); if (retval) return retval; } if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_blink(p_slot); + pciehp_green_led_blink(p_slot); /* Check link training status */ - retval = p_slot->hpc_ops->check_lnk_status(ctrl); + retval = pciehp_check_link_status(ctrl); if (retval) { ctrl_err(ctrl, "Failed to check link status\n"); set_slot_off(ctrl, p_slot); @@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot) } /* Check for a power fault */ - if (p_slot->hpc_ops->query_power_fault(p_slot)) { + if (pciehp_query_power_fault(p_slot)) { ctrl_dbg(ctrl, "Power fault detected\n"); - retval = POWER_FAILURE; + retval = -EIO; goto err_exit; } retval = pciehp_configure_device(p_slot); if (retval) { - ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n", - pci_domain_nr(parent), p_slot->bus, p_slot->device); + ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n", + pci_domain_nr(parent), parent->number); goto err_exit; } if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_on(p_slot); + pciehp_green_led_on(p_slot); return 0; @@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot) if (retval) return retval; - ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot); - if (POWER_CTRL(ctrl)) { /* power off slot */ - retval = p_slot->hpc_ops->power_off_slot(p_slot); + retval = pciehp_power_off_slot(p_slot); if (retval) { ctrl_err(ctrl, "Issue of Slot Disable command failed\n"); @@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot) msleep(1000); } + /* turn off Green LED */ if (PWR_LED(ctrl)) - /* turn off Green LED */ - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); return 0; } @@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work) case POWEROFF_STATE: mutex_unlock(&p_slot->lock); ctrl_dbg(p_slot->ctrl, - "Disabling domain:bus:device=%04x:%02x:%02x\n", - pci_domain_nr(p_slot->ctrl->pci_dev->subordinate), - p_slot->bus, p_slot->device); + "Disabling domain:bus:device=%04x:%02x:00\n", + pci_domain_nr(p_slot->ctrl->pcie->port->subordinate), + p_slot->ctrl->pcie->port->subordinate->number); pciehp_disable_slot(p_slot); mutex_lock(&p_slot->lock); p_slot->state = STATIC_STATE; break; case POWERON_STATE: mutex_unlock(&p_slot->lock); - if (pciehp_enable_slot(p_slot) && - PWR_LED(p_slot->ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl)) + pciehp_green_led_off(p_slot); mutex_lock(&p_slot->lock); p_slot->state = STATIC_STATE; break; @@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot) if (!info) return -ENOMEM; - slot->hpc_ops->get_power_status(slot, &(info->power_status)); - slot->hpc_ops->get_attention_status(slot, &(info->attention_status)); - slot->hpc_ops->get_latch_status(slot, &(info->latch_status)); - slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status)); + pciehp_get_power_status(slot, &info->power_status); + pciehp_get_attention_status(slot, &info->attention_status); + pciehp_get_latch_status(slot, &info->latch_status); + pciehp_get_adapter_status(slot, &info->adapter_status); result = pci_hp_change_slot_info(slot->hotplug_slot, info); kfree (info); @@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot) switch (p_slot->state) { case STATIC_STATE: - p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + pciehp_get_power_status(p_slot, &getstatus); if (getstatus) { p_slot->state = BLINKINGOFF_STATE; ctrl_info(ctrl, @@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot) } /* blink green LED and turn off amber */ if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_blink(p_slot); + pciehp_green_led_blink(p_slot); if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 0); + pciehp_set_attention_status(p_slot, 0); schedule_delayed_work(&p_slot->work, 5*HZ); break; @@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot) cancel_delayed_work(&p_slot->work); if (p_slot->state == BLINKINGOFF_STATE) { if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_on(p_slot); + pciehp_green_led_on(p_slot); } else { if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); } if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 0); + pciehp_set_attention_status(p_slot, 0); ctrl_info(ctrl, "PCI slot #%s - action canceled " "due to button press\n", slot_name(p_slot)); p_slot->state = STATIC_STATE; @@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot) info->p_slot = p_slot; INIT_WORK(&info->work, pciehp_power_thread); - p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + pciehp_get_adapter_status(p_slot, &getstatus); if (!getstatus) p_slot->state = POWEROFF_STATE; else @@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work) if (!POWER_CTRL(ctrl)) break; if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 1); + pciehp_set_attention_status(p_slot, 1); if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); break; case INT_PRESENCE_ON: case INT_PRESENCE_OFF: @@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot) int rc; struct controller *ctrl = p_slot->ctrl; - /* Check to see if (latch closed, card present, power off) */ - mutex_lock(&p_slot->ctrl->crit_sect); - - rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + rc = pciehp_get_adapter_status(p_slot, &getstatus); if (rc || !getstatus) { ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } if (MRL_SENS(p_slot->ctrl)) { - rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + rc = pciehp_get_latch_status(p_slot, &getstatus); if (rc || getstatus) { ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (POWER_CTRL(p_slot->ctrl)) { - rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + rc = pciehp_get_power_status(p_slot, &getstatus); if (rc || getstatus) { ctrl_info(ctrl, "Already enabled on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -EINVAL; } } - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); rc = board_added(p_slot); if (rc) { - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); } update_slot_info(p_slot); - mutex_unlock(&p_slot->ctrl->crit_sect); return rc; } @@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot) if (!p_slot->ctrl) return 1; - /* Check to see if (latch closed, card present, power on) */ - mutex_lock(&p_slot->ctrl->crit_sect); - if (!HP_SUPR_RM(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + ret = pciehp_get_adapter_status(p_slot, &getstatus); if (ret || !getstatus) { ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (MRL_SENS(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + ret = pciehp_get_latch_status(p_slot, &getstatus); if (ret || getstatus) { ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (POWER_CTRL(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + ret = pciehp_get_power_status(p_slot, &getstatus); if (ret || !getstatus) { ctrl_info(ctrl, "Already disabled on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -EINVAL; } } @@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot) ret = remove_board(p_slot); update_slot_info(p_slot); - mutex_unlock(&p_slot->ctrl->crit_sect); return ret; } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 271f917b6f2..9ef4605c1ef 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0); static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_read_config_word(dev, ctrl->cap_base + reg, value); } static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_read_config_dword(dev, ctrl->cap_base + reg, value); } static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_write_config_word(dev, ctrl->cap_base + reg, value); } static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_write_config_dword(dev, ctrl->cap_base + reg, value); } @@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl) ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n"); } -static int hpc_check_lnk_status(struct controller *ctrl) +int pciehp_check_link_status(struct controller *ctrl) { u16 lnk_status; int retval = 0; @@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl) return retval; } -static int hpc_get_attention_status(struct slot *slot, u8 *status) +int pciehp_get_attention_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_ctrl; @@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status) return 0; } -static int hpc_get_power_status(struct slot *slot, u8 *status) +int pciehp_get_power_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_ctrl; @@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status) return retval; } -static int hpc_get_latch_status(struct slot *slot, u8 *status) +int pciehp_get_latch_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status) return 0; } -static int hpc_get_adapter_status(struct slot *slot, u8 *status) +int pciehp_get_adapter_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status) return 0; } -static int hpc_query_power_fault(struct slot *slot) +int pciehp_query_power_fault(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot) return !!(slot_status & PCI_EXP_SLTSTA_PFD); } -static int hpc_set_attention_status(struct slot *slot, u8 value) +int pciehp_set_attention_status(struct slot *slot, u8 value) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value) return rc; } -static void hpc_set_green_led_on(struct slot *slot) +void pciehp_green_led_on(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static void hpc_set_green_led_off(struct slot *slot) +void pciehp_green_led_off(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static void hpc_set_green_led_blink(struct slot *slot) +void pciehp_green_led_blink(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static int hpc_power_on_slot(struct slot * slot) +int pciehp_power_on_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot) u16 slot_status; int retval = 0; - ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot); - /* Clear sticky power-fault bit from previous power failures */ retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { @@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot) static inline int pcie_mask_bad_dllp(struct controller *ctrl) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; int pos; u32 reg; @@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl) static inline void pcie_unmask_bad_dllp(struct controller *ctrl) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; u32 reg; int pos; @@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl) pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg); } -static int hpc_power_off_slot(struct slot * slot) +int pciehp_power_off_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot) int retval = 0; int changed; - ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot); - /* * Set Bad DLLP Mask bit in Correctable Error Mask * Register. This is the workaround against Bad DLLP error @@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot) static irqreturn_t pcie_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; + struct slot *slot = ctrl->slot; u16 detected, intr_loc; - struct slot *p_slot; /* * In order to guarantee that all interrupt events are @@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) return IRQ_HANDLED; - p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - /* Check MRL Sensor Changed */ if (intr_loc & PCI_EXP_SLTSTA_MRLSC) - pciehp_handle_switch_change(p_slot); + pciehp_handle_switch_change(slot); /* Check Attention Button Pressed */ if (intr_loc & PCI_EXP_SLTSTA_ABP) - pciehp_handle_attention_button(p_slot); + pciehp_handle_attention_button(slot); /* Check Presence Detect Changed */ if (intr_loc & PCI_EXP_SLTSTA_PDC) - pciehp_handle_presence_change(p_slot); + pciehp_handle_presence_change(slot); /* Check Power Fault Detected */ if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { ctrl->power_fault_detected = 1; - pciehp_handle_power_fault(p_slot); + pciehp_handle_power_fault(slot); } return IRQ_HANDLED; } -static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) +int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value) { struct controller *ctrl = slot->ctrl; enum pcie_link_speed lnk_speed; @@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_max_lnk_width(struct slot *slot, +int pciehp_get_max_lnk_width(struct slot *slot, enum pcie_link_width *value) { struct controller *ctrl = slot->ctrl; @@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot, return retval; } -static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) +int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value) { struct controller *ctrl = slot->ctrl; enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN; @@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_cur_lnk_width(struct slot *slot, +int pciehp_get_cur_lnk_width(struct slot *slot, enum pcie_link_width *value) { struct controller *ctrl = slot->ctrl; @@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot, return retval; } -static void pcie_release_ctrl(struct controller *ctrl); -static struct hpc_ops pciehp_hpc_ops = { - .power_on_slot = hpc_power_on_slot, - .power_off_slot = hpc_power_off_slot, - .set_attention_status = hpc_set_attention_status, - .get_power_status = hpc_get_power_status, - .get_attention_status = hpc_get_attention_status, - .get_latch_status = hpc_get_latch_status, - .get_adapter_status = hpc_get_adapter_status, - - .get_max_bus_speed = hpc_get_max_lnk_speed, - .get_cur_bus_speed = hpc_get_cur_lnk_speed, - .get_max_lnk_width = hpc_get_max_lnk_width, - .get_cur_lnk_width = hpc_get_cur_lnk_width, - - .query_power_fault = hpc_query_power_fault, - .green_led_on = hpc_set_green_led_on, - .green_led_off = hpc_set_green_led_off, - .green_led_blink = hpc_set_green_led_blink, - - .release_ctlr = pcie_release_ctrl, - .check_lnk_status = hpc_check_lnk_status, -}; - int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; @@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl) if (!slot) return -ENOMEM; - slot->hp_slot = 0; slot->ctrl = ctrl; - slot->bus = ctrl->pci_dev->subordinate->number; - slot->device = ctrl->slot_device_offset + slot->hp_slot; - slot->hpc_ops = ctrl->hpc_ops; - slot->number = ctrl->first_slot; mutex_init(&slot->lock); INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); - list_add(&slot->slot_list, &ctrl->slot_list); + ctrl->slot = slot; return 0; } static void pcie_cleanup_slot(struct controller *ctrl) { - struct slot *slot; - slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list); - list_del(&slot->slot_list); + struct slot *slot = ctrl->slot; cancel_delayed_work(&slot->work); flush_scheduled_work(); flush_workqueue(pciehp_wq); @@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl) { int i; u16 reg16; - struct pci_dev *pdev = ctrl->pci_dev; + struct pci_dev *pdev = ctrl->pcie->port; if (!pciehp_debug) return; @@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl) (unsigned long long)pci_resource_start(pdev, i)); } ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap); - ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot); + ctrl_info(ctrl, " Physical Slot Number : %d\n", PSN(ctrl)); ctrl_info(ctrl, " Attention Button : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no"); ctrl_info(ctrl, " Power Controller : %3s\n", @@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev) dev_err(&dev->device, "%s: Out of memory\n", __func__); goto abort; } - INIT_LIST_HEAD(&ctrl->slot_list); - ctrl->pcie = dev; - ctrl->pci_dev = pdev; ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); if (!ctrl->cap_base) { ctrl_err(ctrl, "Cannot find PCI Express capability\n"); @@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev) } ctrl->slot_cap = slot_cap; - ctrl->first_slot = slot_cap >> 19; - ctrl->slot_device_offset = 0; - ctrl->num_slots = 1; - ctrl->hpc_ops = &pciehp_hpc_ops; - mutex_init(&ctrl->crit_sect); mutex_init(&ctrl->ctrl_lock); init_waitqueue_head(&ctrl->queue); dbg_ctrl(ctrl); @@ -1089,7 +1044,7 @@ abort: return NULL; } -void pcie_release_ctrl(struct controller *ctrl) +void pciehp_release_ctrl(struct controller *ctrl) { pcie_shutdown_notification(ctrl); pcie_cleanup_slot(ctrl); diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index 02e24d63b3e..21733108add 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev) int pciehp_configure_device(struct slot *p_slot) { struct pci_dev *dev; - struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; + struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate; int num, fn; struct controller *ctrl = p_slot->ctrl; - dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0)); + dev = pci_get_slot(parent, PCI_DEVFN(0, 0)); if (dev) { ctrl_err(ctrl, "Device %s already exists " - "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev), - pci_domain_nr(parent), p_slot->bus, p_slot->device); + "at %04x:%02x:00, cannot hot-add\n", pci_name(dev), + pci_domain_nr(parent), parent->number); pci_dev_put(dev); return -EINVAL; } - num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0)); + num = pci_scan_slot(parent, PCI_DEVFN(0, 0)); if (num == 0) { ctrl_err(ctrl, "No new device found\n"); return -ENODEV; } for (fn = 0; fn < 8; fn++) { - dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn)); + dev = pci_get_slot(parent, PCI_DEVFN(0, fn)); if (!dev) continue; if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { @@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot) int j; u8 bctl = 0; u8 presence = 0; - struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; + struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate; u16 command; struct controller *ctrl = p_slot->ctrl; - ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n", - __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device); - ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence); + ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n", + __func__, pci_domain_nr(parent), parent->number); + ret = pciehp_get_adapter_status(p_slot, &presence); if (ret) presence = 0; for (j = 0; j < 8; j++) { - struct pci_dev* temp = pci_get_slot(parent, - (p_slot->device << 3) | j); + struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j)); if (!temp) continue; if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 10c0e62bd5a..2ce8f9ccc66 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -318,6 +318,8 @@ static int __init aer_service_init(void) { if (pcie_aer_disable) return -ENXIO; + if (!pci_msi_enabled()) + return -ENXIO; return pcie_port_service_register(&aerdriver); } diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f289ca9bf18..745402e8e49 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, ®32); info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10; - /* 00b and 10b are defined as "Reserved". */ - if (info->support == PCIE_LINK_STATE_L1) - info->support = 0; info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12; info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15; pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index 047394d98ac..3247828aa20 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -71,6 +71,7 @@ pxa2xx-obj-$(CONFIG_MACH_ARMCORE) += pxa2xx_cm_x2xx_cs.o pxa2xx-obj-$(CONFIG_ARCH_VIPER) += pxa2xx_viper.o pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA) += pxa2xx_trizeps4.o pxa2xx-obj-$(CONFIG_MACH_PALMTX) += pxa2xx_palmtx.o +pxa2xx-obj-$(CONFIG_MACH_PALMTC) += pxa2xx_palmtc.o pxa2xx-obj-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o pxa2xx-obj-$(CONFIG_MACH_E740) += pxa2xx_e740.o pxa2xx-obj-$(CONFIG_MACH_STARGATE2) += pxa2xx_stargate2.o diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index c49a7269f6d..87e22ef8eb0 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -300,25 +300,29 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) return soc_common_drv_pcmcia_remove(&dev->dev); } -static int pxa2xx_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state) +static int pxa2xx_drv_pcmcia_suspend(struct device *dev) { - return pcmcia_socket_dev_suspend(&dev->dev, state); + return pcmcia_socket_dev_suspend(dev, PMSG_SUSPEND); } -static int pxa2xx_drv_pcmcia_resume(struct platform_device *dev) +static int pxa2xx_drv_pcmcia_resume(struct device *dev) { - pxa2xx_configure_sockets(&dev->dev); - return pcmcia_socket_dev_resume(&dev->dev); + pxa2xx_configure_sockets(dev); + return pcmcia_socket_dev_resume(dev); } +static struct dev_pm_ops pxa2xx_drv_pcmcia_pm_ops = { + .suspend = pxa2xx_drv_pcmcia_suspend, + .resume = pxa2xx_drv_pcmcia_resume, +}; + static struct platform_driver pxa2xx_pcmcia_driver = { .probe = pxa2xx_drv_pcmcia_probe, .remove = pxa2xx_drv_pcmcia_remove, - .suspend = pxa2xx_drv_pcmcia_suspend, - .resume = pxa2xx_drv_pcmcia_resume, .driver = { .name = "pxa2xx-pcmcia", .owner = THIS_MODULE, + .pm = &pxa2xx_drv_pcmcia_pm_ops, }, }; diff --git a/drivers/pcmcia/pxa2xx_palmtc.c b/drivers/pcmcia/pxa2xx_palmtc.c new file mode 100644 index 00000000000..3a8993ed562 --- /dev/null +++ b/drivers/pcmcia/pxa2xx_palmtc.c @@ -0,0 +1,230 @@ +/* + * linux/drivers/pcmcia/pxa2xx_palmtc.c + * + * Driver for Palm Tungsten|C PCMCIA + * + * Copyright (C) 2008 Alex Osborne <ato@meshy.org> + * Copyright (C) 2009 Marek Vasut <marek.vasut@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> +#include <linux/delay.h> + +#include <asm/mach-types.h> +#include <mach/palmtc.h> +#include "soc_common.h" + +static int palmtc_pcmcia_hw_init(struct soc_pcmcia_socket *skt) +{ + int ret; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER1, "PCMCIA PWR1"); + if (ret) + goto err1; + ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER1, 0); + if (ret) + goto err2; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER2, "PCMCIA PWR2"); + if (ret) + goto err2; + ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER2, 0); + if (ret) + goto err3; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER3, "PCMCIA PWR3"); + if (ret) + goto err3; + ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER3, 0); + if (ret) + goto err4; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_RESET, "PCMCIA RST"); + if (ret) + goto err4; + ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_RESET, 1); + if (ret) + goto err5; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_READY, "PCMCIA RDY"); + if (ret) + goto err5; + ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_READY); + if (ret) + goto err6; + + ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_PWRREADY, "PCMCIA PWRRDY"); + if (ret) + goto err6; + ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_PWRREADY); + if (ret) + goto err7; + + skt->irq = IRQ_GPIO(GPIO_NR_PALMTC_PCMCIA_READY); + return 0; + +err7: + gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY); +err6: + gpio_free(GPIO_NR_PALMTC_PCMCIA_READY); +err5: + gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET); +err4: + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3); +err3: + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2); +err2: + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1); +err1: + return ret; +} + +static void palmtc_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) +{ + gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY); + gpio_free(GPIO_NR_PALMTC_PCMCIA_READY); + gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET); + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3); + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2); + gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1); +} + +static void palmtc_pcmcia_socket_state(struct soc_pcmcia_socket *skt, + struct pcmcia_state *state) +{ + state->detect = 1; /* always inserted */ + state->ready = !!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_READY); + state->bvd1 = 1; + state->bvd2 = 1; + state->wrprot = 0; + state->vs_3v = 1; + state->vs_Xv = 0; +} + +static int palmtc_wifi_powerdown(void) +{ + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1); + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 0); + mdelay(40); + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 0); + return 0; +} + +static int palmtc_wifi_powerup(void) +{ + int timeout = 50; + + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 1); + mdelay(50); + + /* Power up the card, 1.8V first, after a while 3.3V */ + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 1); + mdelay(100); + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 1); + + /* Wait till the card is ready */ + while (!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_PWRREADY) && + timeout) { + mdelay(1); + timeout--; + } + + /* Power down the WiFi in case of error */ + if (!timeout) { + palmtc_wifi_powerdown(); + return 1; + } + + /* Reset the card */ + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1); + mdelay(20); + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 0); + mdelay(25); + + gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 0); + + return 0; +} + +static int palmtc_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, + const socket_state_t *state) +{ + int ret = 1; + + if (state->Vcc == 0) + ret = palmtc_wifi_powerdown(); + else if (state->Vcc == 33) + ret = palmtc_wifi_powerup(); + + return ret; +} + +static void palmtc_pcmcia_socket_init(struct soc_pcmcia_socket *skt) +{ +} + +static void palmtc_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) +{ +} + +static struct pcmcia_low_level palmtc_pcmcia_ops = { + .owner = THIS_MODULE, + + .first = 0, + .nr = 1, + + .hw_init = palmtc_pcmcia_hw_init, + .hw_shutdown = palmtc_pcmcia_hw_shutdown, + + .socket_state = palmtc_pcmcia_socket_state, + .configure_socket = palmtc_pcmcia_configure_socket, + + .socket_init = palmtc_pcmcia_socket_init, + .socket_suspend = palmtc_pcmcia_socket_suspend, +}; + +static struct platform_device *palmtc_pcmcia_device; + +static int __init palmtc_pcmcia_init(void) +{ + int ret; + + if (!machine_is_palmtc()) + return -ENODEV; + + palmtc_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); + if (!palmtc_pcmcia_device) + return -ENOMEM; + + ret = platform_device_add_data(palmtc_pcmcia_device, &palmtc_pcmcia_ops, + sizeof(palmtc_pcmcia_ops)); + + if (!ret) + ret = platform_device_add(palmtc_pcmcia_device); + + if (ret) + platform_device_put(palmtc_pcmcia_device); + + return ret; +} + +static void __exit palmtc_pcmcia_exit(void) +{ + platform_device_unregister(palmtc_pcmcia_device); +} + +module_init(palmtc_pcmcia_init); +module_exit(palmtc_pcmcia_exit); + +MODULE_AUTHOR("Alex Osborne <ato@meshy.org>," + " Marek Vasut <marek.vasut@gmail.com>"); +MODULE_DESCRIPTION("PCMCIA support for Palm Tungsten|C"); +MODULE_ALIAS("platform:pxa2xx-pcmcia"); +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index bb8cc05605a..747ca194fad 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -438,34 +438,37 @@ static int __exit pxa_rtc_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int pxa_rtc_suspend(struct device *dev) { - struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) enable_irq_wake(pxa_rtc->irq_Alrm); return 0; } -static int pxa_rtc_resume(struct platform_device *pdev) +static int pxa_rtc_resume(struct device *dev) { - struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) disable_irq_wake(pxa_rtc->irq_Alrm); return 0; } -#else -#define pxa_rtc_suspend NULL -#define pxa_rtc_resume NULL + +static struct dev_pm_ops pxa_rtc_pm_ops = { + .suspend = pxa_rtc_suspend, + .resume = pxa_rtc_resume, +}; #endif static struct platform_driver pxa_rtc_driver = { .remove = __exit_p(pxa_rtc_remove), - .suspend = pxa_rtc_suspend, - .resume = pxa_rtc_resume, .driver = { - .name = "pxa-rtc", + .name = "pxa-rtc", +#ifdef CONFIG_PM + .pm = &pxa_rtc_pm_ops, +#endif }, }; diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 021b2928f0b..29f98a70586 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -393,31 +393,34 @@ static int sa1100_rtc_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int sa1100_rtc_suspend(struct platform_device *pdev, pm_message_t state) +static int sa1100_rtc_suspend(struct device *dev) { - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) enable_irq_wake(IRQ_RTCAlrm); return 0; } -static int sa1100_rtc_resume(struct platform_device *pdev) +static int sa1100_rtc_resume(struct device *dev) { - if (device_may_wakeup(&pdev->dev)) + if (device_may_wakeup(dev)) disable_irq_wake(IRQ_RTCAlrm); return 0; } -#else -#define sa1100_rtc_suspend NULL -#define sa1100_rtc_resume NULL + +static struct dev_pm_ops sa1100_rtc_pm_ops = { + .suspend = sa1100_rtc_suspend, + .resume = sa1100_rtc_resume, +}; #endif static struct platform_driver sa1100_rtc_driver = { .probe = sa1100_rtc_probe, .remove = sa1100_rtc_remove, - .suspend = sa1100_rtc_suspend, - .resume = sa1100_rtc_resume, .driver = { - .name = "sa1100-rtc", + .name = "sa1100-rtc", +#ifdef CONFIG_PM + .pm = &sa1100_rtc_pm_ops, +#endif }, }; diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c index 8d349b23249..300cea768d7 100644 --- a/drivers/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/serial/cpm_uart/cpm_uart_core.c @@ -649,7 +649,7 @@ static int cpm_uart_tx_pump(struct uart_port *port) u8 *p; int count; struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port; - struct circ_buf *xmit = &port->info->xmit; + struct circ_buf *xmit = &port->state->xmit; /* Handle xon/xoff */ if (port->x_char) { diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c index 7be52fe288e..31f172397af 100644 --- a/drivers/serial/crisv10.c +++ b/drivers/serial/crisv10.c @@ -18,6 +18,7 @@ static char *serial_version = "$Revision: 1.25 $"; #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/major.h> +#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/mm.h> diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c index 6443b7ff274..b8629d74f6a 100644 --- a/drivers/serial/pxa.c +++ b/drivers/serial/pxa.c @@ -726,9 +726,10 @@ static struct uart_driver serial_pxa_reg = { .cons = PXA_CONSOLE, }; -static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state) +#ifdef CONFIG_PM +static int serial_pxa_suspend(struct device *dev) { - struct uart_pxa_port *sport = platform_get_drvdata(dev); + struct uart_pxa_port *sport = dev_get_drvdata(dev); if (sport) uart_suspend_port(&serial_pxa_reg, &sport->port); @@ -736,9 +737,9 @@ static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state) return 0; } -static int serial_pxa_resume(struct platform_device *dev) +static int serial_pxa_resume(struct device *dev) { - struct uart_pxa_port *sport = platform_get_drvdata(dev); + struct uart_pxa_port *sport = dev_get_drvdata(dev); if (sport) uart_resume_port(&serial_pxa_reg, &sport->port); @@ -746,6 +747,12 @@ static int serial_pxa_resume(struct platform_device *dev) return 0; } +static struct dev_pm_ops serial_pxa_pm_ops = { + .suspend = serial_pxa_suspend, + .resume = serial_pxa_resume, +}; +#endif + static int serial_pxa_probe(struct platform_device *dev) { struct uart_pxa_port *sport; @@ -825,11 +832,12 @@ static struct platform_driver serial_pxa_driver = { .probe = serial_pxa_probe, .remove = serial_pxa_remove, - .suspend = serial_pxa_suspend, - .resume = serial_pxa_resume, .driver = { .name = "pxa2xx-uart", .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &serial_pxa_pm_ops, +#endif }, }; diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c index c0f950a7cbe..958a3ffc898 100644 --- a/drivers/spi/amba-pl022.c +++ b/drivers/spi/amba-pl022.c @@ -532,7 +532,7 @@ static void restore_state(struct pl022 *pl022) GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0) | \ GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \ GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \ - GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \ + GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \ GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \ GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16) | \ GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \ @@ -1247,8 +1247,8 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) { - if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE) - && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) { + if ((chip_info->clk_phase != SSP_CLK_FIRST_EDGE) + && (chip_info->clk_phase != SSP_CLK_SECOND_EDGE)) { dev_err(chip_info->dev, "Clock Phase is configured incorrectly\n"); return -EINVAL; @@ -1485,7 +1485,7 @@ static int pl022_setup(struct spi_device *spi) chip_info->data_size = SSP_DATA_BITS_12; chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM; chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC; - chip_info->clk_phase = SSP_CLK_FALLING_EDGE; + chip_info->clk_phase = SSP_CLK_SECOND_EDGE; chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW; chip_info->ctrl_len = SSP_BITS_8; chip_info->wait_state = SSP_MWIRE_WAIT_ZERO; diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 31dd56f0dae..c8c2b693ffa 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -1668,10 +1668,9 @@ static void pxa2xx_spi_shutdown(struct platform_device *pdev) } #ifdef CONFIG_PM - -static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state) +static int pxa2xx_spi_suspend(struct device *dev) { - struct driver_data *drv_data = platform_get_drvdata(pdev); + struct driver_data *drv_data = dev_get_drvdata(dev); struct ssp_device *ssp = drv_data->ssp; int status = 0; @@ -1684,9 +1683,9 @@ static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int pxa2xx_spi_resume(struct platform_device *pdev) +static int pxa2xx_spi_resume(struct device *dev) { - struct driver_data *drv_data = platform_get_drvdata(pdev); + struct driver_data *drv_data = dev_get_drvdata(dev); struct ssp_device *ssp = drv_data->ssp; int status = 0; @@ -1703,26 +1702,29 @@ static int pxa2xx_spi_resume(struct platform_device *pdev) /* Start the queue running */ status = start_queue(drv_data); if (status != 0) { - dev_err(&pdev->dev, "problem starting queue (%d)\n", status); + dev_err(dev, "problem starting queue (%d)\n", status); return status; } return 0; } -#else -#define pxa2xx_spi_suspend NULL -#define pxa2xx_spi_resume NULL -#endif /* CONFIG_PM */ + +static struct dev_pm_ops pxa2xx_spi_pm_ops = { + .suspend = pxa2xx_spi_suspend, + .resume = pxa2xx_spi_resume, +}; +#endif static struct platform_driver driver = { .driver = { - .name = "pxa2xx-spi", - .owner = THIS_MODULE, + .name = "pxa2xx-spi", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &pxa2xx_spi_pm_ops, +#endif }, .remove = pxa2xx_spi_remove, .shutdown = pxa2xx_spi_shutdown, - .suspend = pxa2xx_spi_suspend, - .resume = pxa2xx_spi_resume, }; static int __init pxa2xx_spi_init(void) diff --git a/drivers/staging/go7007/Makefile b/drivers/staging/go7007/Makefile index d14ea84a01f..1301caa7495 100644 --- a/drivers/staging/go7007/Makefile +++ b/drivers/staging/go7007/Makefile @@ -32,8 +32,3 @@ endif EXTRA_CFLAGS += -Idrivers/media/dvb/frontends EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core - -# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too -ifeq ($(CONFIG_SND),) -EXTRA_CFLAGS += -include sound/config.h -endif diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index ebd7237230e..240750881d2 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD default y if PCMCIA && !M32R # sl811_cs default y if ARM # SL-811 default y if SUPERH # r8a66597-hcd - default y if MICROBLAZE default PCI # many non-PCI SOC chips embed OHCI diff --git a/drivers/usb/host/ohci-pxa27x.c b/drivers/usb/host/ohci-pxa27x.c index b5294a9344d..f1c06202fdf 100644 --- a/drivers/usb/host/ohci-pxa27x.c +++ b/drivers/usb/host/ohci-pxa27x.c @@ -481,38 +481,47 @@ static int ohci_hcd_pxa27x_drv_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int ohci_hcd_pxa27x_drv_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM +static int ohci_hcd_pxa27x_drv_suspend(struct device *dev) { - struct usb_hcd *hcd = platform_get_drvdata(pdev); + struct usb_hcd *hcd = dev_get_drvdata(dev); struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd); if (time_before(jiffies, ohci->ohci.next_statechange)) msleep(5); ohci->ohci.next_statechange = jiffies; - pxa27x_stop_hc(ohci, &pdev->dev); + pxa27x_stop_hc(ohci, dev); hcd->state = HC_STATE_SUSPENDED; return 0; } -static int ohci_hcd_pxa27x_drv_resume(struct platform_device *pdev) +static int ohci_hcd_pxa27x_drv_resume(struct device *dev) { - struct usb_hcd *hcd = platform_get_drvdata(pdev); + struct usb_hcd *hcd = dev_get_drvdata(dev); struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd); + struct pxaohci_platform_data *inf = dev->platform_data; int status; if (time_before(jiffies, ohci->ohci.next_statechange)) msleep(5); ohci->ohci.next_statechange = jiffies; - if ((status = pxa27x_start_hc(ohci, &pdev->dev)) < 0) + if ((status = pxa27x_start_hc(ohci, dev)) < 0) return status; + /* Select Power Management Mode */ + pxa27x_ohci_select_pmm(ohci, inf->port_mode); + ohci_finish_controller_resume(hcd); return 0; } + +static struct dev_pm_ops ohci_hcd_pxa27x_pm_ops = { + .suspend = ohci_hcd_pxa27x_drv_suspend, + .resume = ohci_hcd_pxa27x_drv_resume, +}; #endif /* work with hotplug and coldplug */ @@ -522,13 +531,12 @@ static struct platform_driver ohci_hcd_pxa27x_driver = { .probe = ohci_hcd_pxa27x_drv_probe, .remove = ohci_hcd_pxa27x_drv_remove, .shutdown = usb_hcd_platform_shutdown, -#ifdef CONFIG_PM - .suspend = ohci_hcd_pxa27x_drv_suspend, - .resume = ohci_hcd_pxa27x_drv_resume, -#endif .driver = { .name = "pxa27x-ohci", .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &ohci_hcd_pxa27x_pm_ops, +#endif }, }; diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 68fa0e43b78..8c075b2416b 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial) } } +#ifdef CONFIG_PM static void stop_read_write_urbs(struct usb_serial *serial) { int i, j; @@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial) return ec ? -EIO : 0; } +#else +#define sierra_suspend NULL +#define sierra_resume NULL +#endif static struct usb_serial_driver sierra_device = { .driver = { diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c index 93bb4340cc6..701a1081e19 100644 --- a/drivers/video/backlight/da903x_bl.c +++ b/drivers/video/backlight/da903x_bl.c @@ -154,34 +154,38 @@ static int da903x_backlight_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int da903x_backlight_suspend(struct platform_device *pdev, - pm_message_t state) +static int da903x_backlight_suspend(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct backlight_device *bl = platform_get_drvdata(pdev); return da903x_backlight_set(bl, 0); } -static int da903x_backlight_resume(struct platform_device *pdev) +static int da903x_backlight_resume(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct backlight_device *bl = platform_get_drvdata(pdev); backlight_update_status(bl); return 0; } -#else -#define da903x_backlight_suspend NULL -#define da903x_backlight_resume NULL + +static struct dev_pm_ops da903x_backlight_pm_ops = { + .suspend = da903x_backlight_suspend, + .resume = da903x_backlight_resume, +}; #endif static struct platform_driver da903x_backlight_driver = { .driver = { .name = "da903x-backlight", .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &da903x_backlight_pm_ops, +#endif }, .probe = da903x_backlight_probe, .remove = da903x_backlight_remove, - .suspend = da903x_backlight_suspend, - .resume = da903x_backlight_resume, }; static int __init da903x_backlight_init(void) diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 6506117c134..1820c4a2443 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -1638,24 +1638,26 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data) * Power management hooks. Note that we won't be called from IRQ context, * unlike the blank functions above, so we may sleep. */ -static int pxafb_suspend(struct platform_device *dev, pm_message_t state) +static int pxafb_suspend(struct device *dev) { - struct pxafb_info *fbi = platform_get_drvdata(dev); + struct pxafb_info *fbi = dev_get_drvdata(dev); set_ctrlr_state(fbi, C_DISABLE_PM); return 0; } -static int pxafb_resume(struct platform_device *dev) +static int pxafb_resume(struct device *dev) { - struct pxafb_info *fbi = platform_get_drvdata(dev); + struct pxafb_info *fbi = dev_get_drvdata(dev); set_ctrlr_state(fbi, C_ENABLE_PM); return 0; } -#else -#define pxafb_suspend NULL -#define pxafb_resume NULL + +static struct dev_pm_ops pxafb_pm_ops = { + .suspend = pxafb_suspend, + .resume = pxafb_resume, +}; #endif static int __devinit pxafb_init_video_memory(struct pxafb_info *fbi) @@ -2081,6 +2083,9 @@ static int __devinit pxafb_probe(struct platform_device *dev) goto failed; } + if (cpu_is_pxa3xx() && inf->acceleration_enabled) + fbi->fb.fix.accel = FB_ACCEL_PXA3XX; + fbi->backlight_power = inf->pxafb_backlight_power; fbi->lcd_power = inf->pxafb_lcd_power; @@ -2091,14 +2096,14 @@ static int __devinit pxafb_probe(struct platform_device *dev) goto failed_fbi; } - r = request_mem_region(r->start, r->end - r->start + 1, dev->name); + r = request_mem_region(r->start, resource_size(r), dev->name); if (r == NULL) { dev_err(&dev->dev, "failed to request I/O memory\n"); ret = -EBUSY; goto failed_fbi; } - fbi->mmio_base = ioremap(r->start, r->end - r->start + 1); + fbi->mmio_base = ioremap(r->start, resource_size(r)); if (fbi->mmio_base == NULL) { dev_err(&dev->dev, "failed to map I/O memory\n"); ret = -EBUSY; @@ -2197,7 +2202,7 @@ failed_free_dma: failed_free_io: iounmap(fbi->mmio_base); failed_free_res: - release_mem_region(r->start, r->end - r->start + 1); + release_mem_region(r->start, resource_size(r)); failed_fbi: clk_put(fbi->clk); platform_set_drvdata(dev, NULL); @@ -2237,7 +2242,7 @@ static int __devexit pxafb_remove(struct platform_device *dev) iounmap(fbi->mmio_base); r = platform_get_resource(dev, IORESOURCE_MEM, 0); - release_mem_region(r->start, r->end - r->start + 1); + release_mem_region(r->start, resource_size(r)); clk_put(fbi->clk); kfree(fbi); @@ -2248,11 +2253,12 @@ static int __devexit pxafb_remove(struct platform_device *dev) static struct platform_driver pxafb_driver = { .probe = pxafb_probe, .remove = __devexit_p(pxafb_remove), - .suspend = pxafb_suspend, - .resume = pxafb_resume, .driver = { .owner = THIS_MODULE, .name = "pxa2xx-fb", +#ifdef CONFIG_PM + .pm = &pxafb_pm_ops, +#endif }, }; diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c index ba3d71f5c7d..9554ad5f9af 100644 --- a/drivers/vlynq/vlynq.c +++ b/drivers/vlynq/vlynq.c @@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev) dev->mem_start = mem_res->start; dev->mem_end = mem_res->end; - len = regs_res->end - regs_res->start; + len = resource_size(regs_res); if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { printk(KERN_ERR "%s: Can't request vlynq registers\n", dev_name(&dev->dev)); diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 798cb071d13..3f57ce4bee5 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -19,9 +19,6 @@ static int adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, int create) { - if (block < 0) - goto abort_negative; - if (!create) { if (block >= inode->i_blocks) goto abort_toobig; @@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, /* don't support allocation of blocks yet */ return -EIO; -abort_negative: - adfs_error(inode->i_sb, "block %d < 0", block); - return -EIO; - abort_toobig: return 0; } diff --git a/fs/attr.c b/fs/attr.c index 9fe1b1bd30a..96d394bdadd 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -18,7 +18,7 @@ /* Taken over from the old code... */ /* POSIX UID/GID verification for setting inode attributes. */ -int inode_change_ok(struct inode *inode, struct iattr *attr) +int inode_change_ok(const struct inode *inode, struct iattr *attr) { int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; @@ -60,9 +60,51 @@ fine: error: return retval; } - EXPORT_SYMBOL(inode_change_ok); +/** + * inode_newsize_ok - may this inode be truncated to a given size + * @inode: the inode to be truncated + * @offset: the new size to assign to the inode + * @Returns: 0 on success, -ve errno on failure + * + * inode_newsize_ok will check filesystem limits and ulimits to check that the + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ + * when necessary. Caller must not proceed with inode size change if failure is + * returned. @inode must be a file (not directory), with appropriate + * permissions to allow truncate (inode_newsize_ok does NOT check these + * conditions). + * + * inode_newsize_ok must be called with i_mutex held. + */ +int inode_newsize_ok(const struct inode *inode, loff_t offset) +{ + if (inode->i_size < offset) { + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + } else { + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + } + + return 0; +out_sig: + send_sig(SIGXFSZ, current, 0); +out_big: + return -EFBIG; +} +EXPORT_SYMBOL(inode_newsize_ok); + int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dd376c124e7..33baf27fac7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb) { kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; - - if (BEFS_SB(sb)->nls) { - unload_nls(BEFS_SB(sb)->nls); - BEFS_SB(sb)->nls = NULL; - } - + unload_nls(BEFS_SB(sb)->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 442d94fe255..b9b3bb51b1e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1711,42 +1711,52 @@ struct elf_note_info { int numnote; }; -static int fill_note_info(struct elfhdr *elf, int phdrs, - struct elf_note_info *info, - long signr, struct pt_regs *regs) +static int elf_note_info_init(struct elf_note_info *info) { -#define NUM_NOTES 6 - struct list_head *t; - - info->notes = NULL; - info->prstatus = NULL; - info->psinfo = NULL; - info->fpu = NULL; -#ifdef ELF_CORE_COPY_XFPREGS - info->xfpu = NULL; -#endif + memset(info, 0, sizeof(*info)); INIT_LIST_HEAD(&info->thread_list); - info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), - GFP_KERNEL); + /* Allocate space for six ELF notes */ + info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); if (!info->notes) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - return 0; + goto notes_free; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - return 0; + goto psinfo_free; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - return 0; + goto prstatus_free; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - return 0; + goto fpu_free; +#endif + return 1; +#ifdef ELF_CORE_COPY_XFPREGS + fpu_free: + kfree(info->fpu); #endif + prstatus_free: + kfree(info->prstatus); + psinfo_free: + kfree(info->psinfo); + notes_free: + kfree(info->notes); + return 0; +} + +static int fill_note_info(struct elfhdr *elf, int phdrs, + struct elf_note_info *info, + long signr, struct pt_regs *regs) +{ + struct list_head *t; + + if (!elf_note_info_init(info)) + return 0; - info->thread_status_size = 0; if (signr) { struct core_thread *ct; struct elf_thread_status *ets; @@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, #endif return 1; - -#undef NUM_NOTES } static size_t get_note_info_size(struct elf_note_info *info) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 76285471073..38502c67987 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, } stack_size = exec_params.stack_size; - if (stack_size < interp_params.stack_size) - stack_size = interp_params.stack_size; - if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) executable_stack = EXSTACK_ENABLE_X; else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) executable_stack = EXSTACK_DISABLE_X; - else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) - executable_stack = EXSTACK_ENABLE_X; - else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) - executable_stack = EXSTACK_DISABLE_X; else executable_stack = EXSTACK_DEFAULT; + if (stack_size == 0) { + stack_size = interp_params.stack_size; + if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) + executable_stack = EXSTACK_ENABLE_X; + else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) + executable_stack = EXSTACK_DISABLE_X; + else + executable_stack = EXSTACK_DEFAULT; + } + retval = -ENOEXEC; if (stack_size == 0) goto error; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e92f229e3c6..a2796651e75 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -278,8 +278,6 @@ static int decompress_exec( ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); if (ret <= 0) break; - if (ret >= (unsigned long) -4096) - break; len -= ret; strm.next_in = buf; @@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) "(%d != %d)", (unsigned) r, curid, id); goto failed; } else if ( ! p->lib_list[id].loaded && - load_flat_shared_library(id, p) > (unsigned long) -4096) { + IS_ERR_VALUE(load_flat_shared_library(id, p))) { printk("BINFMT_FLAT: failed to load library %d", id); goto failed; } @@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm, textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(¤t->mm->mmap_sem); - if (!textpos || textpos >= (unsigned long) -4096) { + if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) textpos = (unsigned long) -ENOMEM; printk("Unable to mmap process text, errno %d\n", (int)-textpos); @@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); - if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { + if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { if (!realdatastart) realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", @@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm, result = bprm->file->f_op->read(bprm->file, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), &fpos); } - if (result >= (unsigned long)-4096) { + if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); @@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); - if (!textpos || textpos >= (unsigned long) -4096) { + if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) textpos = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process text/data, errno %d\n", @@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm, fpos = 0; result = bprm->file->f_op->read(bprm->file, (char *) textpos, text_len, &fpos); - if (result < (unsigned long) -4096) + if (!IS_ERR_VALUE(result)) result = decompress_exec(bprm, text_len, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), 0); } @@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm, fpos = 0; result = bprm->file->f_op->read(bprm->file, (char *) textpos, text_len, &fpos); - if (result < (unsigned long) -4096) { + if (!IS_ERR_VALUE(result)) { fpos = ntohl(hdr->data_start); result = bprm->file->f_op->read(bprm->file, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), &fpos); } } - if (result >= (unsigned long)-4096) { + if (IS_ERR_VALUE(result)) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); @@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) res = prepare_binprm(&bprm); - if (res <= (unsigned long)-4096) + if (!IS_ERR_VALUE(res)) res = load_flat_file(&bprm, libs, id, NULL); abort_creds(bprm.cred); @@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); - if (res > (unsigned long)-4096) + if (IS_ERR_VALUE(res)) return res; /* Update data segment pointers for all libraries */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 5d1ed50bd46..9cf4b926f8e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev); * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_sem to make sure no new mounts - * happen on bdev until thaw_bdev() is called. * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last @@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev) int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - bdev->bd_fsfreeze_count++; + if (++bdev->bd_fsfreeze_count > 1) { + /* + * We don't even need to grab a reference - the first call + * to freeze_bdev grab an active reference and only the last + * thaw_bdev drops it. + */ sb = get_super(bdev); + drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } - bdev->bd_fsfreeze_count++; - - down(&bdev->bd_mount_sem); - sb = get_super(bdev); - if (sb && !(sb->s_flags & MS_RDONLY)) { - sb->s_frozen = SB_FREEZE_WRITE; - smp_wmb(); - - sync_filesystem(sb); - - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); - - sync_blockdev(sb->s_bdev); - - if (sb->s_op->freeze_fs) { - error = sb->s_op->freeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; - drop_super(sb); - up(&bdev->bd_mount_sem); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } + + sb = get_active_super(bdev); + if (!sb) + goto out; + if (sb->s_flags & MS_RDONLY) { + deactivate_locked_super(sb); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; + } + + sb->s_frozen = SB_FREEZE_WRITE; + smp_wmb(); + + sync_filesystem(sb); + + sb->s_frozen = SB_FREEZE_TRANS; + smp_wmb(); + + sync_blockdev(sb->s_bdev); + + if (sb->s_op->freeze_fs) { + error = sb->s_op->freeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem freeze failed\n"); + sb->s_frozen = SB_UNFROZEN; + deactivate_locked_super(sb); + bdev->bd_fsfreeze_count--; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return ERR_PTR(error); } } + up_write(&sb->s_umount); + out: sync_blockdev(bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); - - return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ + return sb; /* thaw_bdev releases s->s_umount */ } EXPORT_SYMBOL(freeze_bdev); @@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev); */ int thaw_bdev(struct block_device *bdev, struct super_block *sb) { - int error = 0; + int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return -EINVAL; - } - - bdev->bd_fsfreeze_count--; - if (bdev->bd_fsfreeze_count > 0) { - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; - } - - if (sb) { - BUG_ON(sb->s_bdev != bdev); - if (!(sb->s_flags & MS_RDONLY)) { - if (sb->s_op->unfreeze_fs) { - error = sb->s_op->unfreeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; - bdev->bd_fsfreeze_count++; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return error; - } - } - sb->s_frozen = SB_UNFROZEN; - smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + if (!bdev->bd_fsfreeze_count) + goto out_unlock; + + error = 0; + if (--bdev->bd_fsfreeze_count > 0) + goto out_unlock; + + if (!sb) + goto out_unlock; + + BUG_ON(sb->s_bdev != bdev); + down_write(&sb->s_umount); + if (sb->s_flags & MS_RDONLY) + goto out_deactivate; + + if (sb->s_op->unfreeze_fs) { + error = sb->s_op->unfreeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem thaw failed\n"); + sb->s_frozen = SB_FREEZE_TRANS; + bdev->bd_fsfreeze_count++; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return error; } - drop_super(sb); } - up(&bdev->bd_mount_sem); + sb->s_frozen = SB_UNFROZEN; + smp_wmb(); + wake_up(&sb->s_wait_unfrozen); + +out_deactivate: + if (sb) + deactivate_locked_super(sb); +out_unlock: mutex_unlock(&bdev->bd_fsfreeze_mutex); return 0; } @@ -430,7 +437,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449a..282ca085c2f 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -48,6 +48,9 @@ struct btrfs_worker_thread { /* number of things on the pending list */ atomic_t num_pending; + /* reference counter for this struct */ + atomic_t refs; + unsigned long sequence; /* protects the pending list. */ @@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) unsigned long flags; spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 1; - list_move(&worker->worker_list, &worker->workers->idle_list); + + /* the list may be empty if the worker is just starting */ + if (!list_empty(&worker->worker_list)) { + list_move(&worker->worker_list, + &worker->workers->idle_list); + } spin_unlock_irqrestore(&worker->workers->lock, flags); } } @@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) unsigned long flags; spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 0; - list_move_tail(&worker->worker_list, - &worker->workers->worker_list); + + if (!list_empty(&worker->worker_list)) { + list_move_tail(&worker->worker_list, + &worker->workers->worker_list); + } spin_unlock_irqrestore(&worker->workers->lock, flags); } } -static noinline int run_ordered_completions(struct btrfs_workers *workers, - struct btrfs_work *work) +static void check_pending_worker_creates(struct btrfs_worker_thread *worker) { + struct btrfs_workers *workers = worker->workers; unsigned long flags; + rmb(); + if (!workers->atomic_start_pending) + return; + + spin_lock_irqsave(&workers->lock, flags); + if (!workers->atomic_start_pending) + goto out; + + workers->atomic_start_pending = 0; + if (workers->num_workers >= workers->max_workers) + goto out; + + spin_unlock_irqrestore(&workers->lock, flags); + btrfs_start_workers(workers, 1); + return; + +out: + spin_unlock_irqrestore(&workers->lock, flags); +} + +static noinline int run_ordered_completions(struct btrfs_workers *workers, + struct btrfs_work *work) +{ if (!workers->ordered) return 0; set_bit(WORK_DONE_BIT, &work->flags); - spin_lock_irqsave(&workers->lock, flags); + spin_lock(&workers->order_lock); while (1) { if (!list_empty(&workers->prio_order_list)) { @@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) break; - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); work->ordered_func(work); /* now take the lock again and call the freeing code */ - spin_lock_irqsave(&workers->lock, flags); + spin_lock(&workers->order_lock); list_del(&work->order_list); work->ordered_free(work); } - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); return 0; } +static void put_worker(struct btrfs_worker_thread *worker) +{ + if (atomic_dec_and_test(&worker->refs)) + kfree(worker); +} + +static int try_worker_shutdown(struct btrfs_worker_thread *worker) +{ + int freeit = 0; + + spin_lock_irq(&worker->lock); + spin_lock(&worker->workers->lock); + if (worker->workers->num_workers > 1 && + worker->idle && + !worker->working && + !list_empty(&worker->worker_list) && + list_empty(&worker->prio_pending) && + list_empty(&worker->pending) && + atomic_read(&worker->num_pending) == 0) { + freeit = 1; + list_del_init(&worker->worker_list); + worker->workers->num_workers--; + } + spin_unlock(&worker->workers->lock); + spin_unlock_irq(&worker->lock); + + if (freeit) + put_worker(worker); + return freeit; +} + +static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, + struct list_head *prio_head, + struct list_head *head) +{ + struct btrfs_work *work = NULL; + struct list_head *cur = NULL; + + if(!list_empty(prio_head)) + cur = prio_head->next; + + smp_mb(); + if (!list_empty(&worker->prio_pending)) + goto refill; + + if (!list_empty(head)) + cur = head->next; + + if (cur) + goto out; + +refill: + spin_lock_irq(&worker->lock); + list_splice_tail_init(&worker->prio_pending, prio_head); + list_splice_tail_init(&worker->pending, head); + + if (!list_empty(prio_head)) + cur = prio_head->next; + else if (!list_empty(head)) + cur = head->next; + spin_unlock_irq(&worker->lock); + + if (!cur) + goto out_fail; + +out: + work = list_entry(cur, struct btrfs_work, list); + +out_fail: + return work; +} + /* * main loop for servicing work items */ static int worker_loop(void *arg) { struct btrfs_worker_thread *worker = arg; - struct list_head *cur; + struct list_head head; + struct list_head prio_head; struct btrfs_work *work; + + INIT_LIST_HEAD(&head); + INIT_LIST_HEAD(&prio_head); + do { - spin_lock_irq(&worker->lock); -again_locked: +again: while (1) { - if (!list_empty(&worker->prio_pending)) - cur = worker->prio_pending.next; - else if (!list_empty(&worker->pending)) - cur = worker->pending.next; - else + + + work = get_next_work(worker, &prio_head, &head); + if (!work) break; - work = list_entry(cur, struct btrfs_work, list); list_del(&work->list); clear_bit(WORK_QUEUED_BIT, &work->flags); work->worker = worker; - spin_unlock_irq(&worker->lock); work->func(work); @@ -175,9 +282,13 @@ again_locked: */ run_ordered_completions(worker->workers, work); - spin_lock_irq(&worker->lock); - check_idle_worker(worker); + check_pending_worker_creates(worker); + } + + spin_lock_irq(&worker->lock); + check_idle_worker(worker); + if (freezing(current)) { worker->working = 0; spin_unlock_irq(&worker->lock); @@ -216,8 +327,10 @@ again_locked: spin_lock_irq(&worker->lock); set_current_state(TASK_INTERRUPTIBLE); if (!list_empty(&worker->pending) || - !list_empty(&worker->prio_pending)) - goto again_locked; + !list_empty(&worker->prio_pending)) { + spin_unlock_irq(&worker->lock); + goto again; + } /* * this makes sure we get a wakeup when someone @@ -226,8 +339,13 @@ again_locked: worker->working = 0; spin_unlock_irq(&worker->lock); - if (!kthread_should_stop()) - schedule(); + if (!kthread_should_stop()) { + schedule_timeout(HZ * 120); + if (!worker->working && + try_worker_shutdown(worker)) { + return 0; + } + } } __set_current_state(TASK_RUNNING); } @@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers) { struct list_head *cur; struct btrfs_worker_thread *worker; + int can_stop; + spin_lock_irq(&workers->lock); list_splice_init(&workers->idle_list, &workers->worker_list); while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; worker = list_entry(cur, struct btrfs_worker_thread, worker_list); - kthread_stop(worker->task); - list_del(&worker->worker_list); - kfree(worker); + + atomic_inc(&worker->refs); + workers->num_workers -= 1; + if (!list_empty(&worker->worker_list)) { + list_del_init(&worker->worker_list); + put_worker(worker); + can_stop = 1; + } else + can_stop = 0; + spin_unlock_irq(&workers->lock); + if (can_stop) + kthread_stop(worker->task); + spin_lock_irq(&workers->lock); + put_worker(worker); } + spin_unlock_irq(&workers->lock); return 0; } @@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) INIT_LIST_HEAD(&workers->order_list); INIT_LIST_HEAD(&workers->prio_order_list); spin_lock_init(&workers->lock); + spin_lock_init(&workers->order_lock); workers->max_workers = max; workers->idle_thresh = 32; workers->name = name; workers->ordered = 0; + workers->atomic_start_pending = 0; + workers->atomic_worker_start = 0; } /* @@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) INIT_LIST_HEAD(&worker->prio_pending); INIT_LIST_HEAD(&worker->worker_list); spin_lock_init(&worker->lock); + atomic_set(&worker->num_pending, 0); + atomic_set(&worker->refs, 1); worker->workers = workers; worker->task = kthread_run(worker_loop, worker, "btrfs-%s-%d", workers->name, @@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) kfree(worker); goto fail; } - spin_lock_irq(&workers->lock); list_add_tail(&worker->worker_list, &workers->idle_list); worker->idle = 1; @@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) */ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); - atomic_inc(&worker->num_pending); worker->sequence++; if (worker->sequence % workers->idle_thresh == 0) @@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; unsigned long flags; + struct list_head *fallback; again: spin_lock_irqsave(&workers->lock, flags); worker = next_worker(workers); - spin_unlock_irqrestore(&workers->lock, flags); if (!worker) { - spin_lock_irqsave(&workers->lock, flags); if (workers->num_workers >= workers->max_workers) { - struct list_head *fallback = NULL; - /* - * we have failed to find any workers, just - * return the force one - */ - if (!list_empty(&workers->worker_list)) - fallback = workers->worker_list.next; - if (!list_empty(&workers->idle_list)) - fallback = workers->idle_list.next; - BUG_ON(!fallback); - worker = list_entry(fallback, - struct btrfs_worker_thread, worker_list); - spin_unlock_irqrestore(&workers->lock, flags); + goto fallback; + } else if (workers->atomic_worker_start) { + workers->atomic_start_pending = 1; + goto fallback; } else { spin_unlock_irqrestore(&workers->lock, flags); /* we're below the limit, start another worker */ @@ -396,6 +521,28 @@ again: goto again; } } + goto found; + +fallback: + fallback = NULL; + /* + * we have failed to find any workers, just + * return the first one we can find. + */ + if (!list_empty(&workers->worker_list)) + fallback = workers->worker_list.next; + if (!list_empty(&workers->idle_list)) + fallback = workers->idle_list.next; + BUG_ON(!fallback); + worker = list_entry(fallback, + struct btrfs_worker_thread, worker_list); +found: + /* + * this makes sure the worker doesn't exit before it is placed + * onto a busy/idle list + */ + atomic_inc(&worker->num_pending); + spin_unlock_irqrestore(&workers->lock, flags); return worker; } @@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work) spin_lock(&worker->workers->lock); worker->idle = 0; list_move_tail(&worker->worker_list, - &worker->workers->worker_list); + &worker->workers->worker_list); spin_unlock(&worker->workers->lock); } if (!worker->working) { @@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work) worker->working = 1; } - spin_unlock_irqrestore(&worker->lock, flags); if (wake) wake_up_process(worker->task); + spin_unlock_irqrestore(&worker->lock, flags); out: return 0; @@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) worker = find_worker(workers); if (workers->ordered) { - spin_lock_irqsave(&workers->lock, flags); + /* + * you're not allowed to do ordered queues from an + * interrupt handler + */ + spin_lock(&workers->order_lock); if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { list_add_tail(&work->order_list, &workers->prio_order_list); } else { list_add_tail(&work->order_list, &workers->order_list); } - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); } else { INIT_LIST_HEAD(&work->order_list); } @@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) list_add_tail(&work->list, &worker->prio_pending); else list_add_tail(&work->list, &worker->pending); - atomic_inc(&worker->num_pending); check_busy_worker(worker); /* @@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) wake = 1; worker->working = 1; - spin_unlock_irqrestore(&worker->lock, flags); - if (wake) wake_up_process(worker->task); + spin_unlock_irqrestore(&worker->lock, flags); + out: return 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db..fc089b95ec1 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -73,6 +73,15 @@ struct btrfs_workers { /* force completions in the order they were queued */ int ordered; + /* more workers required, but in an interrupt handler */ + int atomic_start_pending; + + /* + * are we allowed to sleep while starting workers or are we required + * to start them at a later time? + */ + int atomic_worker_start; + /* list with all the work threads. The workers on the idle thread * may be actively servicing jobs, but they haven't yet hit the * idle thresh limit above. @@ -90,6 +99,9 @@ struct btrfs_workers { /* lock for finding the next worker thread to queue on */ spinlock_t lock; + /* lock for the ordered lists */ + spinlock_t order_lock; + /* extra name for this worker, used for current->name */ char *name; }; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0..82ee56bba29 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -138,6 +138,7 @@ struct btrfs_inode { * of these. */ unsigned ordered_data_close:1; + unsigned dummy_inode:1; struct inode vfs_inode; }; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a3..a11a32058b5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, */ set_page_extent_mapped(page); lock_extent(tree, last_offset, end, GFP_NOFS); - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, last_offset, PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em || last_offset < em->start || (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || @@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, em_tree = &BTRFS_I(inode)->extent_tree; /* we need the actual starting offset of this extent in the file */ - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, page_offset(bio->bi_io_vec->bv_page), PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3..ec96f3a6d53 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int split; int num_doubles = 0; + l = path->nodes[0]; + slot = path->slots[0]; + if (extend && data_size + btrfs_item_size_nr(l, slot) + + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) + return -EOVERFLOW; + /* first try to make some room by pushing left and right */ if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84c..80599b4e42b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -114,6 +114,10 @@ struct btrfs_ordered_sum; */ #define BTRFS_DEV_ITEMS_OBJECTID 1ULL +#define BTRFS_BTREE_INODE_OBJECTID 1 + +#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -670,6 +674,7 @@ struct btrfs_space_info { u64 bytes_reserved; /* total bytes the allocator has reserved for current allocations */ u64 bytes_readonly; /* total bytes that are read only */ + u64 bytes_super; /* total bytes reserved for the super blocks */ /* delalloc accounting */ u64 bytes_delalloc; /* number of bytes reserved for allocation, @@ -726,6 +731,15 @@ enum btrfs_caching_type { BTRFS_CACHE_FINISHED = 2, }; +struct btrfs_caching_control { + struct list_head list; + struct mutex mutex; + wait_queue_head_t wait; + struct btrfs_block_group_cache *block_group; + u64 progress; + atomic_t count; +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; @@ -733,6 +747,7 @@ struct btrfs_block_group_cache { spinlock_t lock; u64 pinned; u64 reserved; + u64 bytes_super; u64 flags; u64 sectorsize; int extents_thresh; @@ -742,8 +757,9 @@ struct btrfs_block_group_cache { int dirty; /* cache tracking stuff */ - wait_queue_head_t caching_q; int cached; + struct btrfs_caching_control *caching_ctl; + u64 last_byte_to_unpin; struct btrfs_space_info *space_info; @@ -782,13 +798,16 @@ struct btrfs_fs_info { /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; + + spinlock_t fs_roots_radix_lock; struct radix_tree_root fs_roots_radix; /* block group cache stuff */ spinlock_t block_group_cache_lock; struct rb_root block_group_cache_tree; - struct extent_io_tree pinned_extents; + struct extent_io_tree freed_extents[2]; + struct extent_io_tree *pinned_extents; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; @@ -822,11 +841,7 @@ struct btrfs_fs_info { struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex chunk_mutex; - struct mutex drop_mutex; struct mutex volume_mutex; - struct mutex tree_reloc_mutex; - struct rw_semaphore extent_commit_sem; - /* * this protects the ordered operations list only while we are * processing all of the entries on it. This way we make @@ -835,10 +850,16 @@ struct btrfs_fs_info { * before jumping into the main commit. */ struct mutex ordered_operations_mutex; + struct rw_semaphore extent_commit_sem; + + struct rw_semaphore subvol_sem; + + struct srcu_struct subvol_srcu; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + struct list_head caching_block_groups; atomic_t nr_async_submits; atomic_t async_submit_draining; @@ -996,10 +1017,12 @@ struct btrfs_root { u32 stripesize; u32 type; - u64 highest_inode; - u64 last_inode_alloc; + + u64 highest_objectid; int ref_cows; int track_dirty; + int in_radix; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; @@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin); +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num, int reserved); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, @@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_io_tree *unpin); + struct btrfs_root *root); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, @@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); -void btrfs_free_pinned_extents(struct btrfs_fs_info *info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct extent_buffer *parent); /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, - struct btrfs_path *path, - u64 root_id, u64 ref_id); + struct btrfs_path *path, + u64 root_id, u64 ref_id); int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id, - u64 dirid, u64 sequence, + u64 root_id, u64 ref_id, u64 dirid, u64 sequence, + const char *name, int name_len); +int btrfs_del_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, + u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, const char *name, int name_len); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); @@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct int btrfs_search_root(struct btrfs_root *root, u64 search_start, u64 *found_objectid); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); +int btrfs_find_orphan_roots(struct btrfs_root *tree_root); int btrfs_set_root_node(struct btrfs_root_item *item, struct extent_buffer *node); /* dir-item.c */ @@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 dir, u64 objectid, const char *name, int name_len, int mod); +struct btrfs_dir_item * +btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, + const char *name, int name_len); struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); @@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 offset); int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 offset); +int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, @@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index); +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, u64 objectid, + const char *name, int name_len); int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 new_size, @@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, struct dentry *dentry, + struct btrfs_root *new_root, u64 new_dirid, u64 alloc_hint); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); @@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); +void btrfs_drop_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); @@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); +int btrfs_invalidate_inodes(struct btrfs_root *root); +extern struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_block); + u64 inline_limit, u64 *hint_block, int drop_cache); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00..f3a6075519c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +struct btrfs_dir_item * +btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, + const char *name, int name_len) +{ + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; + u32 nritems; + int ret; + + key.objectid = dirid; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ERR_PTR(ret); + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + + while (1) { + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) + break; + + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (di) + return di; + + path->slots[0]++; + } + return NULL; +} + struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c4173146bb..644e796fd64 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +static void free_fs_root(struct btrfs_root *root); static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); @@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, struct extent_map *em; int ret; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) { em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); goto out; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { u64 failed_start = em->start; @@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret) em = ERR_PTR(ret); @@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; - root->highest_inode = 0; - root->last_inode_alloc = 0; + root->highest_objectid = 0; root->name = NULL; root->in_sysfs = 0; root->inode_tree.rb_node = NULL; @@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); + if (ret > 0) + return -ENOENT; BUG_ON(ret); generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); + root->commit_root = btrfs_root_node(root); return 0; } @@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; - u64 highest_inode; u64 generation; u32 blocksize; int ret = 0; @@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, kfree(root); return ERR_PTR(ret); } - goto insert; + goto out; } __setup_root(tree_root->nodesize, tree_root->leafsize, @@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); - if (ret != 0) { - if (ret > 0) - ret = -ENOENT; - goto out; + if (ret == 0) { + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); } - l = path->nodes[0]; - read_extent_buffer(l, &root->root_item, - btrfs_item_ptr_offset(l, path->slots[0]), - sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); - ret = 0; -out: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret) { - kfree(root); + if (ret > 0) + ret = -ENOENT; return ERR_PTR(ret); } + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); -insert: - if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { +out: + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } - } + return root; } @@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; - +again: + spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); if (root) return root; + ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); + if (ret == 0) + ret = -ENOENT; + if (ret < 0) + return ERR_PTR(ret); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; + WARN_ON(btrfs_root_refs(&root->root_item) == 0); set_anon_super(&root->anon_super, NULL); + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + if (ret) + goto fail; + + spin_lock(&fs_info->fs_roots_radix_lock); ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); + if (ret == 0) + root->in_radix = 1; + spin_unlock(&fs_info->fs_roots_radix_lock); + radix_tree_preload_end(); if (ret) { - free_extent_buffer(root->node); - kfree(root); - return ERR_PTR(ret); + if (ret == -EEXIST) { + free_fs_root(root); + goto again; + } + goto fail; } - if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid); - BUG_ON(ret); + + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid); + WARN_ON(ret); + + if (!(fs_info->sb->s_flags & MS_RDONLY)) btrfs_orphan_cleanup(root); - } + return root; +fail: + free_fs_root(root); + return ERR_PTR(ret); } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen) { + return btrfs_read_fs_root_no_name(fs_info, location); +#if 0 struct btrfs_root *root; int ret; @@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#if 0 + ret = btrfs_sysfs_add_root(root); if (ret) { free_extent_buffer(root->node); @@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#endif root->in_sysfs = 1; return root; +#endif } static int btrfs_congested_fn(void *congested_data, int bdi_bits) @@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em) { __unplug_io_fn(bdi, page); return; @@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) err = bdi_register(bdi, NULL, "btrfs-%d", atomic_inc_return(&btrfs_bdi_num)); - if (err) + if (err) { + bdi_destroy(bdi); return err; + } bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->unplug_io_fn = btrfs_unplug_io_fn; @@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg) break; vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + + if (!(root->fs_info->sb->s_flags & MS_RDONLY) && + mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + } if (freezing(current)) { refrigerator(); @@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + + ret = init_srcu_struct(&fs_info->subvol_srcu); + if (ret) { + err = ret; + goto fail; + } + + ret = setup_bdi(fs_info, &fs_info->bdi); + if (ret) { + err = ret; + goto fail_srcu; + } + + fs_info->btree_inode = new_inode(sb); + if (!fs_info->btree_inode) { + err = -ENOMEM; + goto fail_bdi; + } + + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->ordered_operations); + INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - if (setup_bdi(fs_info, &fs_info->bdi)) - goto fail_bdi; - fs_info->btree_inode = new_inode(sb); - fs_info->btree_inode->i_ino = 1; - fs_info->btree_inode->i_nlink = 1; fs_info->metadata_ratio = 8; fs_info->thread_pool_size = min_t(unsigned long, @@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_blocksize_bits = blksize_bits(4096); sb->s_bdi = &fs_info->bdi; + fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; + fs_info->btree_inode->i_nlink = 1; /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); + BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; + insert_inode_hash(fs_info->btree_inode); + spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree.rb_node = NULL; - extent_io_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->freed_extents[0], fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&fs_info->freed_extents[1], + fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->pinned_extents = &fs_info->freed_extents[0]; fs_info->do_barriers = 1; - BTRFS_I(fs_info->btree_inode)->root = tree_root; - memset(&BTRFS_I(fs_info->btree_inode)->location, 0, - sizeof(struct btrfs_key)); - insert_inode_hash(fs_info->btree_inode); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); - mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); - mutex_init(&fs_info->tree_reloc_mutex); init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -EINVAL; goto fail_iput; } - +printk("thread pool is %d\n", fs_info->thread_pool_size); /* * we need to start all the end_io workers up front because the * queue work function gets called at interrupt time, and so it @@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->endio_workers.idle_thresh = 4; fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 64; - fs_info->endio_meta_write_workers.idle_thresh = 64; + fs_info->endio_write_workers.idle_thresh = 2; + fs_info->endio_meta_write_workers.idle_thresh = 2; + + fs_info->endio_workers.atomic_worker_start = 1; + fs_info->endio_meta_workers.atomic_worker_start = 1; + fs_info->endio_write_workers.atomic_worker_start = 1; + fs_info->endio_meta_write_workers.atomic_worker_start = 1; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); - btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_write_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_write_workers, - fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); + btrfs_start_workers(&fs_info->endio_write_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, } } + ret = btrfs_find_orphan_roots(tree_root); + BUG_ON(ret); + if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); BUG_ON(ret); @@ -1977,6 +2028,8 @@ fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); fail_bdi: bdi_destroy(&fs_info->bdi); +fail_srcu: + cleanup_srcu_struct(&fs_info->subvol_srcu); fail: kfree(extent_root); kfree(tree_root); @@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { - WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); + + if (btrfs_root_refs(&root->root_item) == 0) + synchronize_srcu(&fs_info->subvol_srcu); + + free_fs_root(root); + return 0; +} + +static void free_fs_root(struct btrfs_root *root) +{ + WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_super.s_dev) { down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } - if (root->node) - free_extent_buffer(root->node); - if (root->commit_root) - free_extent_buffer(root->commit_root); + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); kfree(root->name); kfree(root); - return 0; } static int del_fs_roots(struct btrfs_fs_info *fs_info) @@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; + while (!list_empty(&fs_info->dead_roots)) { + gang[0] = list_entry(fs_info->dead_roots.next, + struct btrfs_root, root_list); + list_del(&gang[0]->root_list); + + if (gang[0]->in_radix) { + btrfs_free_fs_root(fs_info, gang[0]); + } else { + free_extent_buffer(gang[0]->node); + free_extent_buffer(gang[0]->commit_root); + kfree(gang[0]); + } + } + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, @@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[ret - 1]->root_key.objectid + 1; for (i = 0; i < ret; i++) { root_objectid = gang[i]->root_key.objectid; - ret = btrfs_find_dead_roots(fs_info->tree_root, - root_objectid); - BUG_ON(ret); btrfs_orphan_cleanup(gang[i]); } root_objectid++; @@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); - btrfs_free_pinned_extents(root->fs_info); del_fs_roots(fs_info); @@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root) btrfs_mapping_tree_free(&fs_info->mapping_tree); bdi_destroy(&fs_info->bdi); + cleanup_srcu_struct(&fs_info->subvol_srcu); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4..ba5c3fd5ab8 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, len = BTRFS_FID_SIZE_NON_CONNECTABLE; type = FILEID_BTRFS_WITHOUT_PARENT; - fid->objectid = BTRFS_I(inode)->location.objectid; + fid->objectid = inode->i_ino; fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; @@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, } static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation) + u64 root_objectid, u32 generation, + int check_generation) { + struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; struct btrfs_root *root; + struct dentry *dentry; struct inode *inode; struct btrfs_key key; + int index; + int err = 0; + + if (objectid < BTRFS_FIRST_FREE_OBJECTID) + return ERR_PTR(-ESTALE); key.objectid = root_objectid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = (u64)-1; - root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); - if (IS_ERR(root)) - return ERR_CAST(root); + index = srcu_read_lock(&fs_info->subvol_srcu); + + root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto fail; + } + + if (btrfs_root_refs(&root->root_item) == 0) { + err = -ENOENT; + goto fail; + } key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; inode = btrfs_iget(sb, &key, root); - if (IS_ERR(inode)) - return (void *)inode; + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto fail; + } + + srcu_read_unlock(&fs_info->subvol_srcu, index); - if (generation != inode->i_generation) { + if (check_generation && generation != inode->i_generation) { iput(inode); return ERR_PTR(-ESTALE); } - return d_obtain_alias(inode); + dentry = d_obtain_alias(inode); + if (!IS_ERR(dentry)) + dentry->d_op = &btrfs_dentry_operations; + return dentry; +fail: + srcu_read_unlock(&fs_info->subvol_srcu, index); + return ERR_PTR(err); } static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, @@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, objectid = fid->parent_objectid; generation = fid->parent_gen; - return btrfs_get_dentry(sb, objectid, root_objectid, generation); + return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); } static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, @@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, root_objectid = fid->root_objectid; generation = fid->gen; - return btrfs_get_dentry(sb, objectid, root_objectid, generation); + return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); } static struct dentry *btrfs_get_parent(struct dentry *child) { struct inode *dir = child->d_inode; + static struct dentry *dentry; struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_key key; struct btrfs_path *path; struct extent_buffer *leaf; - int slot; - u64 objectid; + struct btrfs_root_ref *ref; + struct btrfs_key key; + struct btrfs_key found_key; int ret; path = btrfs_alloc_path(); - key.objectid = dir->i_ino; - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = (u64)-1; + if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + root = root->fs_info->tree_root; + } else { + key.objectid = dir->i_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - /* Error */ - btrfs_free_path(path); - return ERR_PTR(ret); + if (ret < 0) + goto fail; + + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + ret = -ENOENT; + goto fail; } + + path->slots[0]--; leaf = path->nodes[0]; - slot = path->slots[0]; - if (ret) { - /* btrfs_search_slot() returns the slot where we'd want to - insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. - The _real_ backref, telling us what the parent inode - _actually_ is, will be in the slot _before_ the one - that btrfs_search_slot() returns. */ - if (!slot) { - /* Unless there is _no_ key in the tree before... */ - btrfs_free_path(path); - return ERR_PTR(-EIO); - } - slot--; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != key.objectid || found_key.type != key.type) { + ret = -ENOENT; + goto fail; } - btrfs_item_key_to_cpu(leaf, &key, slot); + if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + key.objectid = btrfs_root_ref_dirid(leaf, ref); + } else { + key.objectid = found_key.offset; + } btrfs_free_path(path); - if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) - return ERR_PTR(-EINVAL); - - objectid = key.offset; - - /* If we are already at the root of a subvol, return the real root */ - if (objectid == dir->i_ino) - return dget(dir->i_sb->s_root); + if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { + return btrfs_get_dentry(root->fs_info->sb, key.objectid, + found_key.offset, 0, 0); + } - /* Build a new key for the inode item */ - key.objectid = objectid; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - - return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); + dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); + if (!IS_ERR(dentry)) + dentry->d_op = &btrfs_dentry_operations; + return dentry; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } const struct export_operations btrfs_export_ops = { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104..993f93ff7ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -32,12 +32,12 @@ #include "locking.h" #include "free-space-cache.h" -static int update_reserved_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int reserve); static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, int level, struct btrfs_key *ins); - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force); +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, + int is_data, int reserved, + struct extent_buffer **must_clean); +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, return ret; } -/* - * We always set EXTENT_LOCKED for the super mirror extents so we don't - * overwrite them, so those bits need to be unset. Also, if we are unmounting - * with pinned extents still sitting there because we had a block group caching, - * we need to clear those now, since we are done. - */ -void btrfs_free_pinned_extents(struct btrfs_fs_info *info) +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) { - u64 start, end, last = 0; - int ret; + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + set_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); + return 0; +} - while (1) { - ret = find_first_extent_bit(&info->pinned_extents, last, - &start, &end, - EXTENT_LOCKED|EXTENT_DIRTY); - if (ret) - break; +static void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; - clear_extent_bits(&info->pinned_extents, start, end, - EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); - last = end+1; - } + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + clear_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); } -static int remove_sb_from_cache(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) +static int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - struct btrfs_fs_info *fs_info = root->fs_info; u64 bytenr; u64 *logical; int stripe_len; @@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); BUG_ON(ret); + while (nr--) { - try_lock_extent(&fs_info->pinned_extents, - logical[nr], - logical[nr] + stripe_len - 1, GFP_NOFS); + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, logical[nr], + stripe_len); + BUG_ON(ret); } + kfree(logical); } - return 0; } +static struct btrfs_caching_control * +get_caching_control(struct btrfs_block_group_cache *cache) +{ + struct btrfs_caching_control *ctl; + + spin_lock(&cache->lock); + if (cache->cached != BTRFS_CACHE_STARTED) { + spin_unlock(&cache->lock); + return NULL; + } + + ctl = cache->caching_ctl; + atomic_inc(&ctl->count); + spin_unlock(&cache->lock); + return ctl; +} + +static void put_caching_control(struct btrfs_caching_control *ctl) +{ + if (atomic_dec_and_test(&ctl->count)) + kfree(ctl); +} + /* * this is only called by cache_block_group, since we could have freed extents * we need to check the pinned_extents for any extents that can't be used yet @@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, int ret; while (start < end) { - ret = find_first_extent_bit(&info->pinned_extents, start, + ret = find_first_extent_bit(info->pinned_extents, start, &extent_start, &extent_end, - EXTENT_DIRTY|EXTENT_LOCKED); + EXTENT_DIRTY | EXTENT_UPTODATE); if (ret) break; @@ -249,22 +281,27 @@ static int caching_kthread(void *data) { struct btrfs_block_group_cache *block_group = data; struct btrfs_fs_info *fs_info = block_group->fs_info; - u64 last = 0; + struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_path *path; - int ret = 0; - struct btrfs_key key; struct extent_buffer *leaf; - int slot; + struct btrfs_key key; u64 total_found = 0; - - BUG_ON(!fs_info); + u64 last = 0; + u32 nritems; + int ret = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - atomic_inc(&block_group->space_info->caching_threads); + exclude_super_stripes(extent_root, block_group); + spin_lock(&block_group->space_info->lock); + block_group->space_info->bytes_super += block_group->bytes_super; + spin_unlock(&block_group->space_info->lock); + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + /* * We don't want to deadlock with somebody trying to allocate a new * extent for the extent root while also trying to search the extent @@ -277,74 +314,64 @@ static int caching_kthread(void *data) key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = BTRFS_EXTENT_ITEM_KEY; again: + mutex_lock(&caching_ctl->mutex); /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); - ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + while (1) { smp_mb(); - if (block_group->fs_info->closing > 1) { + if (fs_info->closing > 1) { last = (u64)-1; break; } - leaf = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(fs_info->extent_root, path); - if (ret < 0) - goto err; - else if (ret) + if (path->slots[0] < nritems) { + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + } else { + ret = find_next_key(path, 0, &key); + if (ret) break; - if (need_resched() || - btrfs_transaction_in_commit(fs_info)) { - leaf = path->nodes[0]; - - /* this shouldn't happen, but if the - * leaf is empty just move on. - */ - if (btrfs_header_nritems(leaf) == 0) - break; - /* - * we need to copy the key out so that - * we are sure the next search advances - * us forward in the btree. - */ - btrfs_item_key_to_cpu(leaf, &key, 0); - btrfs_release_path(fs_info->extent_root, path); - up_read(&fs_info->extent_commit_sem); + caching_ctl->progress = last; + btrfs_release_path(extent_root, path); + up_read(&fs_info->extent_commit_sem); + mutex_unlock(&caching_ctl->mutex); + if (btrfs_transaction_in_commit(fs_info)) schedule_timeout(1); - goto again; - } + else + cond_resched(); + goto again; + } + if (key.objectid < block_group->key.objectid) { + path->slots[0]++; continue; } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) - goto next; if (key.objectid >= block_group->key.objectid + block_group->key.offset) break; - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY) { total_found += add_new_free_space(block_group, fs_info, last, key.objectid); last = key.objectid + key.offset; - } - if (total_found > (1024 * 1024 * 2)) { - total_found = 0; - wake_up(&block_group->caching_q); + if (total_found > (1024 * 1024 * 2)) { + total_found = 0; + wake_up(&caching_ctl->wait); + } } -next: path->slots[0]++; } ret = 0; @@ -352,33 +379,65 @@ next: total_found += add_new_free_space(block_group, fs_info, last, block_group->key.objectid + block_group->key.offset); + caching_ctl->progress = (u64)-1; spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; block_group->cached = BTRFS_CACHE_FINISHED; spin_unlock(&block_group->lock); err: btrfs_free_path(path); up_read(&fs_info->extent_commit_sem); - atomic_dec(&block_group->space_info->caching_threads); - wake_up(&block_group->caching_q); + free_excluded_extents(extent_root, block_group); + + mutex_unlock(&caching_ctl->mutex); + wake_up(&caching_ctl->wait); + + put_caching_control(caching_ctl); + atomic_dec(&block_group->space_info->caching_threads); return 0; } static int cache_block_group(struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl; struct task_struct *tsk; int ret = 0; + smp_mb(); + if (cache->cached != BTRFS_CACHE_NO) + return 0; + + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); + BUG_ON(!caching_ctl); + + INIT_LIST_HEAD(&caching_ctl->list); + mutex_init(&caching_ctl->mutex); + init_waitqueue_head(&caching_ctl->wait); + caching_ctl->block_group = cache; + caching_ctl->progress = cache->key.objectid; + /* one for caching kthread, one for caching block group list */ + atomic_set(&caching_ctl->count, 2); + spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); - return ret; + kfree(caching_ctl); + return 0; } + cache->caching_ctl = caching_ctl; cache->cached = BTRFS_CACHE_STARTED; spin_unlock(&cache->lock); + down_write(&fs_info->extent_commit_sem); + list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); + up_write(&fs_info->extent_commit_sem); + + atomic_inc(&cache->space_info->caching_threads); + tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", cache->key.objectid); if (IS_ERR(tsk)) { @@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, parent, ref_root, flags, ref->objectid, ref->offset, &ins, node->ref_mod); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, @@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, extent_op->flags_to_set, &extent_op->key, ref->level, &ins); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, ref_root, @@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); if (insert_reserved) { + int mark_free = 0; + struct extent_buffer *must_clean = NULL; + + ret = pin_down_bytes(trans, root, NULL, + node->bytenr, node->num_bytes, + head->is_data, 1, &must_clean); + if (ret > 0) + mark_free = 1; + + if (must_clean) { + clean_tree_block(NULL, root, must_clean); + btrfs_tree_unlock(must_clean); + free_extent_buffer(must_clean); + } if (head->is_data) { ret = btrfs_del_csums(trans, root, node->bytenr, node->num_bytes); BUG_ON(ret); } - btrfs_update_pinned_extents(root, node->bytenr, - node->num_bytes, 1); - update_reserved_extents(root, node->bytenr, - node->num_bytes, 0); + if (mark_free) { + ret = btrfs_free_reserved_extent(root, + node->bytenr, + node->num_bytes); + BUG_ON(ret); + } } mutex_unlock(&head->mutex); return 0; @@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root) /* get the space info for where the metadata will live */ alloc_target = btrfs_get_alloc_profile(root, 0); meta_sinfo = __find_space_info(info, alloc_target); + if (!meta_sinfo) + goto alloc; again: spin_lock(&meta_sinfo->lock); @@ -2717,12 +2792,13 @@ again: do_div(thresh, 100); if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + - meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super > thresh) { struct btrfs_trans_handle *trans; if (!meta_sinfo->full) { meta_sinfo->force_alloc = 1; spin_unlock(&meta_sinfo->lock); - +alloc: trans = btrfs_start_transaction(root, 1); if (!trans) return -ENOMEM; @@ -2730,6 +2806,10 @@ again: ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, alloc_target, 0); btrfs_end_transaction(trans, root); + if (!meta_sinfo) { + meta_sinfo = __find_space_info(info, + alloc_target); + } goto again; } spin_unlock(&meta_sinfo->lock); @@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); data_sinfo = BTRFS_I(inode)->space_info; + if (!data_sinfo) + goto alloc; + again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); if (data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - - data_sinfo->bytes_may_use < bytes) { + data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { struct btrfs_trans_handle *trans; /* @@ -2783,7 +2866,7 @@ again: data_sinfo->force_alloc = 1; spin_unlock(&data_sinfo->lock); - +alloc: alloc_target = btrfs_get_alloc_profile(root, 1); trans = btrfs_start_transaction(root, 1); if (!trans) @@ -2795,6 +2878,11 @@ again: btrfs_end_transaction(trans, root); if (ret) return ret; + + if (!data_sinfo) { + btrfs_set_inode_space_info(root, inode); + data_sinfo = BTRFS_I(inode)->space_info; + } goto again; } spin_unlock(&data_sinfo->lock); @@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + cache->reserved -= num_bytes; cache->space_info->bytes_used += num_bytes; + cache->space_info->bytes_reserved -= num_bytes; if (cache->ro) cache->space_info->bytes_readonly -= num_bytes; - btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } else { @@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) return bytenr; } -int btrfs_update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin) +/* + * this function must be called within transaction + */ +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int reserved) { - u64 len; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache; - if (pin) - set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); - - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - BUG_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - if (pin) { - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - cache->pinned += len; - cache->space_info->bytes_pinned += len; - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); - fs_info->total_pinned += len; - } else { - int unpin = 0; + cache = btrfs_lookup_block_group(fs_info, bytenr); + BUG_ON(!cache); - /* - * in order to not race with the block group caching, we - * only want to unpin the extent if we are cached. If - * we aren't cached, we want to start async caching this - * block group so we can free the extent the next time - * around. - */ - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - unpin = (cache->cached == BTRFS_CACHE_FINISHED); - if (likely(unpin)) { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - fs_info->total_pinned -= len; - } - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + cache->pinned += num_bytes; + cache->space_info->bytes_pinned += num_bytes; + if (reserved) { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; + } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); - if (likely(unpin)) - clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + len -1, - GFP_NOFS); - else - cache_block_group(cache); + btrfs_put_block_group(cache); - if (unpin) - btrfs_add_free_space(cache, bytenr, len); - } - btrfs_put_block_group(cache); - bytenr += len; - num -= len; + set_extent_dirty(fs_info->pinned_extents, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); + return 0; +} + +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve) +{ + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + if (reserve) { + cache->reserved += num_bytes; + cache->space_info->bytes_reserved += num_bytes; + } else { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); return 0; } -static int update_reserved_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int reserve) +int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - u64 len; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_caching_control *next; + struct btrfs_caching_control *caching_ctl; + struct btrfs_block_group_cache *cache; - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - BUG_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + down_write(&fs_info->extent_commit_sem); - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - if (reserve) { - cache->reserved += len; - cache->space_info->bytes_reserved += len; + list_for_each_entry_safe(caching_ctl, next, + &fs_info->caching_block_groups, list) { + cache = caching_ctl->block_group; + if (block_group_cache_done(cache)) { + cache->last_byte_to_unpin = (u64)-1; + list_del_init(&caching_ctl->list); + put_caching_control(caching_ctl); } else { - cache->reserved -= len; - cache->space_info->bytes_reserved -= len; + cache->last_byte_to_unpin = caching_ctl->progress; } - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); - btrfs_put_block_group(cache); - bytenr += len; - num -= len; } + + if (fs_info->pinned_extents == &fs_info->freed_extents[0]) + fs_info->pinned_extents = &fs_info->freed_extents[1]; + else + fs_info->pinned_extents = &fs_info->freed_extents[0]; + + up_write(&fs_info->extent_commit_sem); return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - u64 last = 0; - u64 start; - u64 end; - struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; - int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 len; - while (1) { - ret = find_first_extent_bit(pinned_extents, last, - &start, &end, EXTENT_DIRTY); - if (ret) - break; + while (start <= end) { + if (!cache || + start >= cache->key.objectid + cache->key.offset) { + if (cache) + btrfs_put_block_group(cache); + cache = btrfs_lookup_block_group(fs_info, start); + BUG_ON(!cache); + } + + len = cache->key.objectid + cache->key.offset - start; + len = min(len, end + 1 - start); - set_extent_dirty(copy, start, end, GFP_NOFS); - last = end + 1; + if (start < cache->last_byte_to_unpin) { + len = min(len, cache->last_byte_to_unpin - start); + btrfs_add_free_space(cache, start, len); + } + + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); + + start += len; } + + if (cache) + btrfs_put_block_group(cache); return 0; } int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_io_tree *unpin) + struct btrfs_root *root) { + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_io_tree *unpin; u64 start; u64 end; int ret; + if (fs_info->pinned_extents == &fs_info->freed_extents[0]) + unpin = &fs_info->freed_extents[1]; + else + unpin = &fs_info->freed_extents[0]; + while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ret = btrfs_discard_extent(root, start, end + 1 - start); - /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - + unpin_extent_range(root, start, end); cond_resched(); } @@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u64 bytenr, u64 num_bytes, int is_data, + u64 bytenr, u64 num_bytes, + int is_data, int reserved, struct extent_buffer **must_clean) { int err = 0; @@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, } free_extent_buffer(buf); pinit: - btrfs_set_path_blocking(path); + if (path) + btrfs_set_path_blocking(path); /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + btrfs_pin_extent(root, bytenr, num_bytes, reserved); BUG_ON(err < 0); return 0; } - static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } ret = pin_down_bytes(trans, root, path, bytenr, - num_bytes, is_data, &must_clean); + num_bytes, is_data, 0, &must_clean); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - update_reserved_extents(root, bytenr, num_bytes, 0); + btrfs_pin_extent(root, bytenr, num_bytes, 1); ret = 0; } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, @@ -3585,19 +3682,33 @@ static noinline int wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { + struct btrfs_caching_control *caching_ctl; DEFINE_WAIT(wait); - prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); - - if (block_group_cache_done(cache)) { - finish_wait(&cache->caching_q, &wait); + caching_ctl = get_caching_control(cache); + if (!caching_ctl) return 0; - } - schedule(); - finish_wait(&cache->caching_q, &wait); - wait_event(cache->caching_q, block_group_cache_done(cache) || + wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space >= num_bytes)); + + put_caching_control(caching_ctl); + return 0; +} + +static noinline int +wait_block_group_cache_done(struct btrfs_block_group_cache *cache) +{ + struct btrfs_caching_control *caching_ctl; + DEFINE_WAIT(wait); + + caching_ctl = get_caching_control(cache); + if (!caching_ctl) + return 0; + + wait_event(caching_ctl->wait, block_group_cache_done(cache)); + + put_caching_control(caching_ctl); return 0; } @@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, int last_ptr_loop = 0; int loop = 0; bool found_uncached_bg = false; + bool failed_cluster_refill = false; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -3732,7 +3844,16 @@ have_block_group: if (unlikely(block_group->ro)) goto loop; - if (last_ptr) { + /* + * Ok we want to try and use the cluster allocator, so lets look + * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will + * have tried the cluster allocator plenty of times at this + * point and not have found anything, so we are likely way too + * fragmented for the clustering stuff to find anything, so lets + * just skip it and let the allocator find whatever block it can + * find + */ + if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { /* * the refill lock keeps out other * people trying to start a new cluster @@ -3807,9 +3928,11 @@ refill_cluster: spin_unlock(&last_ptr->refill_lock); goto checks; } - } else if (!cached && loop > LOOP_CACHING_NOWAIT) { + } else if (!cached && loop > LOOP_CACHING_NOWAIT + && !failed_cluster_refill) { spin_unlock(&last_ptr->refill_lock); + failed_cluster_refill = true; wait_block_group_cache_progress(block_group, num_bytes + empty_cluster + empty_size); goto have_block_group; @@ -3821,13 +3944,9 @@ refill_cluster: * cluster. Free the cluster we've been trying * to use, and go to the next block group */ - if (loop < LOOP_NO_EMPTY_SIZE) { - btrfs_return_cluster_to_free_space(NULL, - last_ptr); - spin_unlock(&last_ptr->refill_lock); - goto loop; - } + btrfs_return_cluster_to_free_space(NULL, last_ptr); spin_unlock(&last_ptr->refill_lock); + goto loop; } offset = btrfs_find_space_for_alloc(block_group, search_start, @@ -3881,9 +4000,12 @@ checks: search_start - offset); BUG_ON(offset > search_start); + update_reserved_extents(block_group, num_bytes, 1); + /* we are all good, lets return */ break; loop: + failed_cluster_refill = false; btrfs_put_block_group(block_group); } up_read(&space_info->groups_sem); @@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) up_read(&info->groups_sem); } -static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, - u64 data) +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; u64 search_start = 0; @@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ret = btrfs_discard_extent(root, start, len); btrfs_add_free_space(cache, start, len); + update_reserved_extents(cache, len, 0); btrfs_put_block_group(cache); - update_reserved_extents(root, start, len, 0); - - return ret; -} - -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, - u64 data) -{ - int ret; - ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, - empty_size, hint_byte, search_end, ins, - data); - if (!ret) - update_reserved_extents(root, ins->objectid, ins->offset, 1); return ret; } @@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, { int ret; struct btrfs_block_group_cache *block_group; + struct btrfs_caching_control *caching_ctl; + u64 start = ins->objectid; + u64 num_bytes = ins->offset; block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); cache_block_group(block_group); - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + caching_ctl = get_caching_control(block_group); - ret = btrfs_remove_free_space(block_group, ins->objectid, - ins->offset); - BUG_ON(ret); + if (!caching_ctl) { + BUG_ON(!block_group_cache_done(block_group)); + ret = btrfs_remove_free_space(block_group, start, num_bytes); + BUG_ON(ret); + } else { + mutex_lock(&caching_ctl->mutex); + + if (start >= caching_ctl->progress) { + ret = add_excluded_extent(root, start, num_bytes); + BUG_ON(ret); + } else if (start + num_bytes <= caching_ctl->progress) { + ret = btrfs_remove_free_space(block_group, + start, num_bytes); + BUG_ON(ret); + } else { + num_bytes = caching_ctl->progress - start; + ret = btrfs_remove_free_space(block_group, + start, num_bytes); + BUG_ON(ret); + + start = caching_ctl->progress; + num_bytes = ins->objectid + ins->offset - + caching_ctl->progress; + ret = add_excluded_extent(root, start, num_bytes); + BUG_ON(ret); + } + + mutex_unlock(&caching_ctl->mutex); + put_caching_control(caching_ctl); + } + + update_reserved_extents(block_group, ins->offset, 1); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, int ret; u64 flags = 0; - ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, - empty_size, hint_byte, search_end, - ins, 0); + ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, + empty_size, hint_byte, search_end, + ins, 0); if (ret) return ret; @@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, } else BUG_ON(parent > 0); - update_reserved_extents(root, ins->objectid, ins->offset, 1); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_delayed_extent_op *extent_op; extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); @@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -#if 0 -int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *leaf) -{ - u64 disk_bytenr; - u64 num_bytes; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - u32 nritems; - int i; - int ret; - - BUG_ON(!btrfs_is_leaf(leaf)); - nritems = btrfs_header_nritems(leaf); - - for (i = 0; i < nritems; i++) { - cond_resched(); - btrfs_item_key_to_cpu(leaf, &key, i); - - /* only extents have references, skip everything else */ - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - - /* inline extents live in the btree, they don't have refs */ - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - - disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - - /* holes don't have refs */ - if (disk_bytenr == 0) - continue; - - num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); - ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, - leaf->start, 0, key.objectid, 0); - BUG_ON(ret); - } - return 0; -} - -static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_leaf_ref *ref) -{ - int i; - int ret; - struct btrfs_extent_info *info; - struct refsort *sorted; - - if (ref->nritems == 0) - return 0; - - sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); - for (i = 0; i < ref->nritems; i++) { - sorted[i].bytenr = ref->extents[i].bytenr; - sorted[i].slot = i; - } - sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the items in the ref were sorted when the ref was inserted - * into the ref cache, so this is already in order - */ - for (i = 0; i < ref->nritems; i++) { - info = ref->extents + sorted[i].slot; - ret = btrfs_free_extent(trans, root, info->bytenr, - info->num_bytes, ref->bytenr, - ref->owner, ref->generation, - info->objectid, 0); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - - BUG_ON(ret); - info++; - } - - kfree(sorted); - return 0; -} - - -static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 start, - u64 len, u32 *refs) -{ - int ret; - - ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); - BUG_ON(ret); - -#if 0 /* some debugging code in case we see problems here */ - /* if the refs count is one, it won't get increased again. But - * if the ref count is > 1, someone may be decreasing it at - * the same time we are. - */ - if (*refs != 1) { - struct extent_buffer *eb = NULL; - eb = btrfs_find_create_tree_block(root, start, len); - if (eb) - btrfs_tree_lock(eb); - - mutex_lock(&root->fs_info->alloc_mutex); - ret = lookup_extent_ref(NULL, root, start, len, refs); - BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); - - if (eb) { - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - } - if (*refs == 1) { - printk(KERN_ERR "btrfs block %llu went down to one " - "during drop_snap\n", (unsigned long long)start); - } - - } -#endif - - cond_resched(); - return ret; -} +struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; + int reada_slot; + int reada_count; +}; +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 -/* - * this is used while deleting old snapshots, and it drops the refs - * on a whole subtree starting from a level 1 node. - * - * The idea is to sort all the leaf pointers, and then drop the - * ref on all the leaves in order. Most of the time the leaves - * will have ref cache entries, so no leaf IOs will be required to - * find the extents they have references on. - * - * For each leaf, any references it has are also dropped in order - * - * This ends up dropping the references in something close to optimal - * order for reading and modifying the extent allocation tree. - */ -static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) +static noinline void reada_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct walk_control *wc, + struct btrfs_path *path) { u64 bytenr; - u64 root_owner; - u64 root_gen; - struct extent_buffer *eb = path->nodes[1]; - struct extent_buffer *leaf; - struct btrfs_leaf_ref *ref; - struct refsort *sorted = NULL; - int nritems = btrfs_header_nritems(eb); + u64 generation; + u64 refs; + u64 last = 0; + u32 nritems; + u32 blocksize; + struct btrfs_key key; + struct extent_buffer *eb; int ret; - int i; - int refi = 0; - int slot = path->slots[1]; - u32 blocksize = btrfs_level_size(root, 0); - u32 refs; - - if (nritems == 0) - goto out; - - root_owner = btrfs_header_owner(eb); - root_gen = btrfs_header_generation(eb); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); + int slot; + int nread = 0; - /* - * step one, sort all the leaf pointers so we don't scribble - * randomly into the extent allocation tree - */ - for (i = slot; i < nritems; i++) { - sorted[refi].bytenr = btrfs_node_blockptr(eb, i); - sorted[refi].slot = i; - refi++; + if (path->slots[wc->level] < wc->reada_slot) { + wc->reada_count = wc->reada_count * 2 / 3; + wc->reada_count = max(wc->reada_count, 2); + } else { + wc->reada_count = wc->reada_count * 3 / 2; + wc->reada_count = min_t(int, wc->reada_count, + BTRFS_NODEPTRS_PER_BLOCK(root)); } - /* - * nritems won't be zero, but if we're picking up drop_snapshot - * after a crash, slot might be > 0, so double check things - * just in case. - */ - if (refi == 0) - goto out; + eb = path->nodes[wc->level]; + nritems = btrfs_header_nritems(eb); + blocksize = btrfs_level_size(root, wc->level - 1); - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); + for (slot = path->slots[wc->level]; slot < nritems; slot++) { + if (nread >= wc->reada_count) + break; - /* - * the first loop frees everything the leaves point to - */ - for (i = 0; i < refi; i++) { - u64 ptr_gen; + cond_resched(); + bytenr = btrfs_node_blockptr(eb, slot); + generation = btrfs_node_ptr_generation(eb, slot); - bytenr = sorted[i].bytenr; + if (slot == path->slots[wc->level]) + goto reada; - /* - * check the reference count on this leaf. If it is > 1 - * we just decrement it below and don't update any - * of the refs the leaf points to. - */ - ret = drop_snap_lookup_refcount(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) + if (wc->stage == UPDATE_BACKREF && + generation <= root->root_key.offset) continue; - ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); - - /* - * the leaf only had one reference, which means the - * only thing pointing to this leaf is the snapshot - * we're deleting. It isn't possible for the reference - * count to increase again later - * - * The reference cache is checked for the leaf, - * and if found we'll be able to drop any refs held by - * the leaf without needing to read it in. - */ - ref = btrfs_lookup_leaf_ref(root, bytenr); - if (ref && ref->generation != ptr_gen) { - btrfs_free_leaf_ref(root, ref); - ref = NULL; - } - if (ref) { - ret = cache_drop_leaf_ref(trans, root, ref); - BUG_ON(ret); - btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(root, ref); - } else { - /* - * the leaf wasn't in the reference cache, so - * we have to read it. - */ - leaf = read_tree_block(root, bytenr, blocksize, - ptr_gen); - ret = btrfs_drop_leaf_ref(trans, root, leaf); + if (wc->stage == DROP_REFERENCE) { + ret = btrfs_lookup_extent_info(trans, root, + bytenr, blocksize, + &refs, NULL); BUG_ON(ret); - free_extent_buffer(leaf); - } - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } - - /* - * run through the loop again to free the refs on the leaves. - * This is faster than doing it in the loop above because - * the leaves are likely to be clustered together. We end up - * working in nice chunks on the extent allocation tree. - */ - for (i = 0; i < refi; i++) { - bytenr = sorted[i].bytenr; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, eb->start, - root_owner, root_gen, 0, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } -out: - kfree(sorted); - - /* - * update the path to show we've processed the entire level 1 - * node. This will get saved into the root's drop_snapshot_progress - * field so these drops are not repeated again if this transaction - * commits. - */ - path->slots[1] = nritems; - return 0; -} - -/* - * helper function for drop_snapshot, this walks down the tree dropping ref - * counts as it goes. - */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level) -{ - u64 root_owner; - u64 root_gen; - u64 bytenr; - u64 ptr_gen; - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u32 blocksize; - int ret; - u32 refs; - - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, &refs); - BUG_ON(ret); - if (refs > 1) - goto out; - - /* - * walk down to the last node level and free all the leaves - */ - while (*level >= 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; - - if (btrfs_header_level(cur) != *level) - WARN_ON(1); + BUG_ON(refs == 0); + if (refs == 1) + goto reada; - if (path->slots[*level] >= - btrfs_header_nritems(cur)) - break; - - /* the new code goes down to level 1 and does all the - * leaves pointed to that node in bulk. So, this check - * for level 0 will always be false. - * - * But, the disk format allows the drop_snapshot_progress - * field in the root to leave things in a state where - * a leaf will need cleaning up here. If someone crashes - * with the old code and then boots with the new code, - * we might find a leaf here. - */ - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - break; + if (!wc->update_ref || + generation <= root->root_key.offset) + continue; + btrfs_node_key_to_cpu(eb, &key, slot); + ret = btrfs_comp_cpu_keys(&key, + &wc->update_progress); + if (ret < 0) + continue; } - - /* - * once we get to level one, process the whole node - * at once, including everything below it. - */ - if (*level == 1) { - ret = drop_level_one_refs(trans, root, path); - BUG_ON(ret); +reada: + ret = readahead_tree_block(root, bytenr, blocksize, + generation); + if (ret) break; - } - - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - - ret = drop_snap_lookup_refcount(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - - /* - * if there is more than one reference, we don't need - * to read that node to drop any references it has. We - * just drop the ref we hold on that node and move on to the - * next slot in this level. - */ - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - path->slots[*level]++; - - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - root_owner, root_gen, - *level - 1, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - - continue; - } - - /* - * we need to keep freeing things in the next level down. - * read the block and loop around to process it - */ - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - WARN_ON(*level <= 0); - if (path->nodes[*level-1]) - free_extent_buffer(path->nodes[*level-1]); - path->nodes[*level-1] = next; - *level = btrfs_header_level(next); - path->slots[*level] = 0; - cond_resched(); + last = bytenr + blocksize; + nread++; } -out: - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - if (path->nodes[*level] == root->node) { - parent = path->nodes[*level]; - bytenr = path->nodes[*level]->start; - } else { - parent = path->nodes[*level + 1]; - bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); - } - - blocksize = btrfs_level_size(root, *level); - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - /* - * cleanup and free the reference on the last node - * we processed - */ - ret = btrfs_free_extent(trans, root, bytenr, blocksize, - parent->start, root_owner, root_gen, - *level, 1); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - - *level += 1; - BUG_ON(ret); - - cond_resched(); - return 0; + wc->reada_slot = slot; } -#endif - -struct walk_control { - u64 refs[BTRFS_MAX_LEVEL]; - u64 flags[BTRFS_MAX_LEVEL]; - struct btrfs_key update_progress; - int stage; - int level; - int shared_level; - int update_ref; - int keep_locks; -}; - -#define DROP_REFERENCE 1 -#define UPDATE_BACKREF 2 /* * hepler to process tree block while walking down the tree. * - * when wc->stage == DROP_REFERENCE, this function checks - * reference count of the block. if the block is shared and - * we need update back refs for the subtree rooted at the - * block, this function changes wc->stage to UPDATE_BACKREF - * * when wc->stage == UPDATE_BACKREF, this function updates * back refs for pointers in the block. * @@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, { int level = wc->level; struct extent_buffer *eb = path->nodes[level]; - struct btrfs_key key; u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; @@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, BUG_ON(wc->refs[level] == 0); } - if (wc->stage == DROP_REFERENCE && - wc->update_ref && wc->refs[level] > 1) { - BUG_ON(eb == root->node); - BUG_ON(path->slots[level] > 0); - if (level == 0) - btrfs_item_key_to_cpu(eb, &key, path->slots[level]); - else - btrfs_node_key_to_cpu(eb, &key, path->slots[level]); - if (btrfs_header_owner(eb) == root->root_key.objectid && - btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { - wc->stage = UPDATE_BACKREF; - wc->shared_level = level; - } - } - if (wc->stage == DROP_REFERENCE) { if (wc->refs[level] > 1) return 1; @@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, } /* + * hepler to process tree block pointer. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block pointed to. if the block + * is shared and we need update back refs for the subtree + * rooted at the block, this function changes wc->stage to + * UPDATE_BACKREF. if the block is shared and there is no + * need to update back, this function drops the reference + * to the block. + * + * NOTE: return value 1 means we should stop walking down. + */ +static noinline int do_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + u64 bytenr; + u64 generation; + u64 parent; + u32 blocksize; + struct btrfs_key key; + struct extent_buffer *next; + int level = wc->level; + int reada = 0; + int ret = 0; + + generation = btrfs_node_ptr_generation(path->nodes[level], + path->slots[level]); + /* + * if the lower level block was created before the snapshot + * was created, we know there is no need to update back refs + * for the subtree + */ + if (wc->stage == UPDATE_BACKREF && + generation <= root->root_key.offset) + return 1; + + bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next) { + next = btrfs_find_create_tree_block(root, bytenr, blocksize); + reada = 1; + } + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + if (wc->stage == DROP_REFERENCE) { + ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, + &wc->refs[level - 1], + &wc->flags[level - 1]); + BUG_ON(ret); + BUG_ON(wc->refs[level - 1] == 0); + + if (wc->refs[level - 1] > 1) { + if (!wc->update_ref || + generation <= root->root_key.offset) + goto skip; + + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); + if (ret < 0) + goto skip; + + wc->stage = UPDATE_BACKREF; + wc->shared_level = level - 1; + } + } + + if (!btrfs_buffer_uptodate(next, generation)) { + btrfs_tree_unlock(next); + free_extent_buffer(next); + next = NULL; + } + + if (!next) { + if (reada && level == 1) + reada_walk_down(trans, root, wc, path); + next = read_tree_block(root, bytenr, blocksize, generation); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + } + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; + if (wc->level == 1) + wc->reada_slot = 0; + return 0; +skip: + wc->refs[level - 1] = 0; + wc->flags[level - 1] = 0; + + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + parent = path->nodes[level]->start; + } else { + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level])); + parent = 0; + } + + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, + root->root_key.objectid, level - 1, 0); + BUG_ON(ret); + + btrfs_tree_unlock(next); + free_extent_buffer(next); + return 1; +} + +/* * hepler to process tree block while walking up the tree. * * when wc->stage == DROP_REFERENCE, this function drops @@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (level < wc->shared_level) goto out; - BUG_ON(wc->refs[level] <= 1); ret = find_next_key(path, level + 1, &wc->update_progress); if (ret > 0) wc->update_ref = 0; @@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, path->locks[level] = 0; return 1; } - } else { - BUG_ON(level != 0); } } @@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - u64 bytenr; - u64 ptr_gen; - u32 blocksize; int level = wc->level; int ret; while (level >= 0) { - cur = path->nodes[level]; - BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; ret = walk_down_proc(trans, root, path, wc); if (ret > 0) @@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; - bytenr = btrfs_node_blockptr(cur, path->slots[level]); - blocksize = btrfs_level_size(root, level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); - - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - - level--; - BUG_ON(level != btrfs_header_level(next)); - path->nodes[level] = next; - path->slots[level] = 0; - path->locks[level] = 1; - wc->level = level; + ret = do_walk_down(trans, root, path, wc); + if (ret > 0) { + path->slots[level]++; + continue; + } + level = wc->level; } return 0; } @@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) err = ret; goto out; } - btrfs_node_key_to_cpu(path->nodes[level], &key, - path->slots[level]); - WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + WARN_ON(ret > 0); /* * unlock our path, this is safe because only this @@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) wc->stage = DROP_REFERENCE; wc->update_ref = update_ref; wc->keep_locks = 0; + wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { ret = walk_down_tree(trans, root, path, wc); @@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) ret = btrfs_del_root(trans, tree_root, &root->root_key); BUG_ON(ret); - free_extent_buffer(root->node); - free_extent_buffer(root->commit_root); - kfree(root); + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_find_last_root(tree_root, root->root_key.objectid, + NULL, NULL); + BUG_ON(ret < 0); + if (ret > 0) { + ret = btrfs_del_orphan_item(trans, tree_root, + root->root_key.objectid); + BUG_ON(ret); + } + } + + if (root->in_radix) { + btrfs_free_fs_root(tree_root->fs_info, root); + } else { + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); + } out: btrfs_end_transaction(trans, tree_root); kfree(wc); @@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, wc->stage = DROP_REFERENCE; wc->update_ref = 0; wc->keep_locks = 1; + wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { wret = walk_down_tree(trans, root, path, wc); @@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); while (1) { int ret; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, return 0; } -#if 0 -static int __insert_orphan_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 size) -{ - struct btrfs_path *path; - struct btrfs_inode_item *item; - struct extent_buffer *leaf; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - path->leave_spinning = 1; - ret = btrfs_insert_empty_inode(trans, root, path, objectid); - if (ret) - goto out; - - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); - btrfs_set_inode_generation(leaf, item, 1); - btrfs_set_inode_size(leaf, item, size); - btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); - btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); -out: - btrfs_free_path(path); - return ret; -} - -static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *group) +/* + * checks to see if its even possible to relocate this block group. + * + * @return - -1 if it's not a good idea to relocate this block group, 0 if its + * ok to go ahead and try. + */ +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) { - struct inode *inode = NULL; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - struct btrfs_key root_key; - u64 objectid = BTRFS_FIRST_FREE_OBJECTID; - int err = 0; + struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + struct btrfs_device *device; + int full = 0; + int ret = 0; - root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; - root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = (u64)-1; - root = btrfs_read_fs_root_no_name(fs_info, &root_key); - if (IS_ERR(root)) - return ERR_CAST(root); + block_group = btrfs_lookup_block_group(root->fs_info, bytenr); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + /* odd, couldn't find the block group, leave it alone */ + if (!block_group) + return -1; - err = btrfs_find_free_objectid(trans, root, objectid, &objectid); - if (err) + /* no bytes used, we're good */ + if (!btrfs_block_group_used(&block_group->item)) goto out; - err = __insert_orphan_inode(trans, root, objectid, group->key.offset); - BUG_ON(err); - - err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, - group->key.offset, 0, group->key.offset, - 0, 0, 0); - BUG_ON(err); - - inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); - if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - BTRFS_I(inode)->location.objectid = objectid; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - BUG_ON(is_bad_inode(inode)); - } else { - BUG_ON(1); - } - BTRFS_I(inode)->index_cnt = group->key.objectid; - - err = btrfs_orphan_add(trans, inode); -out: - btrfs_end_transaction(trans, root); - if (err) { - if (inode) - iput(inode); - inode = ERR_PTR(err); - } - return inode; -} - -int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) -{ - - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - struct btrfs_ordered_extent *ordered; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct list_head list; - size_t offset; - int ret; - u64 disk_bytenr; - - INIT_LIST_HEAD(&list); - - ordered = btrfs_lookup_ordered_extent(inode, file_pos); - BUG_ON(ordered->file_offset != file_pos || ordered->len != len); - - disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; - ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, - disk_bytenr + len - 1, &list); - - while (!list_empty(&list)) { - sums = list_entry(list.next, struct btrfs_ordered_sum, list); - list_del_init(&sums->list); - - sector_sum = sums->sums; - sums->bytenr = ordered->start; + space_info = block_group->space_info; + spin_lock(&space_info->lock); - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + full = space_info->full; - btrfs_add_ordered_sum(inode, ordered, sums); + /* + * if this is the last block group we have in this space, we can't + * relocate it unless we're able to allocate a new chunk below. + * + * Otherwise, we need to make sure we have room in the space to handle + * all of the extents from this block group. If we can, we're good + */ + if ((space_info->total_bytes != block_group->key.offset) && + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + btrfs_block_group_used(&block_group->item) < + space_info->total_bytes)) { + spin_unlock(&space_info->lock); + goto out; } - btrfs_put_ordered_extent(ordered); - return 0; -} - -int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) -{ - struct btrfs_trans_handle *trans; - struct btrfs_path *path; - struct btrfs_fs_info *info = root->fs_info; - struct extent_buffer *leaf; - struct inode *reloc_inode; - struct btrfs_block_group_cache *block_group; - struct btrfs_key key; - u64 skipped; - u64 cur_byte; - u64 total_found; - u32 nritems; - int ret; - int progress; - int pass = 0; - - root = root->fs_info->extent_root; - - block_group = btrfs_lookup_block_group(info, group_start); - BUG_ON(!block_group); - - printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", - (unsigned long long)block_group->key.objectid, - (unsigned long long)block_group->flags); - - path = btrfs_alloc_path(); - BUG_ON(!path); - - reloc_inode = create_reloc_inode(info, block_group); - BUG_ON(IS_ERR(reloc_inode)); - - __alloc_chunk_for_shrink(root, block_group, 1); - set_block_group_readonly(block_group); - - btrfs_start_delalloc_inodes(info->tree_root); - btrfs_wait_ordered_extents(info->tree_root, 0); -again: - skipped = 0; - total_found = 0; - progress = 0; - key.objectid = block_group->key.objectid; - key.offset = 0; - key.type = 0; - cur_byte = key.objectid; - - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); + spin_unlock(&space_info->lock); - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(info->tree_root); - btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); - mutex_unlock(&root->fs_info->cleaner_mutex); + /* + * ok we don't have enough space, but maybe we have free space on our + * devices to allocate new chunks for relocation, so loop through our + * alloc devices and guess if we have enough space. However, if we + * were marked as full, then we know there aren't enough chunks, and we + * can just return. + */ + ret = -1; + if (full) + goto out; - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); + mutex_lock(&root->fs_info->chunk_mutex); + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { + u64 min_free = btrfs_block_group_used(&block_group->item); + u64 dev_offset, max_avail; - while (1) { - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; -next: - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret == 1) { - ret = 0; + /* + * check to make sure we can actually find a chunk with enough + * space to fit our block group in. + */ + if (device->total_bytes > device->bytes_used + min_free) { + ret = find_free_dev_extent(NULL, device, min_free, + &dev_offset, &max_avail); + if (!ret) break; - } - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); + ret = -1; } - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid >= block_group->key.objectid + - block_group->key.offset) - break; - - if (progress && need_resched()) { - btrfs_release_path(root, path); - cond_resched(); - progress = 0; - continue; - } - progress = 1; - - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || - key.objectid + key.offset <= cur_byte) { - path->slots[0]++; - goto next; - } - - total_found++; - cur_byte = key.objectid + key.offset; - btrfs_release_path(root, path); - - __alloc_chunk_for_shrink(root, block_group, 0); - ret = relocate_one_extent(root, path, &key, block_group, - reloc_inode, pass); - BUG_ON(ret < 0); - if (ret > 0) - skipped++; - - key.objectid = cur_byte; - key.type = 0; - key.offset = 0; - } - - btrfs_release_path(root, path); - - if (pass == 0) { - btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); - invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); - } - - if (total_found > 0) { - printk(KERN_INFO "btrfs found %llu extents in pass %d\n", - (unsigned long long)total_found, pass); - pass++; - if (total_found == skipped && pass > 2) { - iput(reloc_inode); - reloc_inode = create_reloc_inode(info, block_group); - pass = 0; - } - goto again; } - - /* delete reloc_inode */ - iput(reloc_inode); - - /* unpin extents in this range */ - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); - - spin_lock(&block_group->lock); - WARN_ON(block_group->pinned > 0); - WARN_ON(block_group->reserved > 0); - WARN_ON(btrfs_block_group_used(&block_group->item) > 0); - spin_unlock(&block_group->lock); - btrfs_put_block_group(block_group); - ret = 0; + mutex_unlock(&root->fs_info->chunk_mutex); out: - btrfs_free_path(path); + btrfs_put_block_group(block_group); return ret; } -#endif static int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) @@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; struct btrfs_space_info *space_info; + struct btrfs_caching_control *caching_ctl; struct rb_node *n; + down_write(&info->extent_commit_sem); + while (!list_empty(&info->caching_block_groups)) { + caching_ctl = list_entry(info->caching_block_groups.next, + struct btrfs_caching_control, list); + list_del(&caching_ctl->list); + put_caching_control(caching_ctl); + } + up_write(&info->extent_commit_sem); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, @@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) up_write(&block_group->space_info->groups_sem); if (block_group->cached == BTRFS_CACHE_STARTED) - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); @@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) spin_lock_init(&cache->lock); spin_lock_init(&cache->tree_lock); cache->fs_info = info; - init_waitqueue_head(&cache->caching_q); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->flags = btrfs_block_group_flags(&cache->item); cache->sectorsize = root->sectorsize; - remove_sb_from_cache(root, cache); - /* * check for two cases, either we are full, and therefore * don't need to bother with the caching work since we won't @@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) * time, particularly in the full case. */ if (found_key.offset == btrfs_block_group_used(&cache->item)) { + exclude_super_stripes(root, cache); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; + free_excluded_extents(root, cache); } else if (btrfs_block_group_used(&cache->item) == 0) { + exclude_super_stripes(root, cache); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; add_new_free_space(cache, root->fs_info, found_key.objectid, found_key.objectid + found_key.offset); + free_excluded_extents(root, cache); } ret = update_space_info(info, cache->flags, found_key.offset, @@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) &space_info); BUG_ON(ret); cache->space_info = space_info; + spin_lock(&cache->space_info->lock); + cache->space_info->bytes_super += cache->bytes_super; + spin_unlock(&cache->space_info->lock); + down_write(&space_info->groups_sem); list_add_tail(&cache->list, &space_info->block_groups); up_write(&space_info->groups_sem); @@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); spin_lock_init(&cache->tree_lock); - init_waitqueue_head(&cache->caching_q); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - remove_sb_from_cache(root, cache); + exclude_super_stripes(root, cache); add_new_free_space(cache, root->fs_info, chunk_offset, chunk_offset + size); + free_excluded_extents(root, cache); + ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); + + spin_lock(&cache->space_info->lock); + cache->space_info->bytes_super += cache->bytes_super; + spin_unlock(&cache->space_info->lock); + down_write(&cache->space_info->groups_sem); list_add_tail(&cache->list, &cache->space_info->block_groups); up_write(&cache->space_info->groups_sem); @@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, up_write(&block_group->space_info->groups_sem); if (block_group->cached == BTRFS_CACHE_STARTED) - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f58..0cb88f8146e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree, } if (bits & EXTENT_DIRTY) tree->dirty_bytes += end - start + 1; - set_state_cb(tree, state, bits); - state->state |= bits; state->start = start; state->end = end; + set_state_cb(tree, state, bits); + state->state |= bits; node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree, * bits were already set, or zero if none of the bits were already set. */ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) + int bits, int wake, int delete, + struct extent_state **cached_state, + gfp_t mask) { struct extent_state *state; + struct extent_state *cached; struct extent_state *prealloc = NULL; + struct rb_node *next_node; struct rb_node *node; u64 last_end; int err; @@ -488,6 +492,17 @@ again: } spin_lock(&tree->lock); + if (cached_state) { + cached = *cached_state; + *cached_state = NULL; + cached_state = NULL; + if (cached && cached->tree && cached->start == start) { + atomic_dec(&cached->refs); + state = cached; + goto hit_next; + } + free_extent_state(cached); + } /* * this search will find the extents that end after * our range starts @@ -496,6 +511,7 @@ again: if (!node) goto out; state = rb_entry(node, struct extent_state, rb_node); +hit_next: if (state->start > end) goto out; WARN_ON(state->end < start); @@ -531,8 +547,6 @@ again: if (last_end == (u64)-1) goto out; start = last_end + 1; - } else { - start = state->start; } goto search_again; } @@ -550,16 +564,28 @@ again: if (wake) wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, wake, delete); prealloc = NULL; goto out; } + if (state->end < end && prealloc && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + set |= clear_state_bit(tree, state, bits, wake, delete); if (last_end == (u64)-1) goto out; start = last_end + 1; + if (start <= end && next_node) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } goto search_again; out: @@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree, state->state |= bits; } +static void cache_state(struct extent_state *state, + struct extent_state **cached_ptr) +{ + if (cached_ptr && !(*cached_ptr)) { + if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { + *cached_ptr = state; + atomic_inc(&state->refs); + } + } +} + /* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. + * set some bits on a range in the tree. This may require allocations or + * sleeping, so the gfp mask is used to indicate what is allowed. * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. + * If any of the exclusive bits are set, this will fail with -EEXIST if some + * part of the range already has the desired bits set. The start of the + * existing range is returned in failed_start in this case. * - * [start, end] is inclusive - * This takes the tree lock. + * [start, end] is inclusive This takes the tree lock. */ + static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int exclusive, u64 *failed_start, + int bits, int exclusive_bits, u64 *failed_start, + struct extent_state **cached_state, gfp_t mask) { struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; int err = 0; - int set; u64 last_start; u64 last_end; + again: if (!prealloc && (mask & __GFP_WAIT)) { prealloc = alloc_extent_state(mask); @@ -683,6 +721,13 @@ again: } spin_lock(&tree->lock); + if (cached_state && *cached_state) { + state = *cached_state; + if (state->start == start && state->tree) { + node = &state->rb_node; + goto hit_next; + } + } /* * this search will find all the extents that end after * our range starts. @@ -694,8 +739,8 @@ again: BUG_ON(err == -EEXIST); goto out; } - state = rb_entry(node, struct extent_state, rb_node); +hit_next: last_start = state->start; last_end = state->end; @@ -706,17 +751,29 @@ again: * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { + struct rb_node *next_node; + if (state->state & exclusive_bits) { *failed_start = state->start; err = -EEXIST; goto out; } + set_state_bits(tree, state, bits); + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; + start = last_end + 1; + if (start < end && prealloc && !need_resched()) { + next_node = rb_next(node); + if (next_node) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } + } goto search_again; } @@ -737,8 +794,7 @@ again: * desired bit on it. */ if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { + if (state->state & exclusive_bits) { *failed_start = start; err = -EEXIST; goto out; @@ -750,12 +806,11 @@ again: goto out; if (state->end <= end) { set_state_bits(tree, state, bits); + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; - } else { - start = state->start; } goto search_again; } @@ -774,6 +829,7 @@ again: this_end = last_start - 1; err = insert_state(tree, prealloc, start, this_end, bits); + cache_state(prealloc, cached_state); prealloc = NULL; BUG_ON(err == -EEXIST); if (err) @@ -788,8 +844,7 @@ again: * on the first half */ if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { + if (state->state & exclusive_bits) { *failed_start = start; err = -EEXIST; goto out; @@ -798,6 +853,7 @@ again: BUG_ON(err == -EEXIST); set_state_bits(tree, prealloc, bits); + cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; goto out; @@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} - -int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); + NULL, mask); } int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); + NULL, mask); } int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); + return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); } int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, - 0, NULL, mask); + EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, + 0, NULL, NULL, mask); } int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} - -int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + NULL, mask); } int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); + NULL, mask); } static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, + NULL, mask); } int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); + NULL, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} - -static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} - -static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, - u64 end, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, + NULL, mask); } int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) @@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) * either insert or lock state struct between start and end use mask to tell * us if waiting is desired. */ -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, struct extent_state **cached_state, gfp_t mask) { int err; u64 failed_start; while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); + err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, + EXTENT_LOCKED, &failed_start, + cached_state, mask); if (err == -EEXIST && (mask & __GFP_WAIT)) { wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); start = failed_start; @@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) return err; } +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + return lock_extent_bits(tree, start, end, 0, NULL, mask); +} + int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; u64 failed_start; - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, + &failed_start, NULL, mask); if (err == -EEXIST) { if (failed_start > start) clear_extent_bit(tree, start, failed_start - 1, - EXTENT_LOCKED, 1, 0, mask); + EXTENT_LOCKED, 1, 0, NULL, mask); return 0; } return 1; } +int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, + mask); +} + int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, + mask); } /* @@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) page_cache_release(page); index++; } - set_extent_dirty(tree, start, end, GFP_NOFS); return 0; } @@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) page_cache_release(page); index++; } - set_extent_writeback(tree, start, end, GFP_NOFS); return 0; } @@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, u64 delalloc_start; u64 delalloc_end; u64 found; + struct extent_state *cached_state = NULL; int ret; int loops = 0; @@ -1269,6 +1317,7 @@ again: /* some of the pages are gone, lets avoid looping by * shortening the size of the delalloc range we're searching */ + free_extent_state(cached_state); if (!loops) { unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); max_bytes = PAGE_CACHE_SIZE - offset; @@ -1282,18 +1331,21 @@ again: BUG_ON(ret); /* step three, lock the state bits for the whole range */ - lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + lock_extent_bits(tree, delalloc_start, delalloc_end, + 0, &cached_state, GFP_NOFS); /* then test to make sure it is all still delalloc */ ret = test_range_bit(tree, delalloc_start, delalloc_end, - EXTENT_DELALLOC, 1); + EXTENT_DELALLOC, 1, cached_state); if (!ret) { - unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + unlock_extent_cached(tree, delalloc_start, delalloc_end, + &cached_state, GFP_NOFS); __unlock_for_delalloc(inode, locked_page, delalloc_start, delalloc_end); cond_resched(); goto again; } + free_extent_state(cached_state); *start = delalloc_start; *end = delalloc_end; out_failed: @@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, int clear_unlock, int clear_delalloc, int clear_dirty, int set_writeback, - int end_writeback) + int end_writeback, + int set_private2) { int ret; struct page *pages[16]; @@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode, if (clear_delalloc) clear_bits |= EXTENT_DELALLOC; - clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); - if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) + clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); + if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || + set_private2)) return 0; while (nr_pages > 0) { @@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { + + if (set_private2) + SetPagePrivate2(pages[i]); + if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; @@ -1476,14 +1534,17 @@ out: * range is found set. */ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int filled) + int bits, int filled, struct extent_state *cached) { struct extent_state *state = NULL; struct rb_node *node; int bitset = 0; spin_lock(&tree->lock); - node = tree_search(tree, start); + if (cached && cached->tree && cached->start == start) + node = &cached->rb_node; + else + node = tree_search(tree, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); @@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, bitset = 0; break; } + + if (state->end == (u64)-1) + break; + start = state->end + 1; if (start > end) break; @@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) SetPageUptodate(page); return 0; } @@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree, { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) unlock_page(page); return 0; } @@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree, static int check_page_writeback(struct extent_io_tree *tree, struct page *page) { - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); + end_page_writeback(page); return 0; } @@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) } if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + clear_extent_uptodate(tree, start, end, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - if (whole_page) end_page_writeback(page); else @@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, continue; } /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + if (test_range_bit(tree, cur, cur_end, + EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 iosize; u64 unlock_start; sector_t sector; + struct extent_state *cached_state = NULL; struct extent_map *em; struct block_device *bdev; int ret; @@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { + u64 delalloc_to_write = 0; /* * make sure the wbc mapping index is at least updated * to this page. @@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, tree->ops->fill_delalloc(inode, page, delalloc_start, delalloc_end, &page_started, &nr_written); + /* + * delalloc_end is already one less than the total + * length, so we don't subtract one from + * PAGE_CACHE_SIZE + */ + delalloc_to_write += (delalloc_end - delalloc_start + + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; delalloc_start = delalloc_end + 1; } + if (wbc->nr_to_write < delalloc_to_write) { + int thresh = 8192; + + if (delalloc_to_write < thresh * 2) + thresh = delalloc_to_write; + wbc->nr_to_write = min_t(u64, delalloc_to_write, + thresh); + } /* did the fill delalloc function already unlock and start * the IO? @@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, goto done_unlocked; } } - lock_extent(tree, start, page_end, GFP_NOFS); - - unlock_start = start; - if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); if (ret == -EAGAIN) { - unlock_extent(tree, start, page_end, GFP_NOFS); redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); @@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, update_nr_written(page, wbc, nr_written + 1); end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) - printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); - if (last_byte <= start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_extent(tree, start, page_end, GFP_NOFS); if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, start, page_end, NULL, 1); @@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, goto done; } - set_extent_uptodate(tree, start, page_end, GFP_NOFS); blocksize = inode->i_sb->s_blocksize; while (cur <= end) { if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - unlock_extent(tree, unlock_start, page_end, GFP_NOFS); if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, page_end, NULL, 1); @@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, */ if (compressed || block_start == EXTENT_MAP_HOLE || block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - - unlock_extent(tree, unlock_start, cur + iosize - 1, - GFP_NOFS); - /* * end_io notification does not happen here for * compressed extents @@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } /* leave this out until we have a page_mkwrite call */ if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { + EXTENT_DIRTY, 0, NULL)) { cur = cur + iosize; pg_offset += iosize; continue; } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); if (tree->ops && tree->ops->writepage_io_hook) { ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); @@ -2309,12 +2368,12 @@ done: set_page_writeback(page); end_page_writeback(page); } - if (unlock_start <= page_end) - unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); done_unlocked: + /* drop our reference on any cached states */ + free_extent_state(cached_state); return 0; } @@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, writepage_t writepage, void *data, void (*flush_fn)(void *)) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; + int nr_to_write_done = 0; struct pagevec pvec; int nr_pages; pgoff_t index; @@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, scanned = 1; } retry: - while (!done && (index <= end) && + while (!done && !nr_to_write_done && (index <= end) && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { @@ -2412,12 +2471,15 @@ retry: unlock_page(page); ret = 0; } - if (ret || wbc->nr_to_write <= 0) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; + if (ret) done = 1; - } + + /* + * the filesystem may choose to bump up nr_to_write. + * We have to make sure to honor the new nr_to_write + * at any time + */ + nr_to_write_done = wbc->nr_to_write <= 0; } pagevec_release(&pvec); cond_resched(); @@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree, return 0; lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); + wait_on_page_writeback(page); clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); + 1, 1, NULL, GFP_NOFS); return 0; } @@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree, !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { + EXTENT_UPTODATE, 1, NULL)) { u64 sector; u64 extent_offset = block_start - em->start; size_t iosize; @@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree, */ set_extent_bit(tree, block_start, block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); + EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, NULL, 1, @@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map, int ret = 1; if (test_range_bit(tree, start, end, - EXTENT_IOBITS | EXTENT_ORDERED, 0)) + EXTENT_IOBITS, 0, NULL)) ret = 0; else { if ((mask & GFP_NOFS) == GFP_NOFS) mask = GFP_NOFS; - clear_extent_bit(tree, start, end, EXTENT_UPTODATE, - 1, 1, mask); + /* + * at this point we can safely clear everything except the + * locked bit and the nodatasum bit + */ + clear_extent_bit(tree, start, end, + ~(EXTENT_LOCKED | EXTENT_NODATASUM), + 0, 0, NULL, mask); } return ret; } @@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 len; while (start <= end) { len = end - start + 1; - spin_lock(&map->lock); + write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); if (!em || IS_ERR(em)) { - spin_unlock(&map->lock); + write_unlock(&map->lock); break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || em->start != start) { - spin_unlock(&map->lock); + write_unlock(&map->lock); free_extent_map(em); break; } if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED | EXTENT_WRITEBACK | - EXTENT_ORDERED, - 0)) { + EXTENT_LOCKED | EXTENT_WRITEBACK, + 0, NULL)) { remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); } start = extent_map_end(em); - spin_unlock(&map->lock); + write_unlock(&map->lock); /* once for us */ free_extent_map(em); @@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, int uptodate; unsigned long index; - ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); + ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); if (ret) return 1; while (start <= end) { @@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); + EXTENT_UPTODATE, 1, NULL); if (ret) return ret; @@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, return 0; if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1)) { + EXTENT_UPTODATE, 1, NULL)) { return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3..14ed16fd862 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,10 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_ORDERED (1 << 9) -#define EXTENT_ORDERED_METADATA (1 << 10) -#define EXTENT_BOUNDARY (1 << 11) -#define EXTENT_NODATASUM (1 << 12) +#define EXTENT_BOUNDARY (1 << 9) +#define EXTENT_NODATASUM (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ @@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, struct extent_state **cached, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); @@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 max_bytes, unsigned long bits); int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int filled); + int bits, int filled, struct extent_state *cached_state); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask); + int bits, int wake, int delete, struct extent_state **cached, + gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, int clear_unlock, int clear_delalloc, int clear_dirty, int set_writeback, - int end_writeback); + int end_writeback, + int set_private2); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e..2c726b7b9fa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -36,7 +36,7 @@ void extent_map_exit(void) void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - spin_lock_init(&tree->lock); + rwlock_init(&tree->lock); } /** @@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +{ + int ret = 0; + struct extent_map *merge = NULL; + struct rb_node *rb; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(em->start != start || !em); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + + if (em->start != 0) { + rb = rb_prev(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_len += merge->block_len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); + } + } + + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + em->block_len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + merge->in_tree = 0; + free_extent_map(merge); + } + + free_extent_map(em); +out: + write_unlock(&tree->lock); + return ret; + +} + /** * add_extent_mapping - add new extent map to the extent tree * @tree: tree to insert new map in @@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, ret = -EEXIST; goto out; } - assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { ret = -EEXIST; @@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); - assert_spin_locked(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -319,6 +367,54 @@ out: } /** + * search_extent_mapping - find a nearby extent map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. + * + * If one can't be found, any nearby extent may be returned + */ +struct extent_map *search_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) +{ + struct extent_map *em; + struct rb_node *rb_node; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; + + rb_node = __tree_search(&tree->map, start, &prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + goto found; + } + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_PTR(PTR_ERR(rb_node)); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + goto found; + + em = NULL; + goto out; + +found: + atomic_inc(&em->refs); +out: + return em; +} + +/** * remove_extent_mapping - removes an extent_map from the extent tree * @tree: extent tree to remove from * @em: extent map beeing removed @@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) int ret = 0; WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); - assert_spin_locked(&tree->lock); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; return ret; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb..ab6d74b6e64 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -31,7 +31,7 @@ struct extent_map { struct extent_map_tree { struct rb_root map; - spinlock_t lock; + rwlock_t lock; }; static inline u64 extent_map_end(struct extent_map *em) @@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); int __init extent_map_init(void); void extent_map_exit(void); +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); +struct extent_map *search_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273..571ad3c13b4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 hint_byte; u64 num_bytes; u64 start_pos; u64 end_of_last_block; @@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, root->sectorsize - 1) & ~((u64)root->sectorsize - 1); end_of_last_block = start_pos + num_bytes - 1; - - lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - hint_byte = 0; - - set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); - - /* check for reserved extents on each page, we don't want - * to reset the delalloc bit on things that already have - * extents reserved. - */ btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, * at this time. */ } - err = btrfs_end_transaction(trans, root); -out_unlock: - unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, if (!split2) split2 = alloc_extent_map(GFP_NOFS); - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); break; } flags = em->flags; if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - spin_unlock(&em_tree->lock); if (em->start <= start && (!testend || em->start + em->len >= start + len)) { free_extent_map(em); + write_unlock(&em_tree->lock); break; } if (start < em->start) { @@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, start = em->start + em->len; } free_extent_map(em); + write_unlock(&em_tree->lock); continue; } compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); @@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, free_extent_map(split); split = NULL; } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); /* once for us */ free_extent_map(em); @@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_byte) + u64 inline_limit, u64 *hint_byte, int drop_cache) { u64 extent_end = 0; u64 search_start = start; @@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, int ret; inline_limit = 0; - btrfs_drop_extent_cache(inode, start, end - 1, 0); + if (drop_cache) + btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617..5c2caad7621 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) { - u64 max_bytes, possible_bytes; + u64 max_bytes; + u64 bitmap_bytes; + u64 extent_bytes; /* * The goal is to keep the total amount of memory used per 1gb of space @@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) max_bytes = MAX_CACHE_BYTES_PER_GIG * (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); - possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + - (sizeof(struct btrfs_free_space) * - block_group->extents_thresh); + /* + * we want to account for 1 more bitmap than what we have so we can make + * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as + * we add more bitmaps. + */ + bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; - if (possible_bytes > max_bytes) { - int extent_bytes = max_bytes - - (block_group->total_bitmaps * PAGE_CACHE_SIZE); + if (bitmap_bytes >= max_bytes) { + block_group->extents_thresh = 0; + return; + } - if (extent_bytes <= 0) { - block_group->extents_thresh = 0; - return; - } + /* + * we want the extent entry threshold to always be at most 1/2 the maxw + * bytes we can have, or whatever is less than that. + */ + extent_bytes = max_bytes - bitmap_bytes; + extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); - block_group->extents_thresh = extent_bytes / - (sizeof(struct btrfs_free_space)); - } + block_group->extents_thresh = + div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, @@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, BUG_ON(block_group->total_bitmaps >= max_bitmaps); info->offset = offset_to_bitmap(block_group, offset); + info->bytes = 0; link_free_space(block_group, info); block_group->total_bitmaps++; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c61180..72ce3c173d6 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ref + 1); ret = 0; } else if (ret < 0) { + if (ret == -EOVERFLOW) + ret = -EMLINK; goto out; } else { ref = btrfs_item_ptr(path->nodes[0], path->slots[0], @@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(struct btrfs_inode_item)); - if (ret == 0 && objectid > root->highest_inode) - root->highest_inode = objectid; return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123..c56eb590917 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) slot = path->slots[0] - 1; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &found_key, slot); - *objectid = found_key.objectid; + *objectid = max_t(u64, found_key.objectid, + BTRFS_FIRST_FREE_OBJECTID - 1); } else { - *objectid = BTRFS_FIRST_FREE_OBJECTID; + *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; } ret = 0; error: @@ -53,91 +54,27 @@ error: return ret; } -/* - * walks the btree of allocated inodes and find a hole. - */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 dirid, u64 *objectid) { - struct btrfs_path *path; - struct btrfs_key key; int ret; - int slot = 0; - u64 last_ino = 0; - int start_found; - struct extent_buffer *l; - struct btrfs_key search_key; - u64 search_start = dirid; - mutex_lock(&root->objectid_mutex); - if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && - root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { - *objectid = ++root->last_inode_alloc; - mutex_unlock(&root->objectid_mutex); - return 0; - } - path = btrfs_alloc_path(); - BUG_ON(!path); - search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); - search_key.objectid = search_start; - search_key.type = 0; - search_key.offset = 0; - - start_found = 0; - ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); - if (ret < 0) - goto error; - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - if (!start_found) { - *objectid = search_start; - start_found = 1; - goto found; - } - *objectid = last_ino > search_start ? - last_ino : search_start; - goto found; - } - btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start) { - if (start_found) { - if (last_ino < search_start) - last_ino = search_start; - if (key.objectid > last_ino) { - *objectid = last_ino; - goto found; - } - } else if (key.objectid > search_start) { - *objectid = search_start; - goto found; - } - } - if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) - break; + if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_find_highest_inode(root, &root->highest_objectid); + if (ret) + goto out; + } - start_found = 1; - last_ino = key.objectid + 1; - path->slots[0]++; + if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { + ret = -ENOSPC; + goto out; } - BUG_ON(1); -found: - btrfs_release_path(root, path); - btrfs_free_path(path); - BUG_ON(*objectid < search_start); - mutex_unlock(&root->objectid_mutex); - return 0; -error: - btrfs_release_path(root, path); - btrfs_free_path(path); + + *objectid = ++root->highest_objectid; + ret = 0; +out: mutex_unlock(&root->objectid_mutex); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9096fd0ca3c..e9b76bcd1c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, } ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, &hint_byte); + aligned_end, aligned_end, start, + &hint_byte, 1); BUG_ON(ret); if (isize > actual_end) @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, inline_len, compressed_size, compressed_pages); BUG_ON(ret); - btrfs_drop_extent_cache(inode, start, aligned_end, 0); + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; } @@ -425,7 +426,7 @@ again: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, end, NULL, 1, 0, - 0, 1, 1, 1); + 0, 1, 1, 1, 0); ret = 0; goto free_pages_out; } @@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode, set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, 1, 1, 0, 1, 1, 0); + NULL, 1, 1, 0, 1, 1, 0, 0); ret = btrfs_submit_compressed_write(inode, async_extent->start, @@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode, extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, end, NULL, 1, 1, - 1, 1, 1, 1); + 1, 1, 1, 1, 0); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(disk_num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + + read_lock(&BTRFS_I(inode)->extent_tree.lock); + em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, + start, num_bytes); + if (em) { + alloc_hint = em->block_start; + free_extent_map(em); + } + read_unlock(&BTRFS_I(inode)->extent_tree.lock); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { @@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode, em = alloc_extent_map(GFP_NOFS); em->start = start; em->orig_start = em->start; - ram_size = ins.offset; em->len = ins.offset; @@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode, /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits + * + * Do set the Private2 bit so we know this page was properly + * setup for writepage */ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, start + ram_size - 1, locked_page, unlock, 1, - 1, 0, 0, 0); + 1, 0, 0, 0, 1); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, int limit = 10 * 1024 * 1042; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | - EXTENT_DELALLOC, 1, 0, GFP_NOFS); + EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; @@ -1080,9 +1092,9 @@ out_check: em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -1101,7 +1113,7 @@ out_check: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, cur_offset, cur_offset + num_bytes - 1, - locked_page, 1, 1, 1, 0, 0, 0); + locked_page, 1, 1, 1, 0, 0, 0, 1); cur_offset = extent_end; if (cur_offset > end) break; @@ -1374,10 +1386,8 @@ again: lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); /* already ordered? We're done */ - if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_ORDERED, 0)) { + if (PagePrivate2(page)) goto out; - } ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { @@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) struct inode *inode = page->mapping->host; struct btrfs_writepage_fixup *fixup; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, - EXTENT_ORDERED, 0); - if (ret) + /* this page is properly in the ordered list */ + if (TestClearPagePrivate2(page)) return 0; if (PageChecked(page)) @@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, BUG_ON(!path); path->leave_spinning = 1; + + /* + * we may be replacing one extent in the tree with another. + * The new extent is pinned in the extent map, and we don't want + * to drop it from the cache until it is completely in the btree. + * + * So, tell btrfs_drop_extents to leave this extent in the cache. + * the caller is expected to unpin it and allow it to be merged + * with the others. + */ ret = btrfs_drop_extents(trans, root, inode, file_pos, file_pos + num_bytes, locked_end, - file_pos, &hint); + file_pos, &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); inode_add_bytes(inode, num_bytes); - btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); ins.objectid = disk_bytenr; ins.offset = disk_num_bytes; @@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); + unpin_extent_cache(&BTRFS_I(inode)->extent_tree, + ordered_extent->file_offset, + ordered_extent->len); BUG_ON(ret); } unlock_extent(io_tree, ordered_extent->file_offset, @@ -1623,6 +1643,7 @@ nocow: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, failrec->last_mirror = 0; failrec->bio_flags = 0; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, failrec->len); if (em->start > start || em->start + em->len < start) { free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && - test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { + test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, GFP_NOFS); return 0; @@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) return ret; } +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, u64 objectid, + const char *name, int name_len) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; + u64 index; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, + objectid, root->root_key.objectid, + dir->i_ino, &index, name, name_len); + if (ret < 0) { + BUG_ON(ret != -ENOENT); + di = btrfs_search_dir_index_item(root, path, dir->i_ino, + name, name_len); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(root, path); + index = key.offset; + } + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + index, name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, dir); + BUG_ON(ret); + dir->i_sb->s_dirt = 1; + + btrfs_free_path(path); + return 0; +} + static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; unsigned long nr = 0; - /* - * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir - * the root of a subvolume or snapshot - */ if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || - inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - } trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + err = btrfs_unlink_subvol(trans, root, dir, + BTRFS_I(inode)->location.objectid, + dentry->d_name.name, + dentry->d_name.len); + goto out; + } + err = btrfs_orphan_add(trans, inode); if (err) - goto fail_trans; + goto out; /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); if (!err) btrfs_i_size_write(inode, 0); - -fail_trans: +out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) cur_offset, cur_offset + hole_size, block_end, - cur_offset, &hint_byte); + cur_offset, &hint_byte, 1); if (err) break; err = btrfs_insert_file_extent(trans, root, @@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (inode->i_nlink > 0) { + BUG_ON(btrfs_root_refs(&root->root_item) != 0); + goto no_delete; + } + btrfs_i_size_write(inode, 0); trans = btrfs_join_transaction(root, 1); @@ -3070,29 +3161,67 @@ out_err: * is kind of like crossing a mount point. */ static int fixup_tree_root_location(struct btrfs_root *root, - struct btrfs_key *location, - struct btrfs_root **sub_root, - struct dentry *dentry) + struct inode *dir, + struct dentry *dentry, + struct btrfs_key *location, + struct btrfs_root **sub_root) { - struct btrfs_root_item *ri; + struct btrfs_path *path; + struct btrfs_root *new_root; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; + int ret; + int err = 0; - if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) - return 0; - if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) - return 0; + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } - *sub_root = btrfs_read_fs_root(root->fs_info, location, - dentry->d_name.name, - dentry->d_name.len); - if (IS_ERR(*sub_root)) - return PTR_ERR(*sub_root); + err = -ENOENT; + ret = btrfs_find_root_ref(root->fs_info->tree_root, path, + BTRFS_I(dir)->root->root_key.objectid, + location->objectid); + if (ret) { + if (ret < 0) + err = ret; + goto out; + } - ri = &(*sub_root)->root_item; - location->objectid = btrfs_root_dirid(ri); - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - location->offset = 0; + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || + btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) + goto out; - return 0; + ret = memcmp_extent_buffer(leaf, dentry->d_name.name, + (unsigned long)(ref + 1), + dentry->d_name.len); + if (ret) + goto out; + + btrfs_release_path(root->fs_info->tree_root, path); + + new_root = btrfs_read_fs_root_no_name(root->fs_info, location); + if (IS_ERR(new_root)) { + err = PTR_ERR(new_root); + goto out; + } + + if (btrfs_root_refs(&new_root->root_item) == 0) { + err = -ENOENT; + goto out; + } + + *sub_root = new_root; + location->objectid = btrfs_root_dirid(&new_root->root_item); + location->type = BTRFS_INODE_ITEM_KEY; + location->offset = 0; + err = 0; +out: + btrfs_free_path(path); + return err; } static void inode_tree_add(struct inode *inode) @@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode) struct btrfs_inode *entry; struct rb_node **p; struct rb_node *parent; - again: p = &root->inode_tree.rb_node; parent = NULL; + if (hlist_unhashed(&inode->i_hash)) + return; + spin_lock(&root->inode_lock); while (*p) { parent = *p; @@ -3132,13 +3263,87 @@ again: static void inode_tree_del(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + int empty = 0; spin_lock(&root->inode_lock); if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); + empty = RB_EMPTY_ROOT(&root->inode_tree); } spin_unlock(&root->inode_lock); + + if (empty && btrfs_root_refs(&root->root_item) == 0) { + synchronize_srcu(&root->fs_info->subvol_srcu); + spin_lock(&root->inode_lock); + empty = RB_EMPTY_ROOT(&root->inode_tree); + spin_unlock(&root->inode_lock); + if (empty) + btrfs_add_dead_root(root); + } +} + +int btrfs_invalidate_inodes(struct btrfs_root *root) +{ + struct rb_node *node; + struct rb_node *prev; + struct btrfs_inode *entry; + struct inode *inode; + u64 objectid = 0; + + WARN_ON(btrfs_root_refs(&root->root_item) != 0); + + spin_lock(&root->inode_lock); +again: + node = root->inode_tree.rb_node; + prev = NULL; + while (node) { + prev = node; + entry = rb_entry(node, struct btrfs_inode, rb_node); + + if (objectid < entry->vfs_inode.i_ino) + node = node->rb_left; + else if (objectid > entry->vfs_inode.i_ino) + node = node->rb_right; + else + break; + } + if (!node) { + while (prev) { + entry = rb_entry(prev, struct btrfs_inode, rb_node); + if (objectid <= entry->vfs_inode.i_ino) { + node = prev; + break; + } + prev = rb_next(prev); + } + } + while (node) { + entry = rb_entry(node, struct btrfs_inode, rb_node); + objectid = entry->vfs_inode.i_ino + 1; + inode = igrab(&entry->vfs_inode); + if (inode) { + spin_unlock(&root->inode_lock); + if (atomic_read(&inode->i_count) > 1) + d_prune_aliases(inode); + /* + * btrfs_drop_inode will remove it from + * the inode cache when its usage count + * hits zero. + */ + iput(inode); + cond_resched(); + spin_lock(&root->inode_lock); + goto again; + } + + if (cond_resched_lock(&root->inode_lock)) + goto again; + + node = rb_next(node); + } + spin_unlock(&root->inode_lock); + return 0; } static noinline void init_btrfs_i(struct inode *inode) @@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } +static struct inode *new_simple_dir(struct super_block *s, + struct btrfs_key *key, + struct btrfs_root *root) +{ + struct inode *inode = new_inode(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + init_btrfs_i(inode); + + BTRFS_I(inode)->root = root; + memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); + BTRFS_I(inode)->dummy_inode = 1; + + inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + return inode; +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct inode *inode; - struct btrfs_inode *bi = BTRFS_I(dir); - struct btrfs_root *root = bi->root; + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; struct btrfs_key location; + int index; int ret; + dentry->d_op = &btrfs_dentry_operations; + if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (ret < 0) return ERR_PTR(ret); - inode = NULL; - if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root, - dentry); - if (ret < 0) - return ERR_PTR(ret); - if (ret > 0) - return ERR_PTR(-ENOENT); + if (location.objectid == 0) + return NULL; + + if (location.type == BTRFS_INODE_ITEM_KEY) { + inode = btrfs_iget(dir->i_sb, &location, root); + return inode; + } + + BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); + + index = srcu_read_lock(&root->fs_info->subvol_srcu); + ret = fixup_tree_root_location(root, dir, dentry, + &location, &sub_root); + if (ret < 0) { + if (ret != -ENOENT) + inode = ERR_PTR(ret); + else + inode = new_simple_dir(dir->i_sb, &location, sub_root); + } else { inode = btrfs_iget(dir->i_sb, &location, sub_root); - if (IS_ERR(inode)) - return ERR_CAST(inode); } + srcu_read_unlock(&root->fs_info->subvol_srcu, index); + return inode; } +static int btrfs_dentry_delete(struct dentry *dentry) +{ + struct btrfs_root *root; + + if (!dentry->d_inode) + return 0; + + root = BTRFS_I(dentry->d_inode)->root; + if (btrfs_root_refs(&root->root_item) == 0) + return 1; + return 0; +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode; - if (dentry->d_name.len > BTRFS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - inode = btrfs_lookup_dentry(dir, dentry); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (ret != 0) goto fail; - if (objectid > root->highest_inode) - root->highest_inode = objectid; - inode->i_uid = current_fsuid(); if (dir && (dir->i_mode & S_ISGID)) { @@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) { - int ret; + int ret = 0; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(parent_inode)->root; - key.objectid = inode->i_ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); + } else { + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + } + + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, + key.objectid, root->root_key.objectid, + parent_inode->i_ino, + index, name, name_len); + } else if (add_backref) { + ret = btrfs_insert_inode_ref(trans, root, + name, name_len, inode->i_ino, + parent_inode->i_ino, index); + } - ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode->i_ino, - &key, btrfs_inode_type(inode), - index); if (ret == 0) { - if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, - name, name_len, - inode->i_ino, - parent_inode->i_ino, - index); - } + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, + btrfs_inode_type(inode), index); + BUG_ON(ret); + btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_add_nondir(trans, dentry, inode, 1, index); - if (err) - drop_inode = 1; - - btrfs_update_inode_block_group(trans, dir); - err = btrfs_update_inode(trans, root, inode); - - if (err) + if (err) { drop_inode = 1; + } else { + btrfs_update_inode_block_group(trans, dir); + err = btrfs_update_inode(trans, root, inode); + BUG_ON(err); + btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + } nr = trans->blocks_used; - - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { @@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int compressed; again: - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) em->bdev = root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (em) { if (em->start > start || em->start + em->len <= start) @@ -4215,6 +4471,11 @@ again: map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); + if (pg_offset + copy_size < PAGE_CACHE_SIZE) { + memset(map + pg_offset + copy_size, 0, + PAGE_CACHE_SIZE - pg_offset - + copy_size); + } kunmap(page); } flush_dcache_page(page); @@ -4259,7 +4520,7 @@ insert: } err = 0; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); /* it is possible that someone inserted the extent into the tree * while we had the lock dropped. It is also possible that @@ -4299,7 +4560,7 @@ insert: err = 0; } } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); out: if (path) btrfs_free_path(path); @@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + /* + * we have the page locked, so new writeback can't start, + * and the dirty bit won't be cleared while we are here. + * + * Wait for IO on this page so that we can safely clear + * the PagePrivate2 bit and do ordered accounting + */ wait_on_page_writeback(page); + tree = &BTRFS_I(page->mapping->host)->io_tree; if (offset) { btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent(tree, page_start, page_end, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); @@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_LOCKED, 1, 0, GFP_NOFS); - btrfs_finish_ordered_io(page->mapping->host, - page_start, page_end); + EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + /* + * whoever cleared the private bit is responsible + * for the finish_ordered_io + */ + if (TestClearPagePrivate2(page)) { + btrfs_finish_ordered_io(page->mapping->host, + page_start, page_end); + } btrfs_put_ordered_extent(ordered); lock_extent(tree, page_start, page_end, GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_ORDERED, - 1, 1, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, NULL, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -4521,11 +4795,14 @@ again: } ClearPageChecked(page); set_page_dirty(page); + SetPageUptodate(page); BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + if (!ret) + return VM_FAULT_LOCKED; unlock_page(page); out: return ret; @@ -4594,11 +4871,11 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, struct dentry *dentry, + struct btrfs_root *new_root, u64 new_dirid, u64 alloc_hint) { struct inode *inode; - int error; + int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, @@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_nlink = 1; btrfs_i_size_write(inode, 0); - error = btrfs_update_inode(trans, new_root, inode); - if (error) - return error; + err = btrfs_update_inode(trans, new_root, inode); + BUG_ON(err); - d_instantiate(dentry, inode); + iput(inode); return 0; } @@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } +void btrfs_drop_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) + generic_delete_inode(inode); + else + generic_drop_inode(inode); +} + static void init_once(void *foo) { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; u64 index = 0; + u64 root_objectid; int ret; - /* we're not allowed to rename between subvolumes */ - if (BTRFS_I(old_inode)->root->root_key.objectid != - BTRFS_I(new_dir)->root->root_key.objectid) + if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; + + /* we only allow rename subvolume link between subvolumes */ + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) return -EXDEV; - if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || + (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) return -ENOTEMPTY; - } - /* to rename a snapshot or subvolume, we need to juggle the - * backrefs. This isn't coded yet - */ - if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) - return -EXDEV; + if (S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; ret = btrfs_check_metadata_free_space(root); if (ret) - goto out_unlock; + return ret; /* * we're using rename to replace one file with another. @@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(old_inode->i_mapping); + /* close the racy window with snapshot create/destroy ioctl */ + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + down_read(&root->fs_info->subvol_sem); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + ret = btrfs_set_inode_index(new_dir, &index); + if (ret) + goto out_fail; + + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + /* force full log commit if subvolume involved. */ + root->fs_info->last_trans_log_full_commit = trans->transid; + } else { + ret = btrfs_insert_inode_ref(trans, dest, + new_dentry->d_name.name, + new_dentry->d_name.len, + old_inode->i_ino, + new_dir->i_ino, index); + if (ret) + goto out_fail; + /* + * this is an ugly little race, but the rename is required + * to make sure that if we crash, the inode is either at the + * old name or the new one. pinning the log transaction lets + * us make sure we don't allow a log commit to come in after + * we unlink the name but before we add the new name back in. + */ + btrfs_pin_log_trans(root); + } /* * make sure the inode gets flushed if it is replacing * something. @@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, btrfs_add_ordered_operation(trans, root, old_inode); } - /* - * this is an ugly little race, but the rename is required to make - * sure that if we crash, the inode is either at the old name - * or the new one. pinning the log transaction lets us make sure - * we don't allow a log commit to come in after we unlink the - * name but before we add the new name back in. - */ - btrfs_pin_log_trans(root); - - btrfs_set_trans_block_group(trans, new_dir); - - btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; @@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, - old_dentry->d_name.name, - old_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; + ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, + old_dentry->d_name.name, + old_dentry->d_name.len); + } else { + btrfs_inc_nlink(old_dentry->d_inode); + ret = btrfs_unlink_inode(trans, root, old_dir, + old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_inode(trans, root, new_dir, - new_dentry->d_inode, - new_dentry->d_name.name, - new_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(new_inode->i_ino == + BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + root_objectid = BTRFS_I(new_inode)->location.objectid; + ret = btrfs_unlink_subvol(trans, dest, new_dir, + root_objectid, + new_dentry->d_name.name, + new_dentry->d_name.len); + BUG_ON(new_inode->i_nlink == 0); + } else { + ret = btrfs_unlink_inode(trans, dest, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, new_dentry->d_inode); - if (ret) - goto out_fail; + BUG_ON(ret); } - } - ret = btrfs_set_inode_index(new_dir, &index); - if (ret) - goto out_fail; - ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, - old_inode, new_dentry->d_name.name, - new_dentry->d_name.len, 1, index); - if (ret) - goto out_fail; + ret = btrfs_add_link(trans, new_dir, old_inode, + new_dentry->d_name.name, + new_dentry->d_name.len, 0, index); + BUG_ON(ret); - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + btrfs_log_new_name(trans, old_inode, old_dir, + new_dentry->d_parent); + btrfs_end_log_trans(root); + } out_fail: - - /* this btrfs_end_log_trans just allows the current - * log-sub transaction to complete - */ - btrfs_end_log_trans(root); btrfs_end_transaction_throttle(trans, root); -out_unlock: + + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + up_read(&root->fs_info->subvol_sem); return ret; } @@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); + btrfs_drop_extent_cache(inode, cur_offset, + cur_offset + ins.offset -1, 0); num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; @@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, }; + static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, @@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = { .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, .set_page_dirty = btrfs_set_page_dirty, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations btrfs_symlink_aops = { @@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, }; + +struct dentry_operations btrfs_dentry_operations = { + .d_delete = btrfs_dentry_delete, +}; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f..a8577a7f26a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - struct btrfs_root *new_root = root; - struct inode *dir; + struct btrfs_root *new_root; + struct inode *dir = dentry->d_parent->d_inode; int ret; int err; u64 objectid; @@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_check_metadata_free_space(root); if (ret) - goto fail_commit; + return ret; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root, if (ret) goto fail; + key.offset = (u64)-1; + new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); + BUG_ON(IS_ERR(new_root)); + + btrfs_record_root_in_trans(trans, new_root); + + ret = btrfs_create_subvol_root(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group); /* * insert the directory item */ - key.offset = (u64)-1; - dir = dentry->d_parent->d_inode; ret = btrfs_set_inode_index(dir, &index); BUG_ON(ret); @@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); - /* add the backref first */ ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, - objectid, BTRFS_ROOT_BACKREF_KEY, - root->root_key.objectid, + objectid, root->root_key.objectid, dir->i_ino, index, name, namelen); BUG_ON(ret); - /* now add the forward ref */ - ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, - root->root_key.objectid, BTRFS_ROOT_REF_KEY, - objectid, - dir->i_ino, index, name, namelen); - - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto fail_commit; - - new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); - BUG_ON(!new_root); - - trans = btrfs_start_transaction(new_root, 1); - BUG_ON(!trans); - - ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, - BTRFS_I(dir)->block_group); - if (ret) - goto fail; - + d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, new_root); + err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; -fail_commit: - btrfs_btree_balance_dirty(root, nr); return ret; } @@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * sys_mkdirat and vfs_mkdir, but we only do a single component lookup * inside this filesystem so it's quite a bit simpler. */ -static noinline int btrfs_mksubvol(struct path *parent, char *name, - int mode, int namelen, +static noinline int btrfs_mksubvol(struct path *parent, + char *name, int namelen, struct btrfs_root *snap_src) { + struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; int error; - mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); error = PTR_ERR(dentry); @@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, if (dentry->d_inode) goto out_dput; - if (!IS_POSIXACL(parent->dentry->d_inode)) - mode &= ~current_umask(); - error = mnt_want_write(parent->mnt); if (error) goto out_dput; - error = btrfs_may_create(parent->dentry->d_inode, dentry); + error = btrfs_may_create(dir, dentry); if (error) goto out_drop_write; - /* - * Actually perform the low-level subvolume creation after all - * this VFS fuzz. - * - * Eventually we want to pass in an inode under which we create this - * subvolume, but for now all are under the filesystem root. - * - * Also we should pass on the mode eventually to allow creating new - * subvolume with specific mode bits. - */ + down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); + + if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) + goto out_up_read; + if (snap_src) { - struct dentry *dir = dentry->d_parent; - struct dentry *test = dir->d_parent; - struct btrfs_path *path = btrfs_alloc_path(); - int ret; - u64 test_oid; - u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; - - test_oid = snap_src->root_key.objectid; - - ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, - path, parent_oid, test_oid); - if (ret == 0) - goto create; - btrfs_release_path(snap_src->fs_info->tree_root, path); - - /* we need to make sure we aren't creating a directory loop - * by taking a snapshot of something that has our current - * subvol in its directory tree. So, this loops through - * the dentries and checks the forward refs for each subvolume - * to see if is references the subvolume where we are - * placing this new snapshot. - */ - while (1) { - if (!test || - dir == snap_src->fs_info->sb->s_root || - test == snap_src->fs_info->sb->s_root || - test->d_inode->i_sb != snap_src->fs_info->sb) { - break; - } - if (S_ISLNK(test->d_inode->i_mode)) { - printk(KERN_INFO "Btrfs symlink in snapshot " - "path, failed\n"); - error = -EMLINK; - btrfs_free_path(path); - goto out_drop_write; - } - test_oid = - BTRFS_I(test->d_inode)->root->root_key.objectid; - ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, - path, test_oid, parent_oid); - if (ret == 0) { - printk(KERN_INFO "Btrfs snapshot creation " - "failed, looping\n"); - error = -EMLINK; - btrfs_free_path(path); - goto out_drop_write; - } - btrfs_release_path(snap_src->fs_info->tree_root, path); - test = test->d_parent; - } -create: - btrfs_free_path(path); - error = create_snapshot(snap_src, dentry, name, namelen); + error = create_snapshot(snap_src, dentry, + name, namelen); } else { - error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, - dentry, name, namelen); + error = create_subvol(BTRFS_I(dir)->root, dentry, + name, namelen); } - if (error) - goto out_drop_write; - - fsnotify_mkdir(parent->dentry->d_inode, dentry); + if (!error) + fsnotify_mkdir(dir, dentry); +out_up_read: + up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); out_drop_write: mnt_drop_write(parent->mnt); out_dput: dput(dentry); out_unlock: - mutex_unlock(&parent->dentry->d_inode->i_mutex); + mutex_unlock(&dir->i_mutex); return error; } - static int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -596,9 +517,8 @@ again: clear_page_dirty_for_io(page); btrfs_set_extent_delalloc(inode, page_start, page_end); - - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); @@ -609,7 +529,8 @@ out_unlock: return 0; } -static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +static noinline int btrfs_ioctl_resize(struct btrfs_root *root, + void __user *arg) { u64 new_size; u64 old_size; @@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; - struct btrfs_dir_item *di; - struct btrfs_path *path; struct file *src_file; - u64 root_dirid; int namelen; int ret = 0; @@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, goto out; } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args->name, namelen, 0); - btrfs_free_path(path); - - if (di && !IS_ERR(di)) { - ret = -EEXIST; - goto out; - } - - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } - if (subvol) { - ret = btrfs_mksubvol(&file->f_path, vol_args->name, - file->f_path.dentry->d_inode->i_mode, - namelen, NULL); + ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, + NULL); } else { struct inode *src_inode; src_file = fget(vol_args->fd); @@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, fput(src_file); goto out; } - ret = btrfs_mksubvol(&file->f_path, vol_args->name, - file->f_path.dentry->d_inode->i_mode, - namelen, BTRFS_I(src_inode)->root); + ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, + BTRFS_I(src_inode)->root); fput(src_file); } - out: kfree(vol_args); return ret; } +/* + * helper to check if the subvolume references other subvolumes + */ +static noinline int may_destroy_subvol(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = root->root_key.objectid; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, + &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + ret = 0; + if (path->slots[0] > 0) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == root->root_key.objectid && + key.type == BTRFS_ROOT_REF_KEY) + ret = -ENOTEMPTY; + } +out: + btrfs_free_path(path); + return ret; +} + +static noinline int btrfs_ioctl_snap_destroy(struct file *file, + void __user *arg) +{ + struct dentry *parent = fdentry(file); + struct dentry *dentry; + struct inode *dir = parent->d_inode; + struct inode *inode; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *dest = NULL; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + int namelen; + int ret; + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; + namelen = strlen(vol_args->name); + if (strchr(vol_args->name, '/') || + strncmp(vol_args->name, "..", namelen) == 0) { + err = -EINVAL; + goto out; + } + + err = mnt_want_write(file->f_path.mnt); + if (err) + goto out; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(vol_args->name, parent, namelen); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out_unlock_dir; + } + + if (!dentry->d_inode) { + err = -ENOENT; + goto out_dput; + } + + inode = dentry->d_inode; + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + err = -EINVAL; + goto out_dput; + } + + dest = BTRFS_I(inode)->root; + + mutex_lock(&inode->i_mutex); + err = d_invalidate(dentry); + if (err) + goto out_unlock; + + down_write(&root->fs_info->subvol_sem); + + err = may_destroy_subvol(dest); + if (err) + goto out_up_write; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_unlink_subvol(trans, root, dir, + dest->root_key.objectid, + dentry->d_name.name, + dentry->d_name.len); + BUG_ON(ret); + + btrfs_record_root_in_trans(trans, dest); + + memset(&dest->root_item.drop_progress, 0, + sizeof(dest->root_item.drop_progress)); + dest->root_item.drop_level = 0; + btrfs_set_root_refs(&dest->root_item, 0); + + ret = btrfs_insert_orphan_item(trans, + root->fs_info->tree_root, + dest->root_key.objectid); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + inode->i_flags |= S_DEAD; +out_up_write: + up_write(&root->fs_info->subvol_sem); +out_unlock: + mutex_unlock(&inode->i_mutex); + if (!err) { + btrfs_invalidate_inodes(dest); + d_delete(dentry); + } +out_dput: + dput(dentry); +out_unlock_dir: + mutex_unlock(&dir->i_mutex); + mnt_drop_write(file->f_path.mnt); +out: + kfree(vol_args); + return err; +} + static int btrfs_ioctl_defrag(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) return ret; } -static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* punch hole in destination first */ btrfs_drop_extents(trans, root, inode, off, off + len, - off + len, 0, &hint_byte); + off + len, 0, &hint_byte, 1); /* clone data */ key.objectid = src->i_ino; @@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, datao += off - key.offset; datal -= off - key.offset; } - if (key.offset + datao + datal + key.offset > - off + len) + if (key.offset + datao + datal > off + len) datal = off + len - key.offset - datao; /* disko == 0 means it's a hole */ if (!disko) @@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); + case BTRFS_IOC_SNAP_DESTROY: + return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa1..bc49914475e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ struct btrfs_ioctl_vol_args) - +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b2f401e604..b5d6d24726b 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * * len is the length of the extent * - * This also sets the EXTENT_ORDERED bit on the range in the inode. - * * The tree is given a single reference on the ordered extent that was * inserted. */ @@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->start = start; entry->len = len; entry->disk_len = disk_len; + entry->bytes_left = len; entry->inode = inode; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, &entry->rb_node); BUG_ON(node); - set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, - entry_end(entry) - 1, GFP_NOFS); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_add_tail(&entry->root_extent_list, &BTRFS_I(inode)->root->fs_info->ordered_extents); @@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; int ret; tree = &BTRFS_I(inode)->ordered_tree; mutex_lock(&tree->mutex); - clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, - GFP_NOFS); node = tree_search(tree, file_offset); if (!node) { ret = 1; @@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, goto out; } - ret = test_range_bit(io_tree, entry->file_offset, - entry->file_offset + entry->len - 1, - EXTENT_ORDERED, 0); - if (ret == 0) + if (io_size > entry->bytes_left) { + printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", + (unsigned long long)entry->bytes_left, + (unsigned long long)io_size); + } + entry->bytes_left -= io_size; + if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); + else + ret = 1; out: mutex_unlock(&tree->mutex); return ret == 0; @@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) u64 orig_end; u64 wait_end; struct btrfs_ordered_extent *ordered; + int found; if (start + len < start) { orig_end = INT_LIMIT(loff_t); @@ -502,6 +501,7 @@ again: orig_end >> PAGE_CACHE_SHIFT); end = orig_end; + found = 0; while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) @@ -514,6 +514,7 @@ again: btrfs_put_ordered_extent(ordered); break; } + found++; btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; btrfs_put_ordered_extent(ordered); @@ -521,8 +522,8 @@ again: break; end--; } - if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, - EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { + if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, + EXTENT_DELALLOC, 0, NULL)) { schedule_timeout(1); goto again; } @@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ if (test_range_bit(io_tree, disk_i_size, ordered->file_offset + ordered->len - 1, - EXTENT_DELALLOC, 0)) { + EXTENT_DELALLOC, 0, NULL)) { goto out; } /* @@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ if (i_size_test > entry_end(ordered) && !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, - EXTENT_DELALLOC, 0)) { + EXTENT_DELALLOC, 0, NULL)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } BTRFS_I(inode)->disk_i_size = new_i_size; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b0..993a7ea45c7 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -85,6 +85,9 @@ struct btrfs_ordered_extent { /* extent length on disk */ u64 disk_len; + /* number of bytes that still need writing */ + u64 bytes_left; + /* flags (described above) */ unsigned long flags; diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f8..79cba5fbc28 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c @@ -65,3 +65,23 @@ out: btrfs_free_path(path); return ret; } + +int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = offset; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f21260..361ad323faa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -121,6 +121,15 @@ struct inodevec { int nr; }; +#define MAX_EXTENTS 128 + +struct file_extent_cluster { + u64 start; + u64 end; + u64 boundary[MAX_EXTENTS]; + unsigned int nr; +}; + struct reloc_control { /* block group to relocate */ struct btrfs_block_group_cache *block_group; @@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, struct reloc_control *rc) { if (test_range_bit(&rc->processed_blocks, bytenr, - bytenr + blocksize - 1, EXTENT_DIRTY, 1)) + bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) return 1; return 0; } @@ -2529,56 +2538,94 @@ out: } static noinline_for_stack -int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +int setup_extent_mapping(struct inode *inode, u64 start, u64 end, + u64 block_start) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret = 0; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + + em->start = start; + em->len = end + 1 - start; + em->block_len = em->len; + em->block_start = block_start; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + while (1) { + write_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, end, 0); + } + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + return ret; +} + +static int relocate_file_extent_cluster(struct inode *inode, + struct file_extent_cluster *cluster) { u64 page_start; u64 page_end; - unsigned long i; - unsigned long first_index; + u64 offset = BTRFS_I(inode)->index_cnt; + unsigned long index; unsigned long last_index; - unsigned int total_read = 0; - unsigned int total_dirty = 0; + unsigned int dirty_page = 0; struct page *page; struct file_ra_state *ra; - struct btrfs_ordered_extent *ordered; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int nr = 0; int ret = 0; + if (!cluster->nr) + return 0; + ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) return -ENOMEM; + index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; + last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + mutex_lock(&inode->i_mutex); - first_index = start >> PAGE_CACHE_SHIFT; - last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - /* make sure the dirty trick played by the caller work */ - while (1) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - first_index, last_index); - if (ret != -EBUSY) - break; - schedule_timeout(HZ/10); - } + i_size_write(inode, cluster->end + 1 - offset); + ret = setup_extent_mapping(inode, cluster->start - offset, + cluster->end - offset, cluster->start); if (ret) goto out_unlock; file_ra_state_init(ra, inode->i_mapping); - for (i = first_index ; i <= last_index; i++) { - if (total_read % ra->ra_pages == 0) { - btrfs_force_ra(inode->i_mapping, ra, NULL, i, - min(last_index, ra->ra_pages + i - 1)); - } - total_read++; -again: - if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - BUG_ON(1); - page = grab_cache_page(inode->i_mapping, i); + WARN_ON(cluster->start != cluster->boundary[0]); + while (index <= last_index) { + page = find_lock_page(inode->i_mapping, index); if (!page) { - ret = -ENOMEM; - goto out_unlock; + page_cache_sync_readahead(inode->i_mapping, + ra, NULL, index, + last_index + 1 - index); + page = grab_cache_page(inode->i_mapping, index); + if (!page) { + ret = -ENOMEM; + goto out_unlock; + } + } + + if (PageReadahead(page)) { + page_cache_async_readahead(inode->i_mapping, + ra, NULL, page, index, + last_index + 1 - index); } + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2589,75 +2636,79 @@ again: goto out_unlock; } } - wait_on_page_writeback(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - - ordered = btrfs_lookup_ordered_extent(inode, page_start); - if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - unlock_page(page); - page_cache_release(page); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - goto again; - } + + lock_extent(&BTRFS_I(inode)->io_tree, + page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); - if (i == first_index) - set_extent_bits(io_tree, page_start, page_end, + if (nr < cluster->nr && + page_start + offset == cluster->boundary[nr]) { + set_extent_bits(&BTRFS_I(inode)->io_tree, + page_start, page_end, EXTENT_BOUNDARY, GFP_NOFS); + nr++; + } btrfs_set_extent_delalloc(inode, page_start, page_end); set_page_dirty(page); - total_dirty++; + dirty_page++; - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, + page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); + + index++; + if (nr < cluster->nr && + page_end + 1 + offset == cluster->boundary[nr]) { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + dirty_page); + dirty_page = 0; + } + } + if (dirty_page) { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + dirty_page); } + WARN_ON(nr != cluster->nr); out_unlock: mutex_unlock(&inode->i_mutex); kfree(ra); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); return ret; } static noinline_for_stack -int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) +int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, + struct file_extent_cluster *cluster) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *em; - u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; - u64 end = start + extent_key->offset - 1; - - em = alloc_extent_map(GFP_NOFS); - em->start = start; - em->len = extent_key->offset; - em->block_len = extent_key->offset; - em->block_start = extent_key->objectid; - em->bdev = root->fs_info->fs_devices->latest_bdev; - set_bit(EXTENT_FLAG_PINNED, &em->flags); + int ret; - /* setup extent map to cheat btrfs_readpage */ - lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); - while (1) { - int ret; - spin_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, start, end, 0); + if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { + ret = relocate_file_extent_cluster(inode, cluster); + if (ret) + return ret; + cluster->nr = 0; } - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); - return relocate_inode_pages(inode, start, extent_key->offset); + if (!cluster->nr) + cluster->start = extent_key->objectid; + else + BUG_ON(cluster->nr >= MAX_EXTENTS); + cluster->end = extent_key->objectid + extent_key->offset - 1; + cluster->boundary[cluster->nr] = extent_key->objectid; + cluster->nr++; + + if (cluster->nr >= MAX_EXTENTS) { + ret = relocate_file_extent_cluster(inode, cluster); + if (ret) + return ret; + cluster->nr = 0; + } + return 0; } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 @@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) return 0; } + static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; struct btrfs_key key; + struct file_extent_cluster *cluster; struct btrfs_trans_handle *trans = NULL; struct btrfs_path *path; struct btrfs_extent_item *ei; @@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) int ret; int err = 0; + cluster = kzalloc(sizeof(*cluster), GFP_NOFS); + if (!cluster) + return -ENOMEM; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; + rc->extents_found = 0; + rc->extents_skipped = 0; + rc->search_start = rc->block_group->key.objectid; clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, GFP_NOFS); @@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_end_transaction(trans, rc->extent_root); trans = NULL; btrfs_btree_balance_dirty(rc->extent_root, nr); if (rc->stage == MOVE_DATA_EXTENTS && (flags & BTRFS_EXTENT_FLAG_DATA)) { rc->found_file_extent = 1; - ret = relocate_data_extent(rc->data_inode, &key); + ret = relocate_data_extent(rc->data_inode, + &key, cluster); if (ret < 0) { err = ret; break; @@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) btrfs_btree_balance_dirty(rc->extent_root, nr); } + if (!err) { + ret = relocate_file_extent_cluster(rc->data_inode, cluster); + if (ret < 0) + err = ret; + } + + kfree(cluster); + rc->create_reloc_root = 0; smp_mb(); @@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } static int __insert_orphan_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 size) + struct btrfs_root *root, u64 objectid) { struct btrfs_path *path; struct btrfs_inode_item *item; @@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); btrfs_set_inode_generation(leaf, item, 1); - btrfs_set_inode_size(leaf, item, size); + btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); btrfs_mark_buffer_dirty(leaf); @@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (err) goto out; - err = __insert_orphan_inode(trans, root, objectid, group->key.offset); - BUG_ON(err); - - err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, - group->key.offset, 0, group->key.offset, - 0, 0, 0); + err = __insert_orphan_inode(trans, root, objectid); BUG_ON(err); key.objectid = objectid; @@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) btrfs_wait_ordered_extents(fs_info->tree_root, 0); while (1) { - mutex_lock(&fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(fs_info->tree_root); - mutex_unlock(&fs_info->cleaner_mutex); - rc->extents_found = 0; rc->extents_skipped = 0; + mutex_lock(&fs_info->cleaner_mutex); + + btrfs_clean_old_snapshots(fs_info->tree_root); ret = relocate_block_group(rc); + + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; break; @@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) } } - filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, - rc->block_group->key.objectid, - rc->block_group->key.objectid + - rc->block_group->key.offset - 1); + filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, + rc->block_group->key.objectid, + rc->block_group->key.objectid + + rc->block_group->key.offset - 1); WARN_ON(rc->block_group->pinned > 0); WARN_ON(rc->block_group->reserved > 0); @@ -3530,6 +3594,26 @@ out: return err; } +static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + int ret; + + trans = btrfs_start_transaction(root->fs_info->tree_root, 1); + + memset(&root->root_item.drop_progress, 0, + sizeof(root->root_item.drop_progress)); + root->root_item.drop_level = 0; + btrfs_set_root_refs(&root->root_item, 0); + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + + ret = btrfs_end_transaction(trans, root->fs_info->tree_root); + BUG_ON(ret); + return 0; +} + /* * recover relocation interrupted by system crash. * @@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset); if (IS_ERR(fs_root)) { - err = PTR_ERR(fs_root); - goto out; + ret = PTR_ERR(fs_root); + if (ret != -ENOENT) { + err = ret; + goto out; + } + mark_garbage_root(reloc_root); } } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55..9351428f30e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, goto out; BUG_ON(ret == 0); + if (path->slots[0] == 0) { + ret = 1; + goto out; + } l = path->nodes[0]; - BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; btrfs_item_key_to_cpu(l, &found_key, slot); - if (found_key.objectid != objectid) { + if (found_key.objectid != objectid || + found_key.type != BTRFS_ROOT_ITEM_KEY) { ret = 1; goto out; } - read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), - sizeof(*item)); - memcpy(key, &found_key, sizeof(found_key)); + if (item) + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + if (key) + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_free_path(path); @@ -249,6 +255,59 @@ err: return ret; } +int btrfs_find_orphan_roots(struct btrfs_root *tree_root) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_key key; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = 0; + + while (1) { + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) { + err = ret; + break; + } + + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(tree_root, path); + if (ret < 0) + err = ret; + if (ret != 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(tree_root, path); + + if (key.objectid != BTRFS_ORPHAN_OBJECTID || + key.type != BTRFS_ORPHAN_ITEM_KEY) + break; + + ret = btrfs_find_dead_roots(tree_root, key.offset); + if (ret) { + err = ret; + break; + } + + key.offset++; + } + + btrfs_free_path(path); + return err; +} + /* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) @@ -278,31 +337,57 @@ out: return ret; } -#if 0 /* this will get used when snapshot deletion is implemented */ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id) + u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, + const char *name, int name_len) + { + struct btrfs_path *path; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; struct btrfs_key key; + unsigned long ptr; + int err = 0; int ret; - struct btrfs_path *path; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = root_id; - key.type = type; + key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = ref_id; - +again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - BUG_ON(ret); - - ret = btrfs_del_item(trans, tree_root, path); - BUG_ON(ret); + BUG_ON(ret < 0); + if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + + WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); + WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); + ptr = (unsigned long)(ref + 1); + WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); + *sequence = btrfs_root_ref_sequence(leaf, ref); + + ret = btrfs_del_item(trans, tree_root, path); + BUG_ON(ret); + } else + err = -ENOENT; + + if (key.type == BTRFS_ROOT_BACKREF_KEY) { + btrfs_release_path(tree_root, path); + key.objectid = ref_id; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = root_id; + goto again; + } btrfs_free_path(path); - return ret; + return err; } -#endif int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, @@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, return ret; } - /* * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY * or BTRFS_ROOT_BACKREF_KEY. @@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id, - u64 dirid, u64 sequence, + u64 root_id, u64 ref_id, u64 dirid, u64 sequence, const char *name, int name_len) { struct btrfs_key key; @@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; unsigned long ptr; - path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = root_id; - key.type = type; + key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = ref_id; - +again: ret = btrfs_insert_empty_item(trans, tree_root, path, &key, sizeof(*ref) + name_len); BUG_ON(ret); @@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, name, ptr, name_len); btrfs_mark_buffer_dirty(leaf); + if (key.type == BTRFS_ROOT_BACKREF_KEY) { + btrfs_release_path(tree_root, path); + key.objectid = ref_id; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = root_id; + goto again; + } + btrfs_free_path(path); - return ret; + return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2db17cd66fc..67035385444 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb) } static const struct super_operations btrfs_super_ops = { + .drop_inode = btrfs_drop_inode, .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da5..88f866f85e7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, { if (root->ref_cows && root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->root_item.refs == 0); WARN_ON(root->commit_root != root->node); radix_tree_tag_set(&root->fs_info->fs_roots_radix, @@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); key.objectid = objectid; - key.offset = 0; + /* record when the snapshot was created in key.offset */ + key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); @@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); - /* add the backref first */ ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, pending->root_key.objectid, - BTRFS_ROOT_BACKREF_KEY, parent_root->root_key.objectid, parent_inode->i_ino, index, pending->name, namelen); BUG_ON(ret); - /* now add the forward ref */ - ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, - parent_root->root_key.objectid, - BTRFS_ROOT_REF_KEY, - pending->root_key.objectid, - parent_inode->i_ino, index, pending->name, - namelen); - inode = btrfs_lookup_dentry(parent_inode, pending->dentry); d_instantiate(pending->dentry, inode); fail: @@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; - struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; int should_grow = 0; @@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return 0; } - pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); - if (!pinned_copy) - return -ENOMEM; - - extent_io_tree_init(pinned_copy, - root->fs_info->btree_inode->i_mapping, GFP_NOFS); - trans->transaction->in_commit = 1; trans->transaction->blocked = 1; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = commit_cowonly_roots(trans, root); BUG_ON(ret); + btrfs_prepare_extent_commit(trans, root); + cur_trans = root->fs_info->running_transaction; spin_lock(&root->fs_info->new_trans_lock); root->fs_info->running_transaction = NULL; @@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); - btrfs_copy_pinned(root, pinned_copy); - trans->transaction->blocked = 0; wake_up(&root->fs_info->transaction_wait); @@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->fs_info->tree_log_mutex); - btrfs_finish_extent_commit(trans, root, pinned_copy); - kfree(pinned_copy); + btrfs_finish_extent_commit(trans, root); /* do the directory inserts of any pending snapshot creations */ finish_pending_snapshots(trans, root->fs_info); @@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); - list_del_init(&root->root_list); - btrfs_drop_snapshot(root, 0); + list_del(&root->root_list); + + if (btrfs_header_backref_rev(root->node) < + BTRFS_MIXED_BACKREF_REV) + btrfs_drop_snapshot(root, 0); + else + btrfs_drop_snapshot(root, 1); } return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 30c0d45c1b5..7827841b55c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_update_pinned_extents(log->fs_info->extent_root, - eb->start, eb->len, 1); + btrfs_pin_extent(log->fs_info->extent_root, + eb->start, eb->len, 0); if (btrfs_buffer_uptodate(eb, gen)) { if (wc->write) @@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, - start, extent_end, extent_end, start, &alloc_hint); + start, extent_end, extent_end, start, &alloc_hint, 1); BUG_ON(ret); if (found_type == BTRFS_FILE_EXTENT_REG || @@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) break; - if (parent == sb->s_root) + if (IS_ROOT(parent)) break; parent = parent->d_parent; @@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } + if (root != BTRFS_I(inode)->root || + btrfs_root_refs(&root->root_item) == 0) { + ret = 1; + goto end_no_trans; + } + ret = check_parent_dirs_for_sync(trans, inode, parent, sb, last_committed); if (ret) @@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, break; inode = parent->d_inode; + if (root != BTRFS_I(inode)->root) + break; + if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { ret = btrfs_log_inode(trans, root, inode, inode_only); BUG_ON(ret); } - if (parent == sb->s_root) + if (IS_ROOT(parent)) break; parent = parent->d_parent; @@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) struct btrfs_key tmp_key; struct btrfs_root *log; struct btrfs_fs_info *fs_info = log_root_tree->fs_info; - u64 highest_inode; struct walk_control wc = { .process_func = process_one_buffer, .stage = 0, @@ -3010,11 +3018,6 @@ again: path); BUG_ON(ret); } - ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); - if (ret == 0) { - wc.replay_dest->highest_inode = highest_inode; - wc.replay_dest->last_inode_alloc = highest_inode; - } key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cf405b0828..23e7d36ff32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -276,7 +276,7 @@ loop_lock: * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && batch_run > 32 && + if (pending && bdi_write_congested(bdi) && batch_run > 8 && fs_info->fs_devices->open_devices > 1) { struct io_context *ioc; @@ -719,10 +719,9 @@ error: * called very infrequently and that a given device has a small number * of extents */ -static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, - u64 num_bytes, u64 *start, - u64 *max_avail) +int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *max_avail) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; @@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; + ret = btrfs_can_relocate(extent_root, chunk_offset); + if (ret) + return -ENOSPC; + /* step one, relocate all the extents inside this chunk */ ret = btrfs_relocate_block_group(extent_root, chunk_offset); BUG_ON(ret); @@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, * step two, delete the device extents and the * chunk tree entries */ - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, chunk_offset, 1); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); @@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); BUG_ON(ret); - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); kfree(map); em->bdev = NULL; @@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) struct btrfs_key found_key; u64 chunk_tree = chunk_root->root_key.objectid; u64 chunk_type; + bool retried = false; + int failed = 0; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; +again: key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) ret = btrfs_relocate_chunk(chunk_root, chunk_tree, found_key.objectid, found_key.offset); - BUG_ON(ret); + if (ret == -ENOSPC) + failed++; + else if (ret) + BUG(); } if (found_key.offset == 0) @@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) key.offset = found_key.offset - 1; } ret = 0; + if (failed && !retried) { + failed = 0; + retried = true; + goto again; + } else if (failed && retried) { + WARN_ON(1); + ret = -ENOSPC; + } error: btrfs_free_path(path); return ret; @@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root) continue; ret = btrfs_shrink_device(device, old_size - size_to_free); + if (ret == -ENOSPC) + break; BUG_ON(ret); trans = btrfs_start_transaction(dev_root, 1); @@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_chunk); - key.offset = found_key.offset; /* chunk zero is special */ - if (key.offset == 0) + if (found_key.offset == 0) break; btrfs_release_path(chunk_root, path); @@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk_root->root_key.objectid, found_key.objectid, found_key.offset); - BUG_ON(ret); + BUG_ON(ret && ret != -ENOSPC); + key.offset = found_key.offset - 1; } ret = 0; error: @@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 chunk_offset; int ret; int slot; + int failed = 0; + bool retried = false; struct extent_buffer *l; struct btrfs_key key; struct btrfs_super_block *super_copy = &root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); + u64 old_size = device->total_bytes; u64 diff = device->total_bytes - new_size; if (new_size >= device->total_bytes) @@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto done; - } - path->reada = 2; lock_chunks(root); @@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) if (device->writeable) device->fs_devices->total_rw_bytes -= diff; unlock_chunks(root); - btrfs_end_transaction(trans, root); +again: key.objectid = device->devid; key.offset = (u64)-1; key.type = BTRFS_DEV_EXTENT_KEY; @@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) goto done; if (ret) { ret = 0; + btrfs_release_path(root, path); break; } @@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) slot = path->slots[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); - if (key.objectid != device->devid) + if (key.objectid != device->devid) { + btrfs_release_path(root, path); break; + } dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); length = btrfs_dev_extent_length(l, dev_extent); - if (key.offset + length <= new_size) + if (key.offset + length <= new_size) { + btrfs_release_path(root, path); break; + } chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); @@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, chunk_offset); - if (ret) + if (ret && ret != -ENOSPC) goto done; + if (ret == -ENOSPC) + failed++; + key.offset -= 1; + } + + if (failed && !retried) { + failed = 0; + retried = true; + goto again; + } else if (failed && retried) { + ret = -ENOSPC; + lock_chunks(root); + + device->total_bytes = old_size; + if (device->writeable) + device->fs_devices->total_rw_bytes += diff; + unlock_chunks(root); + goto done; } /* Shrinking succeeded, else we would be at "done". */ @@ -2294,9 +2333,9 @@ again: em->block_len = em->len; em_tree = &extent_root->fs_info->mapping_tree.map_tree; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); BUG_ON(ret); free_extent_map(em); @@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) int readonly = 0; int i; - spin_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - spin_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->map_tree.lock); if (!em) return 1; @@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) struct extent_map *em; while (1) { - spin_lock(&tree->map_tree.lock); + write_lock(&tree->map_tree.lock); em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); if (em) remove_extent_mapping(&tree->map_tree, em); - spin_unlock(&tree->map_tree.lock); + write_unlock(&tree->map_tree.lock); if (!em) break; kfree(em->bdev); @@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) struct extent_map_tree *em_tree = &map_tree->map_tree; int ret; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, len); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); @@ -2604,9 +2643,9 @@ again: atomic_set(&multi->error, 0); } - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em && unplug_page) return 0; @@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 stripe_nr; int i, j, nr = 0; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, chunk_start, 1); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(!em || em->start != chunk_start); map = (struct map_lookup *)em->bdev; @@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); - spin_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); - spin_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->map_tree.lock); /* already mapped? */ if (em && em->start <= logical && em->start + em->len > logical) { @@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev->in_fs_metadata = 1; } - spin_lock(&map_tree->map_tree.lock); + write_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em); - spin_unlock(&map_tree->map_tree.lock); + write_unlock(&map_tree->map_tree.lock); BUG_ON(ret); free_extent_map(em); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f72..31b0fabdd2e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); void btrfs_unlock_volumes(void); void btrfs_lock_volumes(void); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); +int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *max_avail); #endif diff --git a/fs/buffer.c b/fs/buffer.c index 209f7f15f5f..24afd7422ae 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - unsigned long limit; int err; - err = -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && size > (loff_t)limit) { - send_sig(SIGXFSZ, current, 0); - goto out; - } - if (size > inode->i_sb->s_maxbytes) + err = inode_newsize_ok(inode, size); + if (err) goto out; err = pagecache_write_begin(NULL, mapping, size, 0, diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cbc57f932d..d6db933df2b 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, { struct char_device_struct *cd; struct cdev *cdev; - char *s; int err = -ENOMEM; cd = __register_chrdev_region(major, baseminor, count, name); @@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, cdev->owner = fops->owner; cdev->ops = fops; kobject_set_name(&cdev->kobj, "%s", name); - for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) - *s = '!'; err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d79ce2e95c2..90c5b39f031 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -185,8 +185,7 @@ out_mount_failed: cifs_sb->mountdata = NULL; } #endif - if (cifs_sb->local_nls) - unload_nls(cifs_sb->local_nls); + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); } return rc; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1f09c761931..5e2492535da 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) static int cifs_vmtruncate(struct inode *inode, loff_t offset) { - struct address_space *mapping = inode->i_mapping; - unsigned long limit; + loff_t oldsize; + int err; spin_lock(&inode->i_lock); - if (inode->i_size < offset) - goto do_expand; - /* - * truncation of in-use swapfiles is disallowed - it would cause - * subsequent swapout to scribble on the now-freed blocks. - */ - if (IS_SWAPFILE(inode)) { - spin_unlock(&inode->i_lock); - goto out_busy; - } - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - /* - * unmap_mapping_range is called twice, first simply for efficiency - * so that truncate_inode_pages does fewer single-page unmaps. However - * after this first call, and before truncate_inode_pages finishes, - * it is possible for private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second unmap_mapping_range - * call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) { + err = inode_newsize_ok(inode, offset); + if (err) { spin_unlock(&inode->i_lock); - goto out_sig; - } - if (offset > inode->i_sb->s_maxbytes) { - spin_unlock(&inode->i_lock); - goto out_big; + goto out; } + + oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); -out_truncate: + truncate_pagecache(inode, oldsize, offset); if (inode->i_op->truncate) inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -out_busy: - return -ETXTBSY; +out: + return err; } static int diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9..d99860a3389 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -2,6 +2,7 @@ #define _CODA_INT_ struct dentry; +struct file; extern struct file_system_type coda_fs_type; extern unsigned long coda_timeout; diff --git a/fs/compat.c b/fs/compat.c index 3aa48834a22..d576b552e8e 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, char __user * type, unsigned long flags, void __user * data) { - unsigned long type_page; + char *kernel_type; unsigned long data_page; - unsigned long dev_page; + char *kernel_dev; char *dir_page; int retval; - retval = copy_mount_options (type, &type_page); + retval = copy_mount_string(type, &kernel_type); if (retval < 0) goto out; @@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, if (IS_ERR(dir_page)) goto out1; - retval = copy_mount_options (dev_name, &dev_page); + retval = copy_mount_string(dev_name, &kernel_dev); if (retval < 0) goto out2; - retval = copy_mount_options (data, &data_page); + retval = copy_mount_options(data, &data_page); if (retval < 0) goto out3; retval = -EINVAL; - if (type_page && data_page) { - if (!strcmp((char *)type_page, SMBFS_NAME)) { + if (kernel_type && data_page) { + if (!strcmp(kernel_type, SMBFS_NAME)) { do_smb_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NCPFS_NAME)) { + } else if (!strcmp(kernel_type, NCPFS_NAME)) { do_ncp_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NFS4_NAME)) { + } else if (!strcmp(kernel_type, NFS4_NAME)) { if (do_nfs4_super_data_conv((void *) data_page)) goto out4; } } - retval = do_mount((char*)dev_page, dir_page, (char*)type_page, + retval = do_mount(kernel_dev, dir_page, kernel_type, flags, (void*)data_page); out4: free_page(data_page); out3: - free_page(dev_page); + kfree(kernel_dev); out2: putname(dir_page); out1: - free_page(type_page); + kfree(kernel_type); out: return retval; } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb791344..31f4b0e6d72 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -63,9 +63,9 @@ static void drop_slab(void) } int drop_caches_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); if (write) { if (sysctl_drop_caches & 1) drop_pagecache(); diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 0c754e64232..8aadb99b763 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -1,6 +1,8 @@ config ECRYPT_FS tristate "eCrypt filesystem layer support (EXPERIMENTAL)" - depends on EXPERIMENTAL && KEYS && CRYPTO && NET + depends on EXPERIMENTAL && KEYS && NET + select CRYPTO_ECB + select CRYPTO_CBC help Encrypted filesystem that operates on the VFS layer. See <file:Documentation/filesystems/ecryptfs.txt> to learn more about diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b91851f1cda..fbb6e5eed69 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -245,13 +245,11 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) crypto_free_blkcipher(crypt_stat->tfm); if (crypt_stat->hash_tfm) crypto_free_hash(crypt_stat->hash_tfm); - mutex_lock(&crypt_stat->keysig_list_mutex); list_for_each_entry_safe(key_sig, key_sig_tmp, &crypt_stat->keysig_list, crypt_stat_list) { list_del(&key_sig->crypt_stat_list); kmem_cache_free(ecryptfs_key_sig_cache, key_sig); } - mutex_unlock(&crypt_stat->keysig_list_mutex); memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); } @@ -511,13 +509,14 @@ int ecryptfs_encrypt_page(struct page *page) + extent_offset), crypt_stat); rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, offset, crypt_stat->extent_size); - if (rc) { + if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting " "to write lower page; rc = [%d]" "\n", rc); goto out; } } + rc = 0; out: if (enc_extent_page) { kunmap(enc_extent_page); @@ -633,7 +632,7 @@ int ecryptfs_decrypt_page(struct page *page) rc = ecryptfs_read_lower(enc_extent_virt, offset, crypt_stat->extent_size, ecryptfs_inode); - if (rc) { + if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting " "to read lower page; rc = [%d]" "\n", rc); @@ -797,6 +796,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) kfree(full_alg_name); if (IS_ERR(crypt_stat->tfm)) { rc = PTR_ERR(crypt_stat->tfm); + crypt_stat->tfm = NULL; ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " "Error initializing cipher [%s]\n", crypt_stat->cipher); @@ -925,7 +925,9 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( struct ecryptfs_global_auth_tok *global_auth_tok; int rc = 0; + mutex_lock(&crypt_stat->keysig_list_mutex); mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(global_auth_tok, &mount_crypt_stat->global_auth_tok_list, mount_crypt_stat_list) { @@ -934,13 +936,13 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig); if (rc) { printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc); - mutex_unlock( - &mount_crypt_stat->global_auth_tok_list_mutex); goto out; } } - mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + out: + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + mutex_unlock(&crypt_stat->keysig_list_mutex); return rc; } @@ -1212,14 +1214,15 @@ int ecryptfs_read_and_validate_header_region(char *data, crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, ecryptfs_inode); - if (rc) { + if (rc < 0) { printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", __func__, rc); goto out; } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { rc = -EINVAL; - } + } else + rc = 0; out: return rc; } @@ -1314,10 +1317,11 @@ ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry, rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, 0, virt_len); - if (rc) + if (rc < 0) printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", __func__, - rc); + "information to lower file; rc = [%d]\n", __func__, rc); + else + rc = 0; return rc; } @@ -1597,7 +1601,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, ecryptfs_inode); - if (!rc) + if (rc >= 0) rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, ecryptfs_dentry, ECRYPTFS_VALIDATE_HEADER_SIZE); @@ -1702,7 +1706,7 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, } else { printk(KERN_ERR "%s: No support for requested filename " "encryption method in this release\n", __func__); - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out; } out: @@ -1763,7 +1767,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, if (IS_ERR(*key_tfm)) { rc = PTR_ERR(*key_tfm); printk(KERN_ERR "Unable to allocate crypto cipher with name " - "[%s]; rc = [%d]\n", cipher_name, rc); + "[%s]; rc = [%d]\n", full_alg_name, rc); goto out; } crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY); @@ -1776,7 +1780,8 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); if (rc) { printk(KERN_ERR "Error attempting to set key of size [%zd] for " - "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); + "cipher [%s]; rc = [%d]\n", *key_size, full_alg_name, + rc); rc = -EINVAL; goto out; } @@ -2166,7 +2171,7 @@ int ecryptfs_encrypt_and_encode_filename( (*encoded_name)[(*encoded_name_size)] = '\0'; (*encoded_name_size)++; } else { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; } if (rc) { printk(KERN_ERR "%s: Error attempting to encode " diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 2f0945d6329..056fed62d0d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -476,6 +476,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); struct dentry *lower_dir_dentry; + dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { @@ -489,6 +490,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) d_drop(dentry); out_unlock: unlock_dir(lower_dir_dentry); + dput(lower_dentry); return rc; } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 259525c9abb..a0a7847567e 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -416,7 +416,9 @@ ecryptfs_find_global_auth_tok_for_sig( &mount_crypt_stat->global_auth_tok_list, mount_crypt_stat_list) { if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { - (*global_auth_tok) = walker; + rc = key_validate(walker->global_auth_tok_key); + if (!rc) + (*global_auth_tok) = walker; goto out; } } @@ -612,7 +614,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, } /* TODO: Support other key modules than passphrase for * filename encryption */ - BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { + rc = -EOPNOTSUPP; + printk(KERN_INFO "%s: Filename encryption only supports " + "password tokens\n", __func__); + goto out_free_unlock; + } sg_init_one( &s->hash_sg, (u8 *)s->auth_tok->token.password.session_key_encryption_key, @@ -910,7 +917,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, } /* TODO: Support other key modules than passphrase for * filename encryption */ - BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { + rc = -EOPNOTSUPP; + printk(KERN_INFO "%s: Filename encryption only supports " + "password tokens\n", __func__); + goto out_free_unlock; + } rc = crypto_blkcipher_setkey( s->desc.tfm, s->auth_tok->token.password.session_key_encryption_key, @@ -1316,8 +1328,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, rc = -EINVAL; goto out_free; } - ecryptfs_cipher_code_to_string(crypt_stat->cipher, - (u16)data[(*packet_size)]); + rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, + (u16)data[(*packet_size)]); + if (rc) + goto out_free; /* A little extra work to differentiate among the AES key * sizes; see RFC2440 */ switch(data[(*packet_size)++]) { @@ -1328,7 +1342,9 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, crypt_stat->key_size = (*new_auth_tok)->session_key.encrypted_key_size; } - ecryptfs_init_crypt_ctx(crypt_stat); + rc = ecryptfs_init_crypt_ctx(crypt_stat); + if (rc) + goto out_free; if (unlikely(data[(*packet_size)++] != 0x03)) { printk(KERN_WARNING "Only S2K ID 3 is currently supported\n"); rc = -ENOSYS; @@ -2366,21 +2382,18 @@ struct kmem_cache *ecryptfs_key_sig_cache; int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig) { struct ecryptfs_key_sig *new_key_sig; - int rc = 0; new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL); if (!new_key_sig) { - rc = -ENOMEM; printk(KERN_ERR "Error allocating from ecryptfs_key_sig_cache\n"); - goto out; + return -ENOMEM; } memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); - mutex_lock(&crypt_stat->keysig_list_mutex); + /* Caller must hold keysig_list_mutex */ list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); - mutex_unlock(&crypt_stat->keysig_list_mutex); -out: - return rc; + + return 0; } struct kmem_cache *ecryptfs_global_auth_tok_cache; diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index c6d7a4d748a..e14cf7e588d 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -136,6 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file, const struct cred *cred) { struct ecryptfs_open_req *req; + int flags = O_LARGEFILE; int rc = 0; /* Corresponding dput() and mntput() are done when the @@ -143,10 +144,14 @@ int ecryptfs_privileged_open(struct file **lower_file, * destroyed. */ dget(lower_dentry); mntget(lower_mnt); - (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDWR | O_LARGEFILE), cred); + flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; + (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; + if (flags & O_RDONLY) { + rc = PTR_ERR((*lower_file)); + goto out; + } req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); if (!req) { rc = -ENOMEM; @@ -180,21 +185,8 @@ int ecryptfs_privileged_open(struct file **lower_file, __func__); goto out_unlock; } - if (IS_ERR(*req->lower_file)) { + if (IS_ERR(*req->lower_file)) rc = PTR_ERR(*req->lower_file); - dget(lower_dentry); - mntget(lower_mnt); - (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDONLY | O_LARGEFILE), cred); - if (IS_ERR(*lower_file)) { - rc = PTR_ERR(*req->lower_file); - (*lower_file) = NULL; - printk(KERN_WARNING "%s: Error attempting privileged " - "open of lower file with either RW or RO " - "perms; rc = [%d]. Giving up.\n", - __func__, rc); - } - } out_unlock: mutex_unlock(&req->mux); out_free: diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9f0aa9883c2..101fe4c7b1e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -129,11 +129,10 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); rc = ecryptfs_privileged_open(&inode_info->lower_file, lower_dentry, lower_mnt, cred); - if (rc || IS_ERR(inode_info->lower_file)) { + if (rc) { printk(KERN_ERR "Error opening lower persistent file " "for lower_dentry [0x%p] and lower_mnt [0x%p]; " "rc = [%d]\n", lower_dentry, lower_mnt, rc); - rc = PTR_ERR(inode_info->lower_file); inode_info->lower_file = NULL; } } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 05772aeaa8f..df4ce99d059 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -396,9 +396,11 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, sizeof(u64)); kfree(file_size_virt); - if (rc) + if (rc < 0) printk(KERN_ERR "%s: Error writing file size to header; " "rc = [%d]\n", __func__, rc); + else + rc = 0; out: return rc; } diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index a137c6ea2fe..0cc4fafd655 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -34,15 +34,14 @@ * * Write data to the lower file. * - * Returns zero on success; non-zero on error + * Returns bytes written on success; less than zero on error */ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size) { struct ecryptfs_inode_info *inode_info; - ssize_t octets_written; mm_segment_t fs_save; - int rc = 0; + ssize_t rc; inode_info = ecryptfs_inode_to_private(ecryptfs_inode); mutex_lock(&inode_info->lower_file_mutex); @@ -50,14 +49,9 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, inode_info->lower_file->f_pos = offset; fs_save = get_fs(); set_fs(get_ds()); - octets_written = vfs_write(inode_info->lower_file, data, size, - &inode_info->lower_file->f_pos); + rc = vfs_write(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); set_fs(fs_save); - if (octets_written < 0) { - printk(KERN_ERR "%s: octets_written = [%td]; " - "expected [%td]\n", __func__, octets_written, size); - rc = -EINVAL; - } mutex_unlock(&inode_info->lower_file_mutex); mark_inode_dirty_sync(ecryptfs_inode); return rc; @@ -91,6 +85,8 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, + offset_in_page); virt = kmap(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); + if (rc > 0) + rc = 0; kunmap(page_for_lower); return rc; } @@ -229,30 +225,24 @@ out: * Read @size bytes of data at byte offset @offset from the lower * inode into memory location @data. * - * Returns zero on success; non-zero on error + * Returns bytes read on success; 0 on EOF; less than zero on error */ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, struct inode *ecryptfs_inode) { struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_inode); - ssize_t octets_read; mm_segment_t fs_save; - int rc = 0; + ssize_t rc; mutex_lock(&inode_info->lower_file_mutex); BUG_ON(!inode_info->lower_file); inode_info->lower_file->f_pos = offset; fs_save = get_fs(); set_fs(get_ds()); - octets_read = vfs_read(inode_info->lower_file, data, size, - &inode_info->lower_file->f_pos); + rc = vfs_read(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); set_fs(fs_save); - if (octets_read < 0) { - printk(KERN_ERR "%s: octets_read = [%td]; " - "expected [%td]\n", __func__, octets_read, size); - rc = -EINVAL; - } mutex_unlock(&inode_info->lower_file_mutex); return rc; } @@ -284,6 +274,8 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); virt = kmap(page_for_ecryptfs); rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); + if (rc > 0) + rc = 0; kunmap(page_for_ecryptfs); flush_dcache_page(page_for_ecryptfs); return rc; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 12d649602d3..b15a43a80ab 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -77,7 +77,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); - mutex_lock(&inode_info->lower_file_mutex); if (inode_info->lower_file) { struct dentry *lower_dentry = inode_info->lower_file->f_dentry; @@ -89,7 +88,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) d_drop(lower_dentry); } } - mutex_unlock(&inode_info->lower_file_mutex); ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); } diff --git a/fs/exec.c b/fs/exec.c index 5c833c18d0d..d49be6bc179 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include <linux/kmod.h> #include <linux/fsnotify.h> #include <linux/fs_struct.h> +#include <linux/pipe_fs_i.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -63,6 +64,7 @@ int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; +unsigned int core_pipe_limit; int suid_dumpable = 0; /* The maximal length of core_pattern is also specified in sysctl.c */ @@ -1393,18 +1395,16 @@ out_ret: return retval; } -int set_binfmt(struct linux_binfmt *new) +void set_binfmt(struct linux_binfmt *new) { - struct linux_binfmt *old = current->binfmt; + struct mm_struct *mm = current->mm; - if (new) { - if (!try_module_get(new->module)) - return -1; - } - current->binfmt = new; - if (old) - module_put(old->module); - return 0; + if (mm->binfmt) + module_put(mm->binfmt->module); + + mm->binfmt = new; + if (new) + __module_get(new->module); } EXPORT_SYMBOL(set_binfmt); @@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm) return (ret >= 2) ? 2 : ret; } +static void wait_for_dump_helpers(struct file *file) +{ + struct pipe_inode_info *pipe; + + pipe = file->f_path.dentry->d_inode->i_pipe; + + pipe_lock(pipe); + pipe->readers++; + pipe->writers--; + + while ((pipe->readers > 1) && (!signal_pending(current))) { + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_wait(pipe); + } + + pipe->readers--; + pipe->writers++; + pipe_unlock(pipe); + +} + + void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; @@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; char **helper_argv = NULL; int helper_argc = 0; - char *delimit; + int dump_count = 0; + static atomic_t core_dump_count = ATOMIC_INIT(0); audit_core_dumps(signr); - binfmt = current->binfmt; + binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; @@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) lock_kernel(); ispipe = format_corename(corename, signr); unlock_kernel(); - /* - * Don't bother to check the RLIMIT_CORE value if core_pattern points - * to a pipe. Since we're not writing directly to the filesystem - * RLIMIT_CORE doesn't really apply, as no actual core file will be - * created unless the pipe reader choses to write out the core file - * at which point file size limits and permissions will be imposed - * as it does with any other process - */ + if ((!ispipe) && (core_limit < binfmt->min_coredump)) goto fail_unlock; if (ispipe) { + if (core_limit == 0) { + /* + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * core_limit of 0 here as a speacial value. Any + * non-zero limit gets set to RLIM_INFINITY below, but + * a limit of 0 skips the dump. This is a consistent + * way to catch recursive crashes. We can still crash + * if the core_pattern binary sets RLIM_CORE = !0 + * but it runs as root, and can do lots of stupid things + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + printk(KERN_WARNING + "Process %d(%s) has RLIMIT_CORE set to 0\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Aborting core\n"); + goto fail_unlock; + } + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_dropcount; + } + helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); - goto fail_unlock; - } - /* Terminate the string before the first option */ - delimit = strchr(corename, ' '); - if (delimit) - *delimit = '\0'; - delimit = strrchr(helper_argv[0], '/'); - if (delimit) - delimit++; - else - delimit = helper_argv[0]; - if (!strcmp(delimit, current->comm)) { - printk(KERN_NOTICE "Recursive core dump detected, " - "aborting\n"); - goto fail_unlock; + goto fail_dropcount; } core_limit = RLIM_INFINITY; /* SIGPIPE can happen, but it's just never processed */ - if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, + if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, &file)) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); - goto fail_unlock; + goto fail_dropcount; } } else file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(file)) - goto fail_unlock; + goto fail_dropcount; inode = file->f_path.dentry->d_inode; if (inode->i_nlink > 1) goto close_fail; /* multiple links - don't dump */ @@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (retval) current->signal->group_exit_code |= 0x80; close_fail: + if (ispipe && core_pipe_limit) + wait_for_dump_helpers(file); filp_close(file, NULL); +fail_dropcount: + if (dump_count) + atomic_dec(&core_dump_count); fail_unlock: if (helper_argv) argv_free(helper_argv); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5ab10c3bbeb..9f500dec3b5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) } lock_super(sb); - lock_kernel(); sbi = sb->s_fs_info; fscb->s_nextid = cpu_to_le64(sbi->s_nextid); fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); @@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) out: if (or) osd_end_request(or); - unlock_kernel(); unlock_super(sb); kfree(fscb); return ret; @@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; - lock_kernel(); - if (sb->s_dirt) exofs_write_super(sb); @@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1c1638f873a..ade634076d0 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = { .writepages = ext2_writepages, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; const struct address_space_operations ext2_aops_xip = { @@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .error_remove_page = generic_error_remove_page, }; /* diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cd098a7b77f..acf1b142332 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_writeback_aops = { @@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_journalled_aops = { @@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3a798737e30..064746fad58 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_writeback_aops = { @@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_journalled_aops = { @@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = { .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_da_aops = { @@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void ext4_set_aops(struct inode *inode) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8970d8c49bb..04629d1302f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fat_inode); - if (sbi->nls_disk) { - unload_nls(sbi->nls_disk); - sbi->nls_disk = NULL; - sbi->options.codepage = fat_default_codepage; - } - if (sbi->nls_io) { - unload_nls(sbi->nls_io); - sbi->nls_io = NULL; - } - if (sbi->options.iocharset != fat_default_iocharset) { + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + + if (sbi->options.iocharset != fat_default_iocharset) kfree(sbi->options.iocharset); - sbi->options.iocharset = fat_default_iocharset; - } sb->s_fs_info = NULL; kfree(sbi); diff --git a/fs/fcntl.c b/fs/fcntl.c index ae413086db9..fc089f2f7f5 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp) return pid; } +static int f_setown_ex(struct file *filp, unsigned long arg) +{ + struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex owner; + struct pid *pid; + int type; + int ret; + + ret = copy_from_user(&owner, owner_p, sizeof(owner)); + if (ret) + return ret; + + switch (owner.type) { + case F_OWNER_TID: + type = PIDTYPE_MAX; + break; + + case F_OWNER_PID: + type = PIDTYPE_PID; + break; + + case F_OWNER_GID: + type = PIDTYPE_PGID; + break; + + default: + return -EINVAL; + } + + rcu_read_lock(); + pid = find_vpid(owner.pid); + if (owner.pid && !pid) + ret = -ESRCH; + else + ret = __f_setown(filp, pid, type, 1); + rcu_read_unlock(); + + return ret; +} + +static int f_getown_ex(struct file *filp, unsigned long arg) +{ + struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex owner; + int ret = 0; + + read_lock(&filp->f_owner.lock); + owner.pid = pid_vnr(filp->f_owner.pid); + switch (filp->f_owner.pid_type) { + case PIDTYPE_MAX: + owner.type = F_OWNER_TID; + break; + + case PIDTYPE_PID: + owner.type = F_OWNER_PID; + break; + + case PIDTYPE_PGID: + owner.type = F_OWNER_GID; + break; + + default: + WARN_ON(1); + ret = -EINVAL; + break; + } + read_unlock(&filp->f_owner.lock); + + if (!ret) + ret = copy_to_user(owner_p, &owner, sizeof(owner)); + return ret; +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SETOWN: err = f_setown(filp, arg, 1); break; + case F_GETOWN_EX: + err = f_getown_ex(filp, arg); + break; + case F_SETOWN_EX: + err = f_setown_ex(filp, arg); + break; case F_GETSIG: err = filp->f_owner.signum; break; @@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p, static void send_sigio_to_task(struct task_struct *p, struct fown_struct *fown, - int fd, - int reason) + int fd, int reason, int group) { /* * F_SETSIG can change ->signum lockless in parallel, make @@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p, else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!group_send_sig_info(signum, &si, p)) + if (!do_send_sig_info(signum, &si, p, group)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: - group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); + do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); } } @@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band) struct task_struct *p; enum pid_type type; struct pid *pid; + int group = 1; read_lock(&fown->lock); + type = fown->pid_type; + if (type == PIDTYPE_MAX) { + group = 0; + type = PIDTYPE_PID; + } + pid = fown->pid; if (!pid) goto out_unlock_fown; read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { - send_sigio_to_task(p, fown, fd, band); + send_sigio_to_task(p, fown, fd, band, group); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: @@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) } static void send_sigurg_to_task(struct task_struct *p, - struct fown_struct *fown) + struct fown_struct *fown, int group) { if (sigio_perm(p, fown, SIGURG)) - group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); + do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); } int send_sigurg(struct fown_struct *fown) @@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown) struct task_struct *p; enum pid_type type; struct pid *pid; + int group = 1; int ret = 0; read_lock(&fown->lock); + type = fown->pid_type; + if (type == PIDTYPE_MAX) { + group = 0; + type = PIDTYPE_PID; + } + pid = fown->pid; if (!pid) goto out_unlock_fown; @@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown) read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { - send_sigurg_to_task(p, fown); + send_sigurg_to_task(p, fown, group); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: diff --git a/fs/file_table.c b/fs/file_table.c index 334ce39881f..8eb44042e00 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); } #else -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e703654e7f4..992f6c9410b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, return 0; if (attr->ia_valid & ATTR_SIZE) { - unsigned long limit; - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } + err = inode_newsize_ok(inode, attr->ia_size); + if (err) + return err; is_truncate = true; } @@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { - if (outarg.attr.size < oldsize) - fuse_truncate(inode->i_mapping, outarg.attr.size); + truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc9c79feb5f..01cc462ff45 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid); -void fuse_truncate(struct address_space *mapping, loff_t offset); - /** * Initialize the client device */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6da947daabd..1a822ce2b24 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -void fuse_truncate(struct address_space *mapping, loff_t offset) -{ - /* See vmtruncate() */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); -} - void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, spin_unlock(&fc->lock); if (S_ISREG(inode->i_mode) && oldsize != attr->size) { - if (attr->size < oldsize) - fuse_truncate(inode->i_mapping, attr->size); + truncate_pagecache(inode, oldsize, attr->size); invalidate_inode_pages2(inode->i_mapping); } } diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7ebae9a4ecc..694b5d48f03 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = { .direct_IO = gfs2_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations gfs2_ordered_aops = { @@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = { .direct_IO = gfs2_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations gfs2_jdata_aops = { @@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = { .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void gfs2_set_aops(struct inode *inode) diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7b6165f25fb..8bbe03c3f6d 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb) brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); - if (HFS_SB(sb)->nls_io) - unload_nls(HFS_SB(sb)->nls_io); - if (HFS_SB(sb)->nls_disk) - unload_nls(HFS_SB(sb)->nls_disk); + unload_nls(HFS_SB(sb)->nls_io); + unload_nls(HFS_SB(sb)->nls_disk); free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c0759fe0855..43022f3d514 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb) iput(HFSPLUS_SB(sb).alloc_file); iput(HFSPLUS_SB(sb).hidden_dir); brelse(HFSPLUS_SB(sb).s_vhbh); - if (HFSPLUS_SB(sb).nls) - unload_nls(HFSPLUS_SB(sb).nls); + unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -464,8 +463,7 @@ out: cleanup: hfsplus_put_super(sb); - if (nls) - unload_nls(nls); + unload_nls(nls); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index eba6d552d9c..87a1258953b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode) static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { - struct super_block *sb = inode->i_sb; - - if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; - if (!sb || (sb->s_flags & MS_ACTIVE)) { - spin_unlock(&inode_lock); - return; - } - inode->i_state |= I_WILL_FREE; - spin_unlock(&inode_lock); - /* - * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK - * in our backing_dev_info. - */ - write_inode_now(inode, 1); - spin_lock(&inode_lock); - inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; - hlist_del_init(&inode->i_hash); + if (generic_detach_inode(inode)) { + truncate_hugepages(inode, 0); + clear_inode(inode); + destroy_inode(inode); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - truncate_hugepages(inode, 0); - clear_inode(inode); - destroy_inode(inode); } static void hugetlbfs_drop_inode(struct inode *inode) @@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = { static struct vfsmount *hugetlbfs_vfsmount; -static int can_do_hugetlb_shm(int creat_flags) +static int can_do_hugetlb_shm(void) { - if (creat_flags != HUGETLB_SHMFS_INODE) - return 0; - if (capable(CAP_IPC_LOCK)) - return 1; - if (in_group_p(sysctl_hugetlb_shm_group)) - return 1; - return 0; + return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, @@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); - if (!can_do_hugetlb_shm(creat_flags)) { + if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { WARN_ONCE(1, diff --git a/fs/inode.c b/fs/inode.c index 76582b06ab9..4d8e3be5597 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) } EXPORT_SYMBOL(generic_delete_inode); -static void generic_forget_inode(struct inode *inode) +/** + * generic_detach_inode - remove inode from inode lists + * @inode: inode to remove + * + * Remove inode from inode lists, write it if it's dirty. This is just an + * internal VFS helper exported for hugetlbfs. Do not use! + * + * Returns 1 if inode should be completely destroyed. + */ +int generic_detach_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); - return; + return 0; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; @@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); + return 1; +} +EXPORT_SYMBOL_GPL(generic_detach_inode); + +static void generic_forget_inode(struct inode *inode) +{ + if (!generic_detach_inode(inode)) + return; if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct timespec now; - if (mnt_want_write(mnt)) - return; if (inode->i_flags & S_NOATIME) - goto out; + return; if (IS_NOATIME(inode)) - goto out; + return; if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; if (mnt->mnt_flags & MNT_NOATIME) - goto out; + return; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; now = current_fs_time(inode->i_sb); if (!relatime_need_update(mnt, inode, now)) - goto out; + return; if (timespec_equal(&inode->i_atime, &now)) - goto out; + return; + + if (mnt_want_write(mnt)) + return; inode->i_atime = now; mark_inode_dirty_sync(inode); -out: mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1444,34 +1461,37 @@ void file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - int sync_it = 0; - int err; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return; - err = mnt_want_write_file(file); - if (err) - return; - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) { - inode->i_mtime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; - if (!timespec_equal(&inode->i_ctime, &now)) { - inode->i_ctime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; - if (IS_I_VERSION(inode)) { - inode_inc_iversion(inode); - sync_it = 1; - } + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return; - if (sync_it) - mark_inode_dirty_sync(inode); + /* Finally allowed to write? Takes lock. */ + if (mnt_want_write_file(file)) + return; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + mark_inode_dirty_sync(inode); mnt_drop_write(file->f_path.mnt); } EXPORT_SYMBOL(file_update_time); @@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) else if (S_ISSOCK(mode)) inode->i_fop = &bad_sock_fops; else - printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", - mode); + printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" + " inode %s:%lu\n", mode, inode->i_sb->s_id, + inode->i_ino); } EXPORT_SYMBOL(init_special_inode); diff --git a/fs/internal.h b/fs/internal.h index d55ef562f0b..515175b8b72 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *); * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); +extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); diff --git a/fs/ioctl.c b/fs/ioctl.c index 5612880fcbe..7b17a14396f 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags); static int fiemap_check_ranges(struct super_block *sb, u64 start, u64 len, u64 *new_len) { + u64 maxbytes = (u64) sb->s_maxbytes; + *new_len = len; if (len == 0) return -EINVAL; - if (start > sb->s_maxbytes) + if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ - if ((len > sb->s_maxbytes) || - (sb->s_maxbytes - len) < start) - *new_len = sb->s_maxbytes - start; + if (len > maxbytes || (maxbytes - len) < start) + *new_len = maxbytes - start; return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 85f96bc651c..6b4dcd4f294 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb) #ifdef CONFIG_JOLIET lock_kernel(); - if (sbi->s_nls_iocharset) { - unload_nls(sbi->s_nls_iocharset); - sbi->s_nls_iocharset = NULL; - } + unload_nls(sbi->s_nls_iocharset); unlock_kernel(); #endif @@ -912,8 +909,7 @@ out_no_root: printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_no_inode: #ifdef CONFIG_JOLIET - if (sbi->s_nls_iocharset) - unload_nls(sbi->s_nls_iocharset); + unload_nls(sbi->s_nls_iocharset); #endif goto out_freesbi; out_no_read: diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 37e6dcda8fc..2234c73fc57 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb) rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); - sbi->nls_tab = NULL; + + unload_nls(sbi->nls_tab); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); iput(sbi->direct_inode); - sbi->direct_inode = NULL; kfree(sbi); @@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, if (nls_map != (void *) -1) { /* Discard old (if remount) */ - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); + unload_nls(sbi->nls_tab); sbi->nls_tab = nls_map; } return 1; diff --git a/fs/libfs.c b/fs/libfs.c index dcec3d3ea64..219576c52d8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; + size_t ret; + if (pos < 0) return -EINVAL; - if (pos >= available) + if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; - if (copy_to_user(to, from + pos, count)) + ret = copy_to_user(to, from + pos, count); + if (ret == count) return -EFAULT; + count -= ret; *ppos = pos + count; return count; } @@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, if (copy_from_user(attr->set_buf, buf, size)) goto out; - ret = len; /* claim we got the whole input */ attr->set_buf[size] = '\0'; val = simple_strtol(attr->set_buf, NULL, 0); - attr->set(attr->data, val); + ret = attr->set(attr->data, val); + if (ret == 0) + ret = len; /* on success, claim we got the whole input */ out: mutex_unlock(&attr->mutex); return ret; diff --git a/fs/namespace.c b/fs/namespace.c index 7230787d18b..bdc3cb4fd22 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags, { struct vfsmount *mnt; - if (!type || !memchr(type, 0, PAGE_SIZE)) + if (!type) return -EINVAL; /* we need capabilities... */ @@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } +int copy_mount_string(const void __user *data, char **where) +{ + char *tmp; + + if (!data) { + *where = NULL; + return 0; + } + + tmp = strndup_user(data, PAGE_SIZE); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + *where = tmp; + return 0; +} + /* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). @@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) - return -EINVAL; if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; @@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { - int retval; + int ret; + char *kernel_type; + char *kernel_dir; + char *kernel_dev; unsigned long data_page; - unsigned long type_page; - unsigned long dev_page; - char *dir_page; - retval = copy_mount_options(type, &type_page); - if (retval < 0) - return retval; + ret = copy_mount_string(type, &kernel_type); + if (ret < 0) + goto out_type; - dir_page = getname(dir_name); - retval = PTR_ERR(dir_page); - if (IS_ERR(dir_page)) - goto out1; + kernel_dir = getname(dir_name); + if (IS_ERR(kernel_dir)) { + ret = PTR_ERR(kernel_dir); + goto out_dir; + } - retval = copy_mount_options(dev_name, &dev_page); - if (retval < 0) - goto out2; + ret = copy_mount_string(dev_name, &kernel_dev); + if (ret < 0) + goto out_dev; - retval = copy_mount_options(data, &data_page); - if (retval < 0) - goto out3; + ret = copy_mount_options(data, &data_page); + if (ret < 0) + goto out_data; - retval = do_mount((char *)dev_page, dir_page, (char *)type_page, - flags, (void *)data_page); - free_page(data_page); + ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags, + (void *) data_page); -out3: - free_page(dev_page); -out2: - putname(dir_page); -out1: - free_page(type_page); - return retval; + free_page(data_page); +out_data: + kfree(kernel_dev); +out_dev: + putname(kernel_dir); +out_dir: + kfree(kernel_type); +out_type: + return ret; } /* diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b99ce205b1b..cf98da1be23 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb) #ifdef CONFIG_NCPFS_NLS /* unload the NLS charsets */ - if (server->nls_vol) - { - unload_nls(server->nls_vol); - server->nls_vol = NULL; - } - if (server->nls_io) - { - unload_nls(server->nls_io); - server->nls_io = NULL; - } + unload_nls(server->nls_vol); + unload_nls(server->nls_io); #endif /* CONFIG_NCPFS_NLS */ if (server->info_filp) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 53a7ed7eb9c..0d58caf4a6e 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) oldset_io = server->nls_io; server->nls_io = iocharset; - if (oldset_cp) - unload_nls(oldset_cp); - if (oldset_io) - unload_nls(oldset_io); + unload_nls(oldset_cp); + unload_nls(oldset_io); return 0; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5021b75d2d1..86d6b4db109 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = { .direct_IO = nfs_direct_IO, .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, + .error_remove_page = generic_error_remove_page, }; /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 060022b4651..faa091865ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { - if (i_size_read(inode) < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - } else { - struct address_space *mapping = inode->i_mapping; + loff_t oldsize; + int err; - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); + err = inode_newsize_ok(inode, offset); + if (err) + goto out; - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; + spin_lock(&inode->i_lock); + oldsize = inode->i_size; + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + truncate_pagecache(inode, oldsize, offset); +out: + return err; } /** diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 810770f9681..29786d3b932 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1711,6 +1711,8 @@ static int nfs_validate_mount_data(void *options, if (!(data->flags & NFS_MOUNT_TCP)) args->nfs_server.protocol = XPRT_TRANSPORT_UDP; + else + args->nfs_server.protocol = XPRT_TRANSPORT_TCP; /* N.B. caller will free nfs_server.hostname in all cases */ args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); args->namlen = data->namlen; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 477d37d83b3..2224b4d07bf 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset) void unload_nls(struct nls_table *nls) { - module_put(nls->owner); + if (nls) + module_put(nls->owner); } static const wchar_t charset2uni[256] = { diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b38f944f066..cfce53cb65d 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = { .migratepage = buffer_migrate_page, /* Move a page cache page from one physical page to an other. */ + .error_remove_page = generic_error_remove_page, }; /** @@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = { .migratepage = buffer_migrate_page, /* Move a page cache page from one physical page to an other. */ + .error_remove_page = generic_error_remove_page, }; #ifdef NTFS_RW diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index abaaa1cbf8d..80b04770e8e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -201,8 +201,7 @@ use_utf8: v, old_nls->charset); nls_map = old_nls; } else /* nls_map */ { - if (old_nls) - unload_nls(old_nls); + unload_nls(old_nls); } } else if (!strcmp(p, "utf8")) { bool val = false; @@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_free(vol->upcase); vol->upcase = NULL; } - if (vol->nls_map) { - unload_nls(vol->nls_map); - vol->nls_map = NULL; - } + + unload_nls(vol->nls_map); + sb->s_fs_info = NULL; kfree(vol); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 72e76062a90..deb2b132ae5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = { .releasepage = ocfs2_releasepage, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; diff --git a/fs/proc/array.c b/fs/proc/array.c index 0c6bc602e6c..07f77a7945c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -322,6 +322,8 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +#ifdef CONFIG_MMU + struct stack_stats { struct vm_area_struct *vma; unsigned long startpage; @@ -402,6 +404,11 @@ static inline void task_show_stack_usage(struct seq_file *m, mmput(mm); } } +#else +static void task_show_stack_usage(struct seq_file *m, struct task_struct *task) +{ +} +#endif /* CONFIG_MMU */ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 171e052c07b..c7bff4f603f 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n", + "VmallocChunk: %8lu kB\n" +#ifdef CONFIG_MEMORY_FAILURE + "HardwareCorrupted: %8lu kB\n" +#endif + , K(i.totalram), K(i.freeram), K(i.bufferram), @@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, vmi.largest_chunk >> 10 +#ifdef CONFIG_MEMORY_FAILURE + ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) +#endif ); hugetlb_report_meminfo(m); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16b..f667e8aeabd 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, write, filp, buf, &res, ppos); + error = table->proc_handler(table, write, buf, &res, ppos); if (!error) error = res; out: diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 0c10a0b3f14..766b1d45605 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -4,13 +4,18 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/time.h> +#include <linux/kernel_stat.h> #include <asm/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; struct timespec idle; - cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + int i; + cputime_t idletime = cputime_zero; + + for_each_possible_cpu(i) + idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); do_posix_clock_monotonic_gettime(&uptime); monotonic_to_bootbased(&uptime); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 11f0c06316d..32fae4040eb 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* make various checks */ order = get_order(newsize); if (unlikely(order >= MAX_ORDER)) - goto too_big; + return -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && newsize > limit) - goto fsize_exceeded; - - if (newsize > inode->i_sb->s_maxbytes) - goto too_big; + ret = inode_newsize_ok(inode, newsize); + if (ret) + return ret; i_size_write(inode, newsize); @@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) return 0; - fsize_exceeded: - send_sig(SIGXFSZ, current, 0); - too_big: - return -EFBIG; - - add_error: +add_error: while (loop < npages) __free_page(pages + loop++); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index 6c8c55dec2b..3ac28987f22 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; if (unlikely(pos + count > max)) { retval = -EOVERFLOW; if (pos >= max) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 47f132df0c3..c117fa80d1e 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; root = romfs_iget(sb, pos); - if (!root) + if (IS_ERR(root)) goto error; sb->s_root = d_alloc_root(root); diff --git a/fs/seq_file.c b/fs/seq_file.c index 6c959275f2d..eae7d9dbf3f 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path); */ int seq_path(struct seq_file *m, struct path *path, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = d_path(path, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = d_path(path, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } EXPORT_SYMBOL(seq_path); @@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path); int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *esc) { - int err = -ENAMETOOLONG; - if (m->count < m->size) { - char *s = m->buf + m->count; + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -ENAMETOOLONG; + + if (size) { char *p; spin_lock(&dcache_lock); - p = __d_path(path, root, s, m->size - m->count); + p = __d_path(path, root, buf, size); spin_unlock(&dcache_lock); - err = PTR_ERR(p); + res = PTR_ERR(p); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return 0; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; + else + res = -ENAMETOOLONG; } } - m->count = m->size; - return err; + seq_commit(m, res); + + return res < 0 ? res : 0; } /* @@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, */ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = dentry_path(dentry, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = dentry_path(dentry, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } int seq_bitmap(struct seq_file *m, const unsigned long *bits, diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1402d2d54f5..1c4c8f08997 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m) static void smb_unload_nls(struct smb_sb_info *server) { - if (server->remote_nls) { - unload_nls(server->remote_nls); - server->remote_nls = NULL; - } - if (server->local_nls) { - unload_nls(server->local_nls); - server->local_nls = NULL; - } + unload_nls(server->remote_nls); + unload_nls(server->local_nls); } static void diff --git a/fs/super.c b/fs/super.c index 0e7207b9815..19eb70b374b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -465,6 +465,48 @@ rescan: } EXPORT_SYMBOL(get_super); + +/** + * get_active_super - get an active reference to the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. Returns the superblock with an active + * reference and s_umount held exclusively or %NULL if none was found. + */ +struct super_block *get_active_super(struct block_device *bdev) +{ + struct super_block *sb; + + if (!bdev) + return NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev != bdev) + continue; + + sb->s_count++; + spin_unlock(&sb_lock); + down_write(&sb->s_umount); + if (sb->s_root) { + spin_lock(&sb_lock); + if (sb->s_count > S_BIAS) { + atomic_inc(&sb->s_active); + sb->s_count--; + spin_unlock(&sb_lock); + return sb; + } + spin_unlock(&sb_lock); + } + up_write(&sb->s_umount); + put_super(sb); + yield(); + spin_lock(&sb_lock); + } + spin_unlock(&sb_lock); + return NULL; +} struct super_block * user_get_super(dev_t dev) { @@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; int remount_rw; - + + if (sb->s_frozen != SB_UNFROZEN) + return -EBUSY; + #ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; #endif + if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (error) goto out_sb; + /* + * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE + * but s_maxbytes was an unsigned long long for many releases. Throw + * this warning for a little while to try and catch filesystems that + * violate this rule. This warning should be either removed or + * converted to a BUG() in 2.6.34. + */ + WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " + "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); + mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; up_write(&mnt->mnt_sb->s_umount); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d5e5559e31d..381854461b2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = { .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb608..c5bc67c4e3b 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -26,7 +26,6 @@ STATIC int xfs_stats_clear_proc_handler( ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( int c, ret, *valp = ctl->data; __uint32_t vn_active; - ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); diff --git a/include/acpi/button.h b/include/acpi/button.h new file mode 100644 index 00000000000..97eea0e4c01 --- /dev/null +++ b/include/acpi/button.h @@ -0,0 +1,25 @@ +#ifndef ACPI_BUTTON_H +#define ACPI_BUTTON_H + +#include <linux/notifier.h> + +#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) +extern int acpi_lid_notifier_register(struct notifier_block *nb); +extern int acpi_lid_notifier_unregister(struct notifier_block *nb); +extern int acpi_lid_open(void); +#else +static inline int acpi_lid_notifier_register(struct notifier_block *nb) +{ + return 0; +} +static inline int acpi_lid_notifier_unregister(struct notifier_block *nb) +{ + return 0; +} +static inline int acpi_lid_open(void) +{ + return 1; +} +#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */ + +#endif /* ACPI_BUTTON_H */ diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 4d3e48373e7..0c3dd860392 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -73,6 +73,19 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ #endif +#ifndef F_SETOWN_EX +#define F_SETOWN_EX 12 +#define F_GETOWN_EX 13 +#endif + +#define F_OWNER_TID 0 +#define F_OWNER_PID 1 +#define F_OWNER_GID 2 + +struct f_owner_ex { + int type; + pid_t pid; +}; /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index dd63bd38864..5ee13b2fd22 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -34,6 +34,7 @@ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index c840719a8c5..942d30b5aab 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -82,6 +82,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif + short _addr_lsb; /* LSB of the reported address */ } _sigfault; /* SIGPOLL */ @@ -112,6 +113,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno #endif +#define si_addr_lsb _sifields._sigfault._addr_lsb #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd @@ -192,7 +194,11 @@ typedef struct siginfo { #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ #define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ -#define NSIGBUS 3 +/* hardware memory error consumed on a machine check: action required */ +#define BUS_MCEERR_AR (__SI_FAULT|4) +/* hardware memory error detected in process but not consumed: action optional*/ +#define BUS_MCEERR_AO (__SI_FAULT|5) +#define NSIGBUS 5 /* * SIGTRAP si_codes diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 29ca8f53ffb..b6e818f4b24 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -721,12 +721,12 @@ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK_DATA(inittask) \ + NOSAVE_DATA \ + PAGE_ALIGNED_DATA(pagealigned) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ CONSTRUCTORS \ - NOSAVE_DATA \ - PAGE_ALIGNED_DATA(pagealigned) \ } #define INIT_TEXT_SECTION(inittext_align) \ diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 853508499d2..3f6e545609b 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -552,6 +552,7 @@ {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 8e1e92583fb..7e0cb1da92e 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_GET_APERTURE 0x23 #define DRM_I915_GEM_MMAP_GTT 0x24 #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) #define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id) +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id { __u32 pipe; }; +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or wont be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. */ + __u32 retained; +}; + #endif /* _I915_DRM_H_ */ diff --git a/arch/arm/include/asm/mach/mmc.h b/include/linux/amba/mmci.h index b490ecc79de..6b4241748dd 100644 --- a/arch/arm/include/asm/mach/mmc.h +++ b/include/linux/amba/mmci.h @@ -1,17 +1,18 @@ /* - * arch/arm/include/asm/mach/mmc.h + * include/linux/amba/mmci.h */ -#ifndef ASMARM_MACH_MMC_H -#define ASMARM_MACH_MMC_H +#ifndef AMBA_MMCI_H +#define AMBA_MMCI_H #include <linux/mmc/host.h> -struct mmc_platform_data { +struct mmci_platform_data { unsigned int ocr_mask; /* available voltages */ u32 (*translate_vdd)(struct device *, unsigned int); unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + unsigned long capabilities; }; #endif diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index dcad0ffd175..e4836c6b3dd 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -136,12 +136,12 @@ enum ssp_tx_level_trig { /** * enum SPI Clock Phase - clock phase (Motorola SPI interface only) - * @SSP_CLK_RISING_EDGE: Receive data on rising edge - * @SSP_CLK_FALLING_EDGE: Receive data on falling edge + * @SSP_CLK_FIRST_EDGE: Receive data on first edge transition (actual direction depends on polarity) + * @SSP_CLK_SECOND_EDGE: Receive data on second edge transition (actual direction depends on polarity) */ enum ssp_spi_clk_phase { - SSP_CLK_RISING_EDGE, - SSP_CLK_FALLING_EDGE + SSP_CLK_FIRST_EDGE, + SSP_CLK_SECOND_EDGE }; /** diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97f..a1c486a88e8 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,25 +58,60 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), +}; + +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; }; #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include <asm/async_tx.h> #else #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) { - if (cb_fn) - cb_fn(cb_fn_param); + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) +{ + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; +} + +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2046b5b8af4..aece486ac73 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); -extern int set_binfmt(struct linux_binfmt *new); +extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 90bba9e6228..b62bb9294d0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,6 +141,38 @@ enum { CGRP_WAIT_ON_RMDIR, }; +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ +struct cgroup_pidlist { + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,11 +211,12 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects pids_list and cached pid arrays. */ - struct rw_semaphore pids_mutex; - - /* Linked list of struct cgroup_pids */ - struct list_head pids_list; + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; /* For RCU-protected deletion */ struct rcu_head rcu_head; @@ -227,6 +260,9 @@ struct css_set { * during subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* @@ -389,10 +425,11 @@ struct cgroup_subsys { struct cgroup *cgrp); int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, - struct cgroup *cgrp, struct task_struct *tsk); + int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk); + struct cgroup *old_cgrp, struct task_struct *tsk, + bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 7f627775c94..ddb7a97c78c 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -27,8 +27,8 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs.txt before using the - * configfs interface, ESPECIALLY the parts about reference counts and + * Please read Documentation/filesystems/configfs/configfs.txt before using + * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. */ diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0..d27a7a05718 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -20,6 +20,9 @@ */ #ifndef DCA_H #define DCA_H + +#include <linux/pci.h> + /* DCA Provider API */ /* DCA Notifier Interface */ @@ -36,6 +39,12 @@ struct dca_provider { int id; }; +struct dca_domain { + struct list_head node; + struct list_head dca_providers; + struct pci_bus *pci_rc; +}; + struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); @@ -47,7 +56,7 @@ struct dca_ops { struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); void free_dca_provider(struct dca_provider *dca); int register_dca_provider(struct dca_provider *dca, struct device *dev); -void unregister_dca_provider(struct dca_provider *dca); +void unregister_dca_provider(struct dca_provider *dca, struct device *dev); static inline void *dca_priv(struct dca_provider *dca) { diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index eb5c2ba2f81..fc1b930f246 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. */ #ifndef _DEBUGFS_H_ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c81..2b9f2ac7ed6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,19 +48,20 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, - DMA_DUAL_XOR, - DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_PQ, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, - DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -70,18 +71,25 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single * (if not set, do the source dma-unmapping as page) * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single * (if not set, do the destination dma-unmapping as page) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -90,9 +98,32 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), + DMA_PREP_PQ_DISABLE_P = (1 << 6), + DMA_PREP_PQ_DISABLE_Q = (1 << 7), + DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + +/** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h */ @@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param); * @flags: flags to augment operation preparation, control completion, and * communicate status * @phys: physical address of the descriptor - * @tx_list: driver common field for operations that require multiple - * descriptors * @chan: target channel for this operation * @tx_submit: set the prepared descriptor(s) to be executed by the engine * @callback: routine to call after this operation is complete @@ -195,7 +224,6 @@ struct dma_async_tx_descriptor { dma_cookie_t cookie; enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ dma_addr_t phys; - struct list_head tx_list; struct dma_chan *chan; dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; @@ -213,6 +241,11 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -220,7 +253,9 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -235,7 +270,13 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -249,9 +290,17 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -270,6 +319,96 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE @@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); diff --git a/include/linux/fb.h b/include/linux/fb.h index f847df9e99b..a34bdf5a9d2 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -133,6 +133,7 @@ struct dentry; #define FB_ACCEL_NEOMAGIC_NM2230 96 /* NeoMagic NM2230 */ #define FB_ACCEL_NEOMAGIC_NM2360 97 /* NeoMagic NM2360 */ #define FB_ACCEL_NEOMAGIC_NM2380 98 /* NeoMagic NM2380 */ +#define FB_ACCEL_PXA3XX 99 /* PXA3xx */ #define FB_ACCEL_SAVAGE4 0x80 /* S3 Savage4 */ #define FB_ACCEL_SAVAGE3D 0x81 /* S3 Savage3D */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b09..2adaa2529f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; /* @@ -640,7 +641,6 @@ struct block_device { struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; struct list_head bd_inodes; void * bd_holder; int bd_holders; @@ -1315,7 +1315,7 @@ struct super_block { unsigned long s_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; @@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); +extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), @@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); @@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, struct iattr *); extern void file_update_time(struct file *file); @@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18da..cd3d2abaf30 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,7 +19,7 @@ extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); @@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } extern int stack_tracer_enabled; int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdeb..8ec17997d94 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,6 @@ #include <linux/compiler.h> #include <linux/types.h> -struct inode; -struct mm_struct; -struct task_struct; -union ktime; - /* Second argument to futex syscall */ @@ -129,6 +124,11 @@ struct robust_list_head { #define FUTEX_BITSET_MATCH_ANY 0xffffffff #ifdef __KERNEL__ +struct inode; +struct mm_struct; +struct task_struct; +union ktime; + long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73ef..16937995abd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -3,15 +3,15 @@ #include <linux/fs.h> +struct ctl_table; +struct user_struct; + #ifdef CONFIG_HUGETLB_PAGE #include <linux/mempolicy.h> #include <linux/shm.h> #include <asm/tlbflush.h> -struct ctl_table; -struct user_struct; - int PageHuge(struct page *page); static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) @@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, @@ -187,7 +187,11 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS) +static inline struct file *hugetlb_file_setup(const char *name, size_t size, + int acctflag, struct user_struct **user, int creat_flags) +{ + return ERR_PTR(-ENOSYS); +} #endif /* !CONFIG_HUGETLBFS */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e46a0734ab6..bf9213b2db8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) extern bool mem_cgroup_oom_called(struct task_struct *task); void mem_cgroup_update_mapped_file_stat(struct page *page, int val); +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, { } +static inline +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, int zid) +{ + return 0; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144..24c395694f4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -288,7 +288,7 @@ static inline int is_vmalloc_addr(const void *x) #ifdef CONFIG_MMU extern int is_vmalloc_or_module_addr(const void *x); #else -static int is_vmalloc_or_module_addr(const void *x) +static inline int is_vmalloc_or_module_addr(const void *x) { return 0; } @@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. @@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); + +int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); + +int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); @@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern atomic_long_t mce_bad_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0042090a4d7..21d6aa45206 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,8 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + struct linux_binfmt *binfmt; + cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ @@ -259,11 +261,10 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ - - /* aio bits */ +#ifdef CONFIG_AIO spinlock_t ioctx_lock; struct hlist_head ioctx_list; - +#endif #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be58..6f7561730d8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -struct file; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int numa_zonelist_order_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937..482efc865ac 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name <email>" or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ @@ -308,10 +311,14 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; - char *strtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 13de789f0a5..6b202b17395 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -51,6 +51,9 @@ * PG_buddy is set to indicate that the page is free and in the buddy system * (see mm/page_alloc.c). * + * PG_hwpoison indicates that a page got corrupted in hardware and contains + * data with incorrect ECC bits that triggered a machine check. Accessing is + * not safe since it may cause another machine check. Don't touch! */ /* @@ -102,6 +105,9 @@ enum pageflags { #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif +#ifdef CONFIG_MEMORY_FAILURE + PG_hwpoison, /* hardware poisoned page. Don't touch */ +#endif __NR_PAGEFLAGS, /* Filesystems */ @@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached) PAGEFLAG_FALSE(Uncached) #endif +#ifdef CONFIG_MEMORY_FAILURE +PAGEFLAG(HWPoison, hwpoison) +TESTSETFLAG(HWPoison, hwpoison) +#define __PG_HWPOISON (1UL << PG_hwpoison) +#else +PAGEFLAG_FALSE(HWPoison) +#define __PG_HWPOISON 0 +#endif + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); @@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) /* * Flags checked when a page is prepped for return by the page allocator. diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index ada779f2417..4b938d4f3ac 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -38,6 +38,7 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ + PCG_ACCT_LRU, /* page has been accounted for */ }; #define TESTPCGFLAG(uname, lname) \ @@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ { clear_bit(PCG_##lname, &pc->flags); } +#define TESTCLEARPCGFLAG(uname, lname) \ +static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ + { return test_and_clear_bit(PCG_##lname, &pc->flags); } + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) +CLEARPCGFLAG(Cache, CACHE) +SETPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) +SETPCGFLAG(Used, USED) + +SETPCGFLAG(AcctLRU, ACCT_LRU) +CLEARPCGFLAG(AcctLRU, ACCT_LRU) +TESTPCGFLAG(AcctLRU, ACCT_LRU) +TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) static inline int page_cgroup_nid(struct page_cgroup *pc) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7803565aa87..da1fda8623e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2527,6 +2527,16 @@ #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 07bff666e65..931150566ad 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,4 +88,6 @@ #define PR_TASK_PERF_EVENTS_DISABLE 31 #define PR_TASK_PERF_EVENTS_ENABLE 32 +#define PR_MCE_KILL 33 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h index 953fc055e87..14a86bc7102 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -140,7 +140,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relayfs.txt for more info. + * See Documentation/filesystems/relay.txt for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 511f42fc681..731af71cddc 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -35,6 +35,10 @@ struct res_counter { */ unsigned long long limit; /* + * the limit that usage can be exceed + */ + unsigned long long soft_limit; + /* * the number of unsuccessful attempts to consume the resource */ unsigned long long failcnt; @@ -87,6 +91,7 @@ enum { RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, + RES_SOFT_LIMIT, }; /* @@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at); + unsigned long val, struct res_counter **limit_fail_at, + struct res_counter **soft_limit_at); /* * uncharge - tell that some portion of the resource is released @@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val); +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { @@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt) return false; } +static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt) +{ + if (cnt->usage < cnt->soft_limit) + return true; + + return false; +} + +/** + * Get the difference between the usage and the soft limit + * @cnt: The counter + * + * Returns 0 if usage is less than or equal to soft limit + * The difference between usage and soft limit, otherwise. + */ +static inline unsigned long long +res_counter_soft_limit_excess(struct res_counter *cnt) +{ + unsigned long long excess; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage <= cnt->soft_limit) + excess = 0; + else + excess = cnt->usage - cnt->soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return excess; +} + /* * Helper function to detect if the cgroup is within it's limit or * not. It's currently called from cgroup_rss_prepare() @@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + ret = res_counter_soft_limit_check_locked(cnt); + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + static inline void res_counter_reset_max(struct res_counter *cnt) { unsigned long flags; @@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt, return ret; } +static inline int +res_counter_set_soft_limit(struct res_counter *cnt, + unsigned long long soft_limit) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->soft_limit = soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return 0; +} + #endif diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 477841d29fc..cb0ba703260 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page) */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt, unsigned long *vm_flags); -int try_to_unmap(struct page *, int ignore_refs); +enum ttu_flags { + TTU_UNMAP = 0, /* unmap mode */ + TTU_MIGRATION = 1, /* migration mode */ + TTU_MUNLOCK = 2, /* munlock mode */ + TTU_ACTION_MASK = 0xff, + + TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ + TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ + TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ +}; +#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +int try_to_unmap(struct page *, enum ttu_flags flags); /* * Called from mm/filemap_xip.c to unmap empty zero page @@ -108,6 +120,13 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma(struct page *page); +void page_unlock_anon_vma(struct anon_vma *anon_vma); +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 848d1f20086..75e6e60bf58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -309,7 +309,7 @@ extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; @@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -1271,7 +1271,6 @@ struct task_struct { struct mm_struct *mm, *active_mm; /* task state */ - struct linux_binfmt *binfmt; int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ @@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ @@ -1906,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos); #endif #ifdef CONFIG_SCHED_DEBUG @@ -1924,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int sysctl_sched_compat_yield; @@ -2059,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); extern int do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); @@ -2336,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int __fatal_signal_pending(struct task_struct *p); +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} static inline int fatal_signal_pending(struct task_struct *p) { diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9e..239e40d0450 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return PAGE_ALIGN(mmap_min_addr); return hint; } -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_SECURITY diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0c6a86b7959..8366d8f12e5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -35,6 +35,44 @@ struct seq_operations { #define SEQ_SKIP 1 +/** + * seq_get_buf - get buffer to write arbitrary data to + * @m: the seq_file handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_get_buf(struct seq_file *m, char **bufp) +{ + BUG_ON(m->count > m->size); + if (m->count < m->size) + *bufp = m->buf + m->count; + else + *bufp = NULL; + + return m->size - m->count; +} + +/** + * seq_commit - commit data to the buffer + * @m: the seq_file handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get. To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value. + */ +static inline void seq_commit(struct seq_file *m, int num) +{ + if (num < 0) { + m->count = m->size; + } else { + BUG_ON(m->count + num > m->size); + m->count += num; + } +} + char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); diff --git a/include/linux/signal.h b/include/linux/signal.h index c7552836bd9..ab9272cc270 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig) } extern int next_signal(struct sigpending *pending, sigset_t *mask); +extern int do_send_sig_info(int sig, struct siginfo *info, + struct task_struct *p, bool group); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, diff --git a/include/linux/swap.h b/include/linux/swap.h index 6c990e658f4..4ec90019c1a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -34,16 +34,38 @@ static inline int current_is_kswapd(void) * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 -#ifndef CONFIG_MIGRATION -#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) + +/* + * Use some of the swap files numbers for other purposes. This + * is a convenient way to hook into the VM to trigger special + * actions on faults. + */ + +/* + * NUMA node memory migration support + */ +#ifdef CONFIG_MIGRATION +#define SWP_MIGRATION_NUM 2 +#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) +#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else -/* Use last two entries for page migration swap entries */ -#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) -#define SWP_MIGRATION_READ MAX_SWAPFILES -#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) +#define SWP_MIGRATION_NUM 0 #endif /* + * Handling of hardware poisoned pages with memory corruption. + */ +#ifdef CONFIG_MEMORY_FAILURE +#define SWP_HWPOISON_NUM 1 +#define SWP_HWPOISON MAX_SWAPFILES +#else +#define SWP_HWPOISON_NUM 0 +#endif + +#define MAX_SWAPFILES \ + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + +/* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the @@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, + int nid); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, +extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4..cd42e30b7c6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +static inline int non_swap_entry(swp_entry_t entry) +{ + return swp_type(entry) >= MAX_SWAPFILES; +} +#else +static inline int non_swap_entry(swp_entry_t entry) +{ + return 0; +} +#endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a46..1e4743ee683 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -29,7 +29,6 @@ #include <linux/types.h> #include <linux/compiler.h> -struct file; struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a @@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dostring(struct ctl_table *, int, struct file *, +extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, +extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, diff --git a/include/linux/time.h b/include/linux/time.h index 56787c09334..fe04e5ef6a5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); +/* + * Similar to the struct tm in userspace <time.h>, but it needs to be here so + * that the kernel source is self contained. + */ +struct tm { + /* + * the number of seconds after the minute, normally in the range + * 0 to 59, but can be up to 60 to allow for leap seconds + */ + int tm_sec; + /* the number of minutes after the hour, in the range 0 to 59*/ + int tm_min; + /* the number of hours past midnight, in the range 0 to 23 */ + int tm_hour; + /* the day of the month, in the range 1 to 31 */ + int tm_mday; + /* the number of months since January, in the range 0 to 11 */ + int tm_mon; + /* the number of years since 1900 */ + long tm_year; + /* the number of days since Sunday, in the range 0 to 6 */ + int tm_wday; + /* the number of days since January 1, in the range 0 to 365 */ + int tm_yday; +}; + +void time_to_tm(time_t totalsecs, int offset, struct tm *result); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 17ba82efa48..1eb44a924e5 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -1,7 +1,7 @@ /* * Tracing hooks * - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task, /** * tracehook_notify_jctl - report about job control stop/continue - * @notify: nonzero if this is the last thread in the group to stop + * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED * @why: %CLD_STOPPED or %CLD_CONTINUED * * This is called when we might call do_notify_parent_cldstop(). - * It's called when about to stop for job control; we are already in - * %TASK_STOPPED state, about to call schedule(). It's also called when - * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. * - * Return nonzero to generate a %SIGCHLD with @why, which is - * normal if @notify is nonzero. + * @notify is zero if we would not ordinarily send a %SIGCHLD, + * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. * - * Called with no locks held. + * @why is %CLD_STOPPED when about to stop for job control; + * we are already in %TASK_STOPPED state, about to call schedule(). + * It might also be that we have just exited (check %PF_EXITING), + * but need to report that a group-wide stop is complete. + * + * @why is %CLD_CONTINUED when waking up after job control stop and + * ready to make a delayed @notify report. + * + * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. + * + * Called with the siglock held. */ static inline int tracehook_notify_jctl(int notify, int why) { - return notify || (current->ptrace & PT_PTRACED); + return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; +} + +/** + * tracehook_finish_jctl - report about return from job control stop + * + * This is called by do_signal_stop() after wakeup. + */ +static inline void tracehook_finish_jctl(void) +{ } #define DEATH_REAP -1 diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a8058..660a9de96f8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> * diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 46dd12c5709..9356b24223a 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H #define _LINUX_UNALIGNED_BE_BYTESHIFT_H -#include <linux/kernel.h> +#include <linux/types.h> static inline u16 __get_unaligned_be16(const u8 *p) { diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 59777e951ba..be376fb79b6 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H #define _LINUX_UNALIGNED_LE_BYTESHIFT_H -#include <linux/kernel.h> +#include <linux/types.h> static inline u16 __get_unaligned_le16(const u8 *p) { diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 923f9040ea2..2dfaa293ae8 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,5 +1,6 @@ /* - * vgaarb.c + * The VGA aribiter manages VGA space routing and VGA resource decode to + * allow multiple VGA devices to be used in a system in a safe way. * * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff..66ebddcff66 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, diff --git a/include/net/ip.h b/include/net/ip.h index 72c36926c26..5b26a0bd178 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, * fed into the routing cache should use these handlers. */ int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1459ed3e269..f76f22d0572 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -55,7 +55,6 @@ enum { #include <net/neighbour.h> struct ctl_table; -struct file; struct inet6_dev; struct net_device; struct net_proto_family; @@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 40eab7314ae..7d3704750ef 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table) } #ifdef CONFIG_PROC_SYSCTL -static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } static int proc_ipc_callback_dointvec(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, } static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_doulongvec_minmax(&ipc_table, write, filp, buffer, + return proc_doulongvec_minmax(&ipc_table, write, buffer, lenp, ppos); } @@ -95,7 +95,7 @@ static void ipc_auto_callback(int val) } static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, ipc_table.data = get_ipc(table); oldval = *((int *)(ipc_table.data)); - rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) { int newval = *((int *)(ipc_table.data)); diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 24ae46dfe45..8a058711fc1 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table) return which; } -static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_mq_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&mq_table, write, buffer, lenp, ppos); } static int proc_mq_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec_minmax(&mq_table, write, filp, buffer, + return proc_dointvec_minmax(&mq_table, write, buffer, lenp, ppos); } #else diff --git a/kernel/Makefile b/kernel/Makefile index 187c89b4783..b8d4cd8ac0b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o -obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o diff --git a/kernel/audit.c b/kernel/audit.c index defc2e6f1e3..5feed232be9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } case AUDIT_SIGNAL_INFO: - err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); - if (err) - return err; + len = 0; + if (audit_sig_sid) { + err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); + if (err) + return err; + } sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); if (!sig_data) { - security_release_secctx(ctx, len); + if (audit_sig_sid) + security_release_secctx(ctx, len); return -ENOMEM; } sig_data->uid = audit_sig_uid; sig_data->pid = audit_sig_pid; - memcpy(sig_data->ctx, ctx, len); - security_release_secctx(ctx, len); + if (audit_sig_sid) { + memcpy(sig_data->ctx, ctx, len); + security_release_secctx(ctx, len); + } audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0e96dbc60ea..cc7e87936cb 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -45,8 +45,8 @@ struct audit_watch { atomic_t count; /* reference count */ - char *path; /* insertion path */ dev_t dev; /* associated superblock device */ + char *path; /* insertion path */ unsigned long ino; /* associated inode number */ struct audit_parent *parent; /* associated parent */ struct list_head wlist; /* entry in parent->watches list */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 68d3c6a0ecd..267e484f019 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -168,12 +168,12 @@ struct audit_context { int in_syscall; /* 1 if task is in a syscall */ enum audit_state state, current_state; unsigned int serial; /* serial number for record */ - struct timespec ctime; /* time of syscall entry */ int major; /* syscall number */ + struct timespec ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ - int return_valid; /* return code is valid */ long return_code;/* syscall return code */ u64 prio; + int return_valid; /* return code is valid */ int name_count; struct audit_names names[AUDIT_NAMES]; char * filterkey; /* key for rule that triggered record */ @@ -198,8 +198,8 @@ struct audit_context { char target_comm[TASK_COMM_LEN]; struct audit_tree_refs *trees, *first_trees; - int tree_count; struct list_head killed_trees; + int tree_count; int type; union { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd83d9933b6..7ccba4bc5e3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -23,6 +23,7 @@ */ #include <linux/cgroup.h> +#include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/kernel.h> @@ -48,6 +49,8 @@ #include <linux/namei.h> #include <linux/smp_lock.h> #include <linux/pid_namespace.h> +#include <linux/idr.h> +#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ #include <asm/atomic.h> @@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = { #include <linux/cgroup_subsys.h> }; +#define MAX_CGROUP_ROOT_NAMELEN 64 + /* * A cgroupfs_root represents the root of a cgroup hierarchy, * and may be associated with a superblock to form an active @@ -74,6 +79,9 @@ struct cgroupfs_root { */ unsigned long subsys_bits; + /* Unique id for this hierarchy. */ + int hierarchy_id; + /* The bitmask of subsystems currently attached to this hierarchy */ unsigned long actual_subsys_bits; @@ -94,6 +102,9 @@ struct cgroupfs_root { /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; }; /* @@ -141,6 +152,10 @@ struct css_id { static LIST_HEAD(roots); static int root_count; +static DEFINE_IDA(hierarchy_ida); +static int next_hierarchy_id; +static DEFINE_SPINLOCK(hierarchy_id_lock); + /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ #define dummytop (&rootnode.top_cgroup) @@ -201,6 +216,7 @@ struct cg_cgroup_link { * cgroup, anchored on cgroup->css_sets */ struct list_head cgrp_link_list; + struct cgroup *cgrp; /* * List running through cg_cgroup_links pointing at a * single css_set object, anchored on css_set->cg_links @@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); static DEFINE_RWLOCK(css_set_lock); static int css_set_count; -/* hash table for cgroup groups. This improves the performance to - * find an existing css_set */ +/* + * hash table for cgroup groups. This improves the performance to find + * an existing css_set. This hash doesn't (currently) take into + * account cgroups in empty hierarchies. + */ #define CSS_SET_HASH_BITS 7 #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; @@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } +static void free_css_set_rcu(struct rcu_head *obj) +{ + struct css_set *cg = container_of(obj, struct css_set, rcu_head); + kfree(cg); +} + /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups * compiled into their kernel but not actually in use */ static int use_task_css_set_links __read_mostly; -/* When we create or destroy a css_set, the operation simply - * takes/releases a reference count on all the cgroups referenced - * by subsystems in this css_set. This can end up multiple-counting - * some cgroups, but that's OK - the ref-count is just a - * busy/not-busy indicator; ensuring that we only count each cgroup - * once would require taking a global lock to ensure that no - * subsystems moved between hierarchies while we were doing so. - * - * Possible TODO: decide at boot time based on the number of - * registered subsystems and the number of CPUs or NUMA nodes whether - * it's better for performance to ref-count every subsystem, or to - * take a global lock and only add one ref count to each hierarchy. - */ - -/* - * unlink a css_set from the list and free it - */ -static void unlink_css_set(struct css_set *cg) +static void __put_css_set(struct css_set *cg, int taskexit) { struct cg_cgroup_link *link; struct cg_cgroup_link *saved_link; - - hlist_del(&cg->hlist); - css_set_count--; - - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - kfree(link); - } -} - -static void __put_css_set(struct css_set *cg, int taskexit) -{ - int i; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an @@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit) write_unlock(&css_set_lock); return; } - unlink_css_set(cg); - write_unlock(&css_set_lock); - rcu_read_lock(); - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); + /* This css_set is dead. unlink it and release cgroup refcounts */ + hlist_del(&cg->hlist); + css_set_count--; + + list_for_each_entry_safe(link, saved_link, &cg->cg_links, + cg_link_list) { + struct cgroup *cgrp = link->cgrp; + list_del(&link->cg_link_list); + list_del(&link->cgrp_link_list); if (atomic_dec_and_test(&cgrp->count) && notify_on_release(cgrp)) { if (taskexit) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } + + kfree(link); } - rcu_read_unlock(); - kfree(cg); + + write_unlock(&css_set_lock); + call_rcu(&cg->rcu_head, free_css_set_rcu); } /* @@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg) } /* + * compare_css_sets - helper function for find_existing_css_set(). + * @cg: candidate css_set being tested + * @old_cg: existing css_set for a task + * @new_cgrp: cgroup that's being entered by the task + * @template: desired set of css pointers in css_set (pre-calculated) + * + * Returns true if "cg" matches "old_cg" except for the hierarchy + * which "new_cgrp" belongs to, for which it should match "new_cgrp". + */ +static bool compare_css_sets(struct css_set *cg, + struct css_set *old_cg, + struct cgroup *new_cgrp, + struct cgroup_subsys_state *template[]) +{ + struct list_head *l1, *l2; + + if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { + /* Not all subsystems matched */ + return false; + } + + /* + * Compare cgroup pointers in order to distinguish between + * different cgroups in heirarchies with no subsystems. We + * could get by with just this check alone (and skip the + * memcmp above) but on most setups the memcmp check will + * avoid the need for this more expensive check on almost all + * candidates. + */ + + l1 = &cg->cg_links; + l2 = &old_cg->cg_links; + while (1) { + struct cg_cgroup_link *cgl1, *cgl2; + struct cgroup *cg1, *cg2; + + l1 = l1->next; + l2 = l2->next; + /* See if we reached the end - both lists are equal length. */ + if (l1 == &cg->cg_links) { + BUG_ON(l2 != &old_cg->cg_links); + break; + } else { + BUG_ON(l2 == &old_cg->cg_links); + } + /* Locate the cgroups associated with these links. */ + cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); + cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); + cg1 = cgl1->cgrp; + cg2 = cgl2->cgrp; + /* Hierarchies should be linked in the same order. */ + BUG_ON(cg1->root != cg2->root); + + /* + * If this hierarchy is the hierarchy of the cgroup + * that's changing, then we need to check that this + * css_set points to the new cgroup; if it's any other + * hierarchy, then this css_set should point to the + * same cgroup as the old css_set. + */ + if (cg1->root == new_cgrp->root) { + if (cg1 != new_cgrp) + return false; + } else { + if (cg1 != cg2) + return false; + } + } + return true; +} + +/* * find_existing_css_set() is a helper for * find_css_set(), and checks to see whether an existing * css_set is suitable. @@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set( hhead = css_set_hash(template); hlist_for_each_entry(cg, node, hhead, hlist) { - if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { - /* All subsystems matched */ - return cg; - } + if (!compare_css_sets(cg, oldcg, cgrp, template)) + continue; + + /* This css_set matches what we need */ + return cg; } /* No existing cgroup group matched */ @@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links, link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, cgrp_link_list); link->cg = cg; + link->cgrp = cgrp; + atomic_inc(&cgrp->count); list_move(&link->cgrp_link_list, &cgrp->css_sets); - list_add(&link->cg_link_list, &cg->cg_links); + /* + * Always add links to the tail of the list so that the list + * is sorted by order of hierarchy creation + */ + list_add_tail(&link->cg_link_list, &cg->cg_links); } /* @@ -451,11 +530,11 @@ static struct css_set *find_css_set( { struct css_set *res; struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; - int i; struct list_head tmp_cg_links; struct hlist_head *hhead; + struct cg_cgroup_link *link; /* First see if we already have a cgroup group that matches * the desired set */ @@ -489,20 +568,12 @@ static struct css_set *find_css_set( write_lock(&css_set_lock); /* Add reference counts and links from the new css_set. */ - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup *cgrp = res->subsys[i]->cgroup; - struct cgroup_subsys *ss = subsys[i]; - atomic_inc(&cgrp->count); - /* - * We want to add a link once per cgroup, so we - * only do it for the first subsystem in each - * hierarchy - */ - if (ss->root->subsys_list.next == &ss->sibling) - link_css_set(&tmp_cg_links, res, cgrp); + list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + if (c->root == cgrp->root) + c = cgrp; + link_css_set(&tmp_cg_links, res, c); } - if (list_empty(&rootnode.subsys_list)) - link_css_set(&tmp_cg_links, res, dummytop); BUG_ON(!list_empty(&tmp_cg_links)); @@ -518,6 +589,41 @@ static struct css_set *find_css_set( } /* + * Return the cgroup for "task" from the given hierarchy. Must be + * called with cgroup_mutex held. + */ +static struct cgroup *task_cgroup_from_root(struct task_struct *task, + struct cgroupfs_root *root) +{ + struct css_set *css; + struct cgroup *res = NULL; + + BUG_ON(!mutex_is_locked(&cgroup_mutex)); + read_lock(&css_set_lock); + /* + * No need to lock the task - since we hold cgroup_mutex the + * task can't change groups, so the only thing that can happen + * is that it exits and its css is set back to init_css_set. + */ + css = task->cgroups; + if (css == &init_css_set) { + res = &root->top_cgroup; + } else { + struct cg_cgroup_link *link; + list_for_each_entry(link, &css->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + if (c->root == root) { + res = c; + break; + } + } + } + read_unlock(&css_set_lock); + BUG_ON(!res); + return res; +} + +/* * There is one global cgroup mutex. We also require taking * task_lock() when dereferencing a task's cgroup subsys pointers. * See "The task_lock() exception", at the end of this comment. @@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) */ deactivate_super(cgrp->root->sb); + /* + * if we're getting rid of the cgroup, refcount should ensure + * that there are no pidlists left. + */ + BUG_ON(!list_empty(&cgrp->pidlists)); + call_rcu(&cgrp->rcu_head, free_cgroup_rcu); } iput(inode); @@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (strlen(root->name)) + seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); return 0; } @@ -849,6 +963,12 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + char *name; + /* User explicitly requested empty subsystem */ + bool none; + + struct cgroupfs_root *new_root; + }; /* Convert a hierarchy specifier into a bitmask of subsystems and @@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data, mask = ~(1UL << cpuset_subsys_id); #endif - opts->subsys_bits = 0; - opts->flags = 0; - opts->release_agent = NULL; + memset(opts, 0, sizeof(*opts)); while ((token = strsep(&o, ",")) != NULL) { if (!*token) @@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data, if (!ss->disabled) opts->subsys_bits |= 1ul << i; } + } else if (!strcmp(token, "none")) { + /* Explicitly have no subsystems */ + opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; - opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); + opts->release_agent = + kstrndup(token + 14, PATH_MAX, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - strncpy(opts->release_agent, token + 14, PATH_MAX - 1); - opts->release_agent[PATH_MAX - 1] = 0; + } else if (!strncmp(token, "name=", 5)) { + int i; + const char *name = token + 5; + /* Can't specify an empty name */ + if (!strlen(name)) + return -EINVAL; + /* Must match [\w.-]+ */ + for (i = 0; i < strlen(name); i++) { + char c = name[i]; + if (isalnum(c)) + continue; + if ((c == '.') || (c == '-') || (c == '_')) + continue; + return -EINVAL; + } + /* Specifying two names is forbidden */ + if (opts->name) + return -EINVAL; + opts->name = kstrndup(name, + MAX_CGROUP_ROOT_NAMELEN, + GFP_KERNEL); + if (!opts->name) + return -ENOMEM; } else { struct cgroup_subsys *ss; int i; @@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data, } } + /* Consistency checks */ + /* * Option noprefix was introduced just for backward compatibility * with the old cpuset, so we allow noprefix only if mounting just @@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data, (opts->subsys_bits & mask)) return -EINVAL; - /* We can't have an empty hierarchy */ - if (!opts->subsys_bits) + + /* Can't specify "none" and some subsystems */ + if (opts->subsys_bits && opts->none) + return -EINVAL; + + /* + * We either have to specify by name or by subsystems. (So all + * empty hierarchies must have a name). + */ + if (!opts->subsys_bits && !opts->name) return -EINVAL; return 0; @@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) goto out_unlock; } + /* Don't allow name to change at remount */ + if (opts.name && strcmp(opts.name, root->name)) { + ret = -EINVAL; + goto out_unlock; + } + ret = rebind_subsystems(root, opts.subsys_bits); if (ret) goto out_unlock; @@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) strcpy(root->release_agent_path, opts.release_agent); out_unlock: kfree(opts.release_agent); + kfree(opts.name); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); unlock_kernel(); @@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); INIT_LIST_HEAD(&cgrp->release_list); - INIT_LIST_HEAD(&cgrp->pids_list); - init_rwsem(&cgrp->pids_mutex); + INIT_LIST_HEAD(&cgrp->pidlists); + mutex_init(&cgrp->pidlist_mutex); } + static void init_cgroup_root(struct cgroupfs_root *root) { struct cgroup *cgrp = &root->top_cgroup; @@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root) init_cgroup_housekeeping(cgrp); } +static bool init_root_id(struct cgroupfs_root *root) +{ + int ret = 0; + + do { + if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) + return false; + spin_lock(&hierarchy_id_lock); + /* Try to allocate the next unused ID */ + ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, + &root->hierarchy_id); + if (ret == -ENOSPC) + /* Try again starting from 0 */ + ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); + if (!ret) { + next_hierarchy_id = root->hierarchy_id + 1; + } else if (ret != -EAGAIN) { + /* Can only get here if the 31-bit IDR is full ... */ + BUG_ON(ret); + } + spin_unlock(&hierarchy_id_lock); + } while (ret); + return true; +} + static int cgroup_test_super(struct super_block *sb, void *data) { - struct cgroupfs_root *new = data; + struct cgroup_sb_opts *opts = data; struct cgroupfs_root *root = sb->s_fs_info; - /* First check subsystems */ - if (new->subsys_bits != root->subsys_bits) - return 0; + /* If we asked for a name then it must match */ + if (opts->name && strcmp(opts->name, root->name)) + return 0; - /* Next check flags */ - if (new->flags != root->flags) + /* + * If we asked for subsystems (or explicitly for no + * subsystems) then they must match + */ + if ((opts->subsys_bits || opts->none) + && (opts->subsys_bits != root->subsys_bits)) return 0; return 1; } +static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) +{ + struct cgroupfs_root *root; + + if (!opts->subsys_bits && !opts->none) + return NULL; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + + if (!init_root_id(root)) { + kfree(root); + return ERR_PTR(-ENOMEM); + } + init_cgroup_root(root); + + root->subsys_bits = opts->subsys_bits; + root->flags = opts->flags; + if (opts->release_agent) + strcpy(root->release_agent_path, opts->release_agent); + if (opts->name) + strcpy(root->name, opts->name); + return root; +} + +static void cgroup_drop_root(struct cgroupfs_root *root) +{ + if (!root) + return; + + BUG_ON(!root->hierarchy_id); + spin_lock(&hierarchy_id_lock); + ida_remove(&hierarchy_ida, root->hierarchy_id); + spin_unlock(&hierarchy_id_lock); + kfree(root); +} + static int cgroup_set_super(struct super_block *sb, void *data) { int ret; - struct cgroupfs_root *root = data; + struct cgroup_sb_opts *opts = data; + + /* If we don't have a new root, we can't set up a new sb */ + if (!opts->new_root) + return -EINVAL; + + BUG_ON(!opts->subsys_bits && !opts->none); ret = set_anon_super(sb, NULL); if (ret) return ret; - sb->s_fs_info = root; - root->sb = sb; + sb->s_fs_info = opts->new_root; + opts->new_root->sb = sb; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type, void *data, struct vfsmount *mnt) { struct cgroup_sb_opts opts; + struct cgroupfs_root *root; int ret = 0; struct super_block *sb; - struct cgroupfs_root *root; - struct list_head tmp_cg_links; + struct cgroupfs_root *new_root; /* First find the desired set of subsystems */ ret = parse_cgroupfs_options(data, &opts); - if (ret) { - kfree(opts.release_agent); - return ret; - } - - root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) { - kfree(opts.release_agent); - return -ENOMEM; - } + if (ret) + goto out_err; - init_cgroup_root(root); - root->subsys_bits = opts.subsys_bits; - root->flags = opts.flags; - if (opts.release_agent) { - strcpy(root->release_agent_path, opts.release_agent); - kfree(opts.release_agent); + /* + * Allocate a new cgroup root. We may not need it if we're + * reusing an existing hierarchy. + */ + new_root = cgroup_root_from_opts(&opts); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + goto out_err; } + opts.new_root = new_root; - sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); - + /* Locate an existing or new sb for this hierarchy */ + sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); if (IS_ERR(sb)) { - kfree(root); - return PTR_ERR(sb); + ret = PTR_ERR(sb); + cgroup_drop_root(opts.new_root); + goto out_err; } - if (sb->s_fs_info != root) { - /* Reusing an existing superblock */ - BUG_ON(sb->s_root == NULL); - kfree(root); - root = NULL; - } else { - /* New superblock */ + root = sb->s_fs_info; + BUG_ON(!root); + if (root == opts.new_root) { + /* We used the new root structure, so this is a new hierarchy */ + struct list_head tmp_cg_links; struct cgroup *root_cgrp = &root->top_cgroup; struct inode *inode; + struct cgroupfs_root *existing_root; int i; BUG_ON(sb->s_root != NULL); @@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, mutex_lock(&inode->i_mutex); mutex_lock(&cgroup_mutex); + if (strlen(root->name)) { + /* Check for name clashes with existing mounts */ + for_each_active_root(existing_root) { + if (!strcmp(existing_root->name, root->name)) { + ret = -EBUSY; + mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); + goto drop_new_super; + } + } + } + /* * We're accessing css_set_count without locking * css_set_lock here, but that's OK - it can only be @@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, if (ret == -EBUSY) { mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); - goto free_cg_links; + free_cg_links(&tmp_cg_links); + goto drop_new_super; } /* EBUSY should be the only error here */ @@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type, BUG_ON(root->number_of_cgroups != 1); cgroup_populate_dir(root_cgrp); - mutex_unlock(&inode->i_mutex); mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); + } else { + /* + * We re-used an existing hierarchy - the new root (if + * any) is not needed + */ + cgroup_drop_root(opts.new_root); } simple_set_mnt(mnt, sb); + kfree(opts.release_agent); + kfree(opts.name); return 0; - free_cg_links: - free_cg_links(&tmp_cg_links); drop_new_super: deactivate_locked_super(sb); + out_err: + kfree(opts.release_agent); + kfree(opts.name); + return ret; } @@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_mutex); kill_litter_super(sb); - kfree(root); + cgroup_drop_root(root); } static struct file_system_type cgroup_fs_type = { @@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) return 0; } -/* - * Return the first subsystem attached to a cgroup's hierarchy, and - * its subsystem id. - */ - -static void get_first_subsys(const struct cgroup *cgrp, - struct cgroup_subsys_state **css, int *subsys_id) -{ - const struct cgroupfs_root *root = cgrp->root; - const struct cgroup_subsys *test_ss; - BUG_ON(list_empty(&root->subsys_list)); - test_ss = list_entry(root->subsys_list.next, - struct cgroup_subsys, sibling); - if (css) { - *css = cgrp->subsys[test_ss->subsys_id]; - BUG_ON(!*css); - } - if (subsys_id) - *subsys_id = test_ss->subsys_id; -} - /** * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' * @cgrp: the cgroup the task is attaching to @@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct css_set *cg; struct css_set *newcg; struct cgroupfs_root *root = cgrp->root; - int subsys_id; - - get_first_subsys(cgrp, NULL, &subsys_id); /* Nothing to do if the task is already in that cgroup */ - oldcgrp = task_cgroup(tsk, subsys_id); + oldcgrp = task_cgroup_from_root(tsk, root); if (cgrp == oldcgrp) return 0; for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, tsk); + retval = ss->can_attach(ss, cgrp, tsk, false); if (retval) return retval; } @@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, oldcgrp, tsk); + ss->attach(ss, cgrp, oldcgrp, tsk, false); } set_bit(CGRP_RELEASABLE, &oldcgrp->flags); synchronize_rcu(); @@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) return ret; } -/* The various types of files and directories in a cgroup file system */ -enum cgroup_filetype { - FILE_ROOT, - FILE_DIR, - FILE_TASKLIST, - FILE_NOTIFY_ON_RELEASE, - FILE_RELEASE_AGENT, -}; - /** * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. * @cgrp: the cgroup to be checked for liveness @@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp) * the start of a css_set */ static void cgroup_advance_iter(struct cgroup *cgrp, - struct cgroup_iter *it) + struct cgroup_iter *it) { struct list_head *l = it->cg_link; struct cg_cgroup_link *link; @@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) } /* - * Stuff for reading the 'tasks' file. + * Stuff for reading the 'tasks'/'procs' files. * * Reading this file can return large amounts of data if a cgroup has * *lots* of attached tasks. So it may need several calls to read(), @@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) */ /* - * Load into 'pidarray' up to 'npids' of the tasks using cgroup - * 'cgrp'. Return actual number of pids loaded. No need to - * task_lock(p) when reading out p->cgroup, since we're in an RCU - * read section, so the css_set can't go away, and is - * immutable after creation. + * The following two functions "fix" the issue where there are more pids + * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. + * TODO: replace with a kernel-wide solution to this problem + */ +#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) +static void *pidlist_allocate(int count) +{ + if (PIDLIST_TOO_LARGE(count)) + return vmalloc(count * sizeof(pid_t)); + else + return kmalloc(count * sizeof(pid_t), GFP_KERNEL); +} +static void pidlist_free(void *p) +{ + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); +} +static void *pidlist_resize(void *p, int newcount) +{ + void *newlist; + /* note: if new alloc fails, old p will still be valid either way */ + if (is_vmalloc_addr(p)) { + newlist = vmalloc(newcount * sizeof(pid_t)); + if (!newlist) + return NULL; + memcpy(newlist, p, newcount * sizeof(pid_t)); + vfree(p); + } else { + newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); + } + return newlist; +} + +/* + * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries + * If the new stripped list is sufficiently smaller and there's enough memory + * to allocate a new buffer, will let go of the unneeded memory. Returns the + * number of unique elements. + */ +/* is the size difference enough that we should re-allocate the array? */ +#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new)) +static int pidlist_uniq(pid_t **p, int length) +{ + int src, dest = 1; + pid_t *list = *p; + pid_t *newlist; + + /* + * we presume the 0th element is unique, so i starts at 1. trivial + * edge cases first; no work needs to be done for either + */ + if (length == 0 || length == 1) + return length; + /* src and dest walk down the list; dest counts unique elements */ + for (src = 1; src < length; src++) { + /* find next unique element */ + while (list[src] == list[src-1]) { + src++; + if (src == length) + goto after; + } + /* dest always points to where the next unique element goes */ + list[dest] = list[src]; + dest++; + } +after: + /* + * if the length difference is large enough, we want to allocate a + * smaller buffer to save memory. if this fails due to out of memory, + * we'll just stay with what we've got. + */ + if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { + newlist = pidlist_resize(list, dest); + if (newlist) + *p = newlist; + } + return dest; +} + +static int cmppid(const void *a, const void *b) +{ + return *(pid_t *)a - *(pid_t *)b; +} + +/* + * find the appropriate pidlist for our purpose (given procs vs tasks) + * returns with the lock on that pidlist already held, and takes care + * of the use count, or returns NULL with no locks held if we're out of + * memory. */ -static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) +static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, + enum cgroup_filetype type) { - int n = 0, pid; + struct cgroup_pidlist *l; + /* don't need task_nsproxy() if we're looking at ourself */ + struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); + /* + * We can't drop the pidlist_mutex before taking the l->mutex in case + * the last ref-holder is trying to remove l from the list at the same + * time. Holding the pidlist_mutex precludes somebody taking whichever + * list we find out from under us - compare release_pid_array(). + */ + mutex_lock(&cgrp->pidlist_mutex); + list_for_each_entry(l, &cgrp->pidlists, links) { + if (l->key.type == type && l->key.ns == ns) { + /* found a matching list - drop the extra refcount */ + put_pid_ns(ns); + /* make sure l doesn't vanish out from under us */ + down_write(&l->mutex); + mutex_unlock(&cgrp->pidlist_mutex); + l->use_count++; + return l; + } + } + /* entry not found; create a new one */ + l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); + if (!l) { + mutex_unlock(&cgrp->pidlist_mutex); + put_pid_ns(ns); + return l; + } + init_rwsem(&l->mutex); + down_write(&l->mutex); + l->key.type = type; + l->key.ns = ns; + l->use_count = 0; /* don't increment here */ + l->list = NULL; + l->owner = cgrp; + list_add(&l->links, &cgrp->pidlists); + mutex_unlock(&cgrp->pidlist_mutex); + return l; +} + +/* + * Load a cgroup's pidarray with either procs' tgids or tasks' pids + */ +static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, + struct cgroup_pidlist **lp) +{ + pid_t *array; + int length; + int pid, n = 0; /* used for populating the array */ struct cgroup_iter it; struct task_struct *tsk; + struct cgroup_pidlist *l; + + /* + * If cgroup gets more users after we read count, we won't have + * enough space - tough. This race is indistinguishable to the + * caller from the case that the additional cgroup users didn't + * show up until sometime later on. + */ + length = cgroup_task_count(cgrp); + array = pidlist_allocate(length); + if (!array) + return -ENOMEM; + /* now, populate the array */ cgroup_iter_start(cgrp, &it); while ((tsk = cgroup_iter_next(cgrp, &it))) { - if (unlikely(n == npids)) + if (unlikely(n == length)) break; - pid = task_pid_vnr(tsk); - if (pid > 0) - pidarray[n++] = pid; + /* get tgid or pid for procs or tasks file respectively */ + if (type == CGROUP_FILE_PROCS) + pid = task_tgid_vnr(tsk); + else + pid = task_pid_vnr(tsk); + if (pid > 0) /* make sure to only use valid results */ + array[n++] = pid; } cgroup_iter_end(cgrp, &it); - return n; + length = n; + /* now sort & (if procs) strip out duplicates */ + sort(array, length, sizeof(pid_t), cmppid, NULL); + if (type == CGROUP_FILE_PROCS) + length = pidlist_uniq(&array, length); + l = cgroup_pidlist_find(cgrp, type); + if (!l) { + pidlist_free(array); + return -ENOMEM; + } + /* store array, freeing old if necessary - lock already held */ + pidlist_free(l->list); + l->list = array; + l->length = length; + l->use_count++; + up_write(&l->mutex); + *lp = l; + return 0; } /** @@ -2216,37 +2604,14 @@ err: return ret; } -/* - * Cache pids for all threads in the same pid namespace that are - * opening the same "tasks" file. - */ -struct cgroup_pids { - /* The node in cgrp->pids_list */ - struct list_head list; - /* The cgroup those pids belong to */ - struct cgroup *cgrp; - /* The namepsace those pids belong to */ - struct pid_namespace *ns; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the this tasks_pids array */ - int use_count; - /* Length of the current tasks_pids array */ - int length; -}; - -static int cmppid(const void *a, const void *b) -{ - return *(pid_t *)a - *(pid_t *)b; -} /* - * seq_file methods for the "tasks" file. The seq_file position is the + * seq_file methods for the tasks/procs files. The seq_file position is the * next pid to display; the seq_file iterator is a pointer to the pid - * in the cgroup->tasks_pids array. + * in the cgroup->l->list array. */ -static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) +static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) { /* * Initially we receive a position value that corresponds to @@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) * after a seek to the start). Use a binary-search to find the * next pid to display, if any */ - struct cgroup_pids *cp = s->private; - struct cgroup *cgrp = cp->cgrp; + struct cgroup_pidlist *l = s->private; int index = 0, pid = *pos; int *iter; - down_read(&cgrp->pids_mutex); + down_read(&l->mutex); if (pid) { - int end = cp->length; + int end = l->length; while (index < end) { int mid = (index + end) / 2; - if (cp->tasks_pids[mid] == pid) { + if (l->list[mid] == pid) { index = mid; break; - } else if (cp->tasks_pids[mid] <= pid) + } else if (l->list[mid] <= pid) index = mid + 1; else end = mid; } } /* If we're off the end of the array, we're done */ - if (index >= cp->length) + if (index >= l->length) return NULL; /* Update the abstract position to be the actual pid that we found */ - iter = cp->tasks_pids + index; + iter = l->list + index; *pos = *iter; return iter; } -static void cgroup_tasks_stop(struct seq_file *s, void *v) +static void cgroup_pidlist_stop(struct seq_file *s, void *v) { - struct cgroup_pids *cp = s->private; - struct cgroup *cgrp = cp->cgrp; - up_read(&cgrp->pids_mutex); + struct cgroup_pidlist *l = s->private; + up_read(&l->mutex); } -static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) +static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { - struct cgroup_pids *cp = s->private; - int *p = v; - int *end = cp->tasks_pids + cp->length; - + struct cgroup_pidlist *l = s->private; + pid_t *p = v; + pid_t *end = l->list + l->length; /* * Advance to the next pid in the array. If this goes off the * end, we're done @@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) } } -static int cgroup_tasks_show(struct seq_file *s, void *v) +static int cgroup_pidlist_show(struct seq_file *s, void *v) { return seq_printf(s, "%d\n", *(int *)v); } -static const struct seq_operations cgroup_tasks_seq_operations = { - .start = cgroup_tasks_start, - .stop = cgroup_tasks_stop, - .next = cgroup_tasks_next, - .show = cgroup_tasks_show, +/* + * seq_operations functions for iterating on pidlists through seq_file - + * independent of whether it's tasks or procs + */ +static const struct seq_operations cgroup_pidlist_seq_operations = { + .start = cgroup_pidlist_start, + .stop = cgroup_pidlist_stop, + .next = cgroup_pidlist_next, + .show = cgroup_pidlist_show, }; -static void release_cgroup_pid_array(struct cgroup_pids *cp) +static void cgroup_release_pid_array(struct cgroup_pidlist *l) { - struct cgroup *cgrp = cp->cgrp; - - down_write(&cgrp->pids_mutex); - BUG_ON(!cp->use_count); - if (!--cp->use_count) { - list_del(&cp->list); - put_pid_ns(cp->ns); - kfree(cp->tasks_pids); - kfree(cp); + /* + * the case where we're the last user of this particular pidlist will + * have us remove it from the cgroup's list, which entails taking the + * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> + * pidlist_mutex, we have to take pidlist_mutex first. + */ + mutex_lock(&l->owner->pidlist_mutex); + down_write(&l->mutex); + BUG_ON(!l->use_count); + if (!--l->use_count) { + /* we're the last user if refcount is 0; remove and free */ + list_del(&l->links); + mutex_unlock(&l->owner->pidlist_mutex); + pidlist_free(l->list); + put_pid_ns(l->key.ns); + up_write(&l->mutex); + kfree(l); + return; } - up_write(&cgrp->pids_mutex); + mutex_unlock(&l->owner->pidlist_mutex); + up_write(&l->mutex); } -static int cgroup_tasks_release(struct inode *inode, struct file *file) +static int cgroup_pidlist_release(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct cgroup_pids *cp; - + struct cgroup_pidlist *l; if (!(file->f_mode & FMODE_READ)) return 0; - - seq = file->private_data; - cp = seq->private; - - release_cgroup_pid_array(cp); + /* + * the seq_file will only be initialized if the file was opened for + * reading; hence we check if it's not null only in that case. + */ + l = ((struct seq_file *)file->private_data)->private; + cgroup_release_pid_array(l); return seq_release(inode, file); } -static struct file_operations cgroup_tasks_operations = { +static const struct file_operations cgroup_pidlist_operations = { .read = seq_read, .llseek = seq_lseek, .write = cgroup_file_write, - .release = cgroup_tasks_release, + .release = cgroup_pidlist_release, }; /* - * Handle an open on 'tasks' file. Prepare an array containing the - * process id's of tasks currently attached to the cgroup being opened. + * The following functions handle opens on a file that displays a pidlist + * (tasks or procs). Prepare an array of the process/thread IDs of whoever's + * in the cgroup. */ - -static int cgroup_tasks_open(struct inode *unused, struct file *file) +/* helper function for the two below it */ +static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) { struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - struct pid_namespace *ns = current->nsproxy->pid_ns; - struct cgroup_pids *cp; - pid_t *pidarray; - int npids; + struct cgroup_pidlist *l; int retval; /* Nothing to do for write-only files */ if (!(file->f_mode & FMODE_READ)) return 0; - /* - * If cgroup gets more users after we read count, we won't have - * enough space - tough. This race is indistinguishable to the - * caller from the case that the additional cgroup users didn't - * show up until sometime later on. - */ - npids = cgroup_task_count(cgrp); - pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); - if (!pidarray) - return -ENOMEM; - npids = pid_array_load(pidarray, npids, cgrp); - sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); - - /* - * Store the array in the cgroup, freeing the old - * array if necessary - */ - down_write(&cgrp->pids_mutex); - - list_for_each_entry(cp, &cgrp->pids_list, list) { - if (ns == cp->ns) - goto found; - } - - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) { - up_write(&cgrp->pids_mutex); - kfree(pidarray); - return -ENOMEM; - } - cp->cgrp = cgrp; - cp->ns = ns; - get_pid_ns(ns); - list_add(&cp->list, &cgrp->pids_list); -found: - kfree(cp->tasks_pids); - cp->tasks_pids = pidarray; - cp->length = npids; - cp->use_count++; - up_write(&cgrp->pids_mutex); - - file->f_op = &cgroup_tasks_operations; + /* have the array populated */ + retval = pidlist_array_load(cgrp, type, &l); + if (retval) + return retval; + /* configure file information */ + file->f_op = &cgroup_pidlist_operations; - retval = seq_open(file, &cgroup_tasks_seq_operations); + retval = seq_open(file, &cgroup_pidlist_seq_operations); if (retval) { - release_cgroup_pid_array(cp); + cgroup_release_pid_array(l); return retval; } - ((struct seq_file *)file->private_data)->private = cp; + ((struct seq_file *)file->private_data)->private = l; return 0; } +static int cgroup_tasks_open(struct inode *unused, struct file *file) +{ + return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); +} +static int cgroup_procs_open(struct inode *unused, struct file *file) +{ + return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); +} static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, struct cftype *cft) @@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, /* * for the common functions, 'private' gives the type of file */ +/* for hysterical raisins, we can't put this on the older files */ +#define CGROUP_FILE_GENERIC_PREFIX "cgroup." static struct cftype files[] = { { .name = "tasks", .open = cgroup_tasks_open, .write_u64 = cgroup_tasks_write, - .release = cgroup_tasks_release, - .private = FILE_TASKLIST, + .release = cgroup_pidlist_release, .mode = S_IRUGO | S_IWUSR, }, - + { + .name = CGROUP_FILE_GENERIC_PREFIX "procs", + .open = cgroup_procs_open, + /* .write_u64 = cgroup_procs_write, TODO */ + .release = cgroup_pidlist_release, + .mode = S_IRUGO, + }, { .name = "notify_on_release", .read_u64 = cgroup_read_notify_on_release, .write_u64 = cgroup_write_notify_on_release, - .private = FILE_NOTIFY_ON_RELEASE, }, }; @@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = { .read_seq_string = cgroup_release_agent_show, .write_string = cgroup_release_agent_write, .max_write_len = PATH_MAX, - .private = FILE_RELEASE_AGENT, }; static int cgroup_populate_dir(struct cgroup *cgrp) @@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void) init_task.cgroups = &init_css_set; init_css_set_link.cg = &init_css_set; + init_css_set_link.cgrp = dummytop; list_add(&init_css_set_link.cgrp_link_list, &rootnode.top_cgroup.css_sets); list_add(&init_css_set_link.cg_link_list, @@ -2933,7 +3284,7 @@ int __init cgroup_init(void) /* Add init_css_set to the hash table */ hhead = css_set_hash(init_css_set.subsys); hlist_add_head(&init_css_set.hlist, hhead); - + BUG_ON(!init_root_id(&rootnode)); err = register_filesystem(&cgroup_fs_type); if (err < 0) goto out; @@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v) for_each_active_root(root) { struct cgroup_subsys *ss; struct cgroup *cgrp; - int subsys_id; int count = 0; - seq_printf(m, "%lu:", root->subsys_bits); + seq_printf(m, "%d:", root->hierarchy_id); for_each_subsys(root, ss) seq_printf(m, "%s%s", count++ ? "," : "", ss->name); + if (strlen(root->name)) + seq_printf(m, "%sname=%s", count ? "," : "", + root->name); seq_putc(m, ':'); - get_first_subsys(&root->top_cgroup, NULL, &subsys_id); - cgrp = task_cgroup(tsk, subsys_id); + cgrp = task_cgroup_from_root(tsk, root); retval = cgroup_path(cgrp, buf, PAGE_SIZE); if (retval < 0) goto out_unlock; @@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - seq_printf(m, "%s\t%lu\t%d\t%d\n", - ss->name, ss->root->subsys_bits, + seq_printf(m, "%s\t%d\t%d\t%d\n", + ss->name, ss->root->hierarchy_id, ss->root->number_of_cgroups, !ss->disabled); } mutex_unlock(&cgroup_mutex); @@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) { int ret; struct cgroup *target; - int subsys_id; if (cgrp == dummytop) return 1; - get_first_subsys(cgrp, NULL, &subsys_id); - target = task_cgroup(task, subsys_id); + target = task_cgroup_from_root(task, cgrp->root); while (cgrp != target && cgrp!= cgrp->top_cgroup) cgrp = cgrp->parent; ret = (cgrp == target); @@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id, return ret; } +#ifdef CONFIG_CGROUP_DEBUG +static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, + struct cgroup *cont) +{ + struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); + + if (!css) + return ERR_PTR(-ENOMEM); + + return css; +} + +static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +{ + kfree(cont->subsys[debug_subsys_id]); +} + +static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) +{ + return atomic_read(&cont->count); +} + +static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) +{ + return cgroup_task_count(cont); +} + +static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) +{ + return (u64)(unsigned long)current->cgroups; +} + +static u64 current_css_set_refcount_read(struct cgroup *cont, + struct cftype *cft) +{ + u64 count; + + rcu_read_lock(); + count = atomic_read(¤t->cgroups->refcount); + rcu_read_unlock(); + return count; +} + +static int current_css_set_cg_links_read(struct cgroup *cont, + struct cftype *cft, + struct seq_file *seq) +{ + struct cg_cgroup_link *link; + struct css_set *cg; + + read_lock(&css_set_lock); + rcu_read_lock(); + cg = rcu_dereference(current->cgroups); + list_for_each_entry(link, &cg->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + const char *name; + + if (c->dentry) + name = c->dentry->d_name.name; + else + name = "?"; + seq_printf(seq, "Root %d group %s\n", + c->root->hierarchy_id, name); + } + rcu_read_unlock(); + read_unlock(&css_set_lock); + return 0; +} + +#define MAX_TASKS_SHOWN_PER_CSS 25 +static int cgroup_css_links_read(struct cgroup *cont, + struct cftype *cft, + struct seq_file *seq) +{ + struct cg_cgroup_link *link; + + read_lock(&css_set_lock); + list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { + struct css_set *cg = link->cg; + struct task_struct *task; + int count = 0; + seq_printf(seq, "css_set %p\n", cg); + list_for_each_entry(task, &cg->tasks, cg_list) { + if (count++ > MAX_TASKS_SHOWN_PER_CSS) { + seq_puts(seq, " ...\n"); + break; + } else { + seq_printf(seq, " task %d\n", + task_pid_vnr(task)); + } + } + } + read_unlock(&css_set_lock); + return 0; +} + +static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) +{ + return test_bit(CGRP_RELEASABLE, &cgrp->flags); +} + +static struct cftype debug_files[] = { + { + .name = "cgroup_refcount", + .read_u64 = cgroup_refcount_read, + }, + { + .name = "taskcount", + .read_u64 = debug_taskcount_read, + }, + + { + .name = "current_css_set", + .read_u64 = current_css_set_read, + }, + + { + .name = "current_css_set_refcount", + .read_u64 = current_css_set_refcount_read, + }, + + { + .name = "current_css_set_cg_links", + .read_seq_string = current_css_set_cg_links_read, + }, + + { + .name = "cgroup_css_links", + .read_seq_string = cgroup_css_links_read, + }, + + { + .name = "releasable", + .read_u64 = releasable_read, + }, +}; + +static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) +{ + return cgroup_add_files(cont, ss, debug_files, + ARRAY_SIZE(debug_files)); +} + +struct cgroup_subsys debug_subsys = { + .name = "debug", + .create = debug_create, + .destroy = debug_destroy, + .populate = debug_populate, + .subsys_id = debug_subsys_id, +}; +#endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c deleted file mode 100644 index 0c92d797baa..00000000000 --- a/kernel/cgroup_debug.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * kernel/cgroup_debug.c - Example cgroup subsystem that - * exposes debug info - * - * Copyright (C) Google Inc, 2007 - * - * Developed by Paul Menage (menage@google.com) - * - */ - -#include <linux/cgroup.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/rcupdate.h> - -#include <asm/atomic.h> - -static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); - - if (!css) - return ERR_PTR(-ENOMEM); - - return css; -} - -static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) -{ - kfree(cont->subsys[debug_subsys_id]); -} - -static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) -{ - return atomic_read(&cont->count); -} - -static u64 taskcount_read(struct cgroup *cont, struct cftype *cft) -{ - u64 count; - - count = cgroup_task_count(cont); - return count; -} - -static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) -{ - return (u64)(long)current->cgroups; -} - -static u64 current_css_set_refcount_read(struct cgroup *cont, - struct cftype *cft) -{ - u64 count; - - rcu_read_lock(); - count = atomic_read(¤t->cgroups->refcount); - rcu_read_unlock(); - return count; -} - -static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) -{ - return test_bit(CGRP_RELEASABLE, &cgrp->flags); -} - -static struct cftype files[] = { - { - .name = "cgroup_refcount", - .read_u64 = cgroup_refcount_read, - }, - { - .name = "taskcount", - .read_u64 = taskcount_read, - }, - - { - .name = "current_css_set", - .read_u64 = current_css_set_read, - }, - - { - .name = "current_css_set_refcount", - .read_u64 = current_css_set_refcount_read, - }, - - { - .name = "releasable", - .read_u64 = releasable_read, - }, -}; - -static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) -{ - return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); -} - -struct cgroup_subsys debug_subsys = { - .name = "debug", - .create = debug_create, - .destroy = debug_destroy, - .populate = debug_populate, - .subsys_id = debug_subsys_id, -}; diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fb249e2bcad..59e9ef6aab4 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task) */ static int freezer_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, - struct task_struct *task) + struct task_struct *task, bool threadgroup) { struct freezer *freezer; @@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss, if (freezer->state == CGROUP_FROZEN) return -EBUSY; + if (threadgroup) { + struct task_struct *c; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &task->thread_group, thread_group) { + if (is_task_frozen_enough(c)) { + rcu_read_unlock(); + return -EBUSY; + } + } + rcu_read_unlock(); + } + return 0; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7e75a41bd50..b5cb469d254 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp) static cpumask_var_t cpus_attach; /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ -static int cpuset_can_attach(struct cgroup_subsys *ss, - struct cgroup *cont, struct task_struct *tsk) +static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, + struct task_struct *tsk, bool threadgroup) { + int ret; struct cpuset *cs = cgroup_cs(cont); if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) @@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, if (tsk->flags & PF_THREAD_BOUND) return -EINVAL; - return security_task_setscheduler(tsk, 0, NULL); + ret = security_task_setscheduler(tsk, 0, NULL); + if (ret) + return ret; + if (threadgroup) { + struct task_struct *c; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + ret = security_task_setscheduler(c, 0, NULL); + if (ret) { + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + } + return 0; +} + +static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, + struct cpuset *cs) +{ + int err; + /* + * can_attach beforehand should guarantee that this doesn't fail. + * TODO: have a better way to handle failure here + */ + err = set_cpus_allowed_ptr(tsk, cpus_attach); + WARN_ON_ONCE(err); + + task_lock(tsk); + cpuset_change_task_nodemask(tsk, to); + task_unlock(tsk); + cpuset_update_task_spread_flag(cs, tsk); + } -static void cpuset_attach(struct cgroup_subsys *ss, - struct cgroup *cont, struct cgroup *oldcont, - struct task_struct *tsk) +static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, + struct cgroup *oldcont, struct task_struct *tsk, + bool threadgroup) { nodemask_t from, to; struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); - int err; if (cs == &top_cpuset) { cpumask_copy(cpus_attach, cpu_possible_mask); @@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss, guarantee_online_cpus(cs, cpus_attach); guarantee_online_mems(cs, &to); } - err = set_cpus_allowed_ptr(tsk, cpus_attach); - if (err) - return; - task_lock(tsk); - cpuset_change_task_nodemask(tsk, &to); - task_unlock(tsk); - cpuset_update_task_spread_flag(cs, tsk); + /* do per-task migration stuff possibly for each in the threadgroup */ + cpuset_attach_task(tsk, &to, cs); + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + cpuset_attach_task(c, &to, cs); + } + rcu_read_unlock(); + } + /* change mm; only needs to be done once even if threadgroup */ from = oldcs->mems_allowed; to = cs->mems_allowed; mm = get_task_mm(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index 60d6fdcc926..5859f598c95 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code) disassociate_ctty(1); module_put(task_thread_info(tsk)->exec_domain->module); - if (tsk->binfmt) - module_put(tsk->binfmt->module); proc_exit_connector(tsk); @@ -1097,28 +1095,28 @@ struct wait_opts { int __user *wo_stat; struct rusage __user *wo_rusage; + wait_queue_t child_wait; int notask_error; }; -static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +static inline +struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { - struct pid *pid = NULL; - if (type == PIDTYPE_PID) - pid = task->pids[type].pid; - else if (type < PIDTYPE_MAX) - pid = task->group_leader->pids[type].pid; - return pid; + if (type != PIDTYPE_PID) + task = task->group_leader; + return task->pids[type].pid; } -static int eligible_child(struct wait_opts *wo, struct task_struct *p) +static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { - int err; - - if (wo->wo_type < PIDTYPE_MAX) { - if (task_pid_type(p, wo->wo_type) != wo->wo_pid) - return 0; - } + return wo->wo_type == PIDTYPE_MAX || + task_pid_type(p, wo->wo_type) == wo->wo_pid; +} +static int eligible_child(struct wait_opts *wo, struct task_struct *p) +{ + if (!eligible_pid(wo, p)) + return 0; /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p) && !(wo->wo_flags & __WALL)) return 0; - err = security_task_wait(p); - if (err) - return err; - return 1; } @@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, put_task_struct(p); infop = wo->wo_info; - if (!retval) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(pid, &infop->si_pid); - if (!retval) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = put_user(status, &infop->si_status); + if (infop) { + if (!retval) + retval = put_user(SIGCHLD, &infop->si_signo); + if (!retval) + retval = put_user(0, &infop->si_errno); + if (!retval) + retval = put_user((short)why, &infop->si_code); + if (!retval) + retval = put_user(pid, &infop->si_pid); + if (!retval) + retval = put_user(uid, &infop->si_uid); + if (!retval) + retval = put_user(status, &infop->si_status); + } if (!retval) retval = pid; return retval; @@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) * then ->notask_error is 0 if @p is an eligible child, * or another error from security_task_wait(), or still -ECHILD. */ -static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, - int ptrace, struct task_struct *p) +static int wait_consider_task(struct wait_opts *wo, int ptrace, + struct task_struct *p) { int ret = eligible_child(wo, p); if (!ret) return ret; + ret = security_task_wait(p); if (unlikely(ret < 0)) { /* * If we have not yet seen any eligible child, @@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) * Do not consider detached threads. */ if (!task_detached(p)) { - int ret = wait_consider_task(wo, tsk, 0, p); + int ret = wait_consider_task(wo, 0, p); if (ret) return ret; } @@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) struct task_struct *p; list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { - int ret = wait_consider_task(wo, tsk, 1, p); + int ret = wait_consider_task(wo, 1, p); if (ret) return ret; } @@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) return 0; } +static int child_wait_callback(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + struct wait_opts *wo = container_of(wait, struct wait_opts, + child_wait); + struct task_struct *p = key; + + if (!eligible_pid(wo, p)) + return 0; + + if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) + return 0; + + return default_wake_function(wait, mode, sync, key); +} + +void __wake_up_parent(struct task_struct *p, struct task_struct *parent) +{ + __wake_up_sync_key(&parent->signal->wait_chldexit, + TASK_INTERRUPTIBLE, 1, p); +} + static long do_wait(struct wait_opts *wo) { - DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; int retval; trace_sched_process_wait(wo->wo_pid); - add_wait_queue(¤t->signal->wait_chldexit,&wait); + init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); + wo->child_wait.private = current; + add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); repeat: /* * If there is nothing that can match our critiera just get out. @@ -1624,32 +1644,7 @@ notask: } end: __set_current_state(TASK_RUNNING); - remove_wait_queue(¤t->signal->wait_chldexit,&wait); - if (wo->wo_info) { - struct siginfo __user *infop = wo->wo_info; - - if (retval > 0) - retval = 0; - else { - /* - * For a WNOHANG return, clear out all the fields - * we would set so the user can easily tell the - * difference. - */ - if (!retval) - retval = put_user(0, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user(0, &infop->si_code); - if (!retval) - retval = put_user(0, &infop->si_pid); - if (!retval) - retval = put_user(0, &infop->si_uid); - if (!retval) - retval = put_user(0, &infop->si_status); - } - } + remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); return retval; } @@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, wo.wo_stat = NULL; wo.wo_rusage = ru; ret = do_wait(&wo); + + if (ret > 0) { + ret = 0; + } else if (infop) { + /* + * For a WNOHANG return, clear out all the fields + * we would set so the user can easily tell the + * difference. + */ + if (!ret) + ret = put_user(0, &infop->si_signo); + if (!ret) + ret = put_user(0, &infop->si_errno); + if (!ret) + ret = put_user(0, &infop->si_code); + if (!ret) + ret = put_user(0, &infop->si_pid); + if (!ret) + ret = put_user(0, &infop->si_uid); + if (!ret) + ret = put_user(0, &infop->si_status); + } + put_pid(pid); /* avoid REGPARM breakage on x86: */ diff --git a/kernel/fork.c b/kernel/fork.c index 51ad0b0b726..266c6af6ef1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup); #include <linux/init_task.h> +static void mm_init_aio(struct mm_struct *mm) +{ +#ifdef CONFIG_AIO + spin_lock_init(&mm->ioctx_lock); + INIT_HLIST_HEAD(&mm->ioctx_list); +#endif +} + static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); - spin_lock_init(&mm->ioctx_lock); - INIT_HLIST_HEAD(&mm->ioctx_list); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; + mm_init_aio(mm); mm_init_owner(mm, p); if (likely(!mm_alloc_pgd(mm))) { @@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm) spin_unlock(&mmlist_lock); } put_swap_token(mm); + if (mm->binfmt) + module_put(mm->binfmt->module); mmdrop(mm); } } @@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk) mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; + if (mm->binfmt && !try_module_get(mm->binfmt->module)) + goto free_pt; + return mm; free_pt: + /* don't put binfmt in mmput, we haven't got module yet */ + mm->binfmt = NULL; mmput(mm); fail_nomem: @@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); + /* + * Siblings of global init remain as zombies on exit since they are + * not reaped by their parent (swapper). To solve this and to avoid + * multi-rooted process trees, prevent global and container-inits + * from creating siblings. + */ + if ((clone_flags & CLONE_PARENT) && + current->signal->flags & SIGNAL_UNKILLABLE) + return ERR_PTR(-EINVAL); + retval = security_task_create(clone_flags); if (retval) goto fork_out; @@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; - if (p->binfmt && !try_module_get(p->binfmt->module)) - goto bad_fork_cleanup_put_domain; - p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); @@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); - if (p->binfmt) - module_put(p->binfmt->module); -bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 654efd09f6a..70a298d6da7 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -34,7 +34,7 @@ config GCOV_KERNEL config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on S390 || X86 || (PPC && EXPERIMENTAL) + depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE default n ---help--- This options activates profiling for the entire kernel. diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 022a4927b78..d4e84174740 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout) * Process updating of timeout sysctl */ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa6..5a29397ca4b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, } } +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); + } +} + #ifdef CONFIG_KALLSYMS /* lookup symbol in given range of kernel_symbols */ @@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Ehdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; + const Elf_Sym *src; + const char *strtab; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; + ++ndst; + } + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); + ++ndst; + } + mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Hdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs, stroffs, *strmap; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod, synchronize_sched(); module_arch_cleanup(mod); cleanup: + free_modinfo(mod); kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - free_init: percpu_modfree(mod->refptr); + free_init: #endif module_free(mod, mod->module_init); free_core: @@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 5aa854f9e5a..2a5dfec8efe 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid) * (hence either you are in the same cgroup as task, or in an * ancestor cgroup thereof) */ -static int ns_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, struct task_struct *task) +static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, + struct task_struct *task, bool threadgroup) { if (current != task) { if (!capable(CAP_SYS_ADMIN)) @@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss, if (!cgroup_is_descendant(new_cgroup, task)) return -EPERM; + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &task->thread_group, thread_group) { + if (!cgroup_is_descendant(new_cgroup, c)) { + rcu_read_unlock(); + return -EPERM; + } + } + rcu_read_unlock(); + } + return 0; } diff --git a/kernel/params.c b/kernel/params.c index 7f6912ced2b..9da58eabdcb 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -23,6 +23,7 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/slab.h> +#include <linux/ctype.h> #if 0 #define DEBUGP printk @@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val) } for (i = 0; args[i]; i++) { - if (args[i] == ' ' && !in_quote) + if (isspace(args[i]) && !in_quote) break; if (equals == 0) { if (args[i] == '=') @@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val) next = args + i; /* Chew up trailing spaces. */ - while (*next == ' ') + while (isspace(*next)) next++; return next; } @@ -138,7 +139,7 @@ int parse_args(const char *name, DEBUGP("Parsing ARGS: %s\n", args); /* Chew leading spaces */ - while (*args == ' ') + while (isspace(*args)) args++; while (*args) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 821722ae58a..86b3796b043 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old { if (!(flags & CLONE_NEWPID)) return get_pid_ns(old_ns); - if (flags & CLONE_THREAD) + if (flags & (CLONE_THREAD|CLONE_PARENT)) return ERR_PTR(-EINVAL); return create_pid_namespace(old_ns); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 307c285af59..23bd09cd042 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh) * or self-reaping. Do notification now if it would have happened earlier. * If it should reap itself, return true. * - * If it's our own child, there is no notification to do. - * But if our normal children self-reap, then this child - * was prevented by ptrace and we must reap it now. + * If it's our own child, there is no notification to do. But if our normal + * children self-reap, then this child was prevented by ptrace and we must + * reap it now, in that case we must also wake up sub-threads sleeping in + * do_wait(). */ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) { @@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, tracer)) do_notify_parent(p, p->exit_signal); - else if (ignoring_children(tracer->sighand)) + else if (ignoring_children(tracer->sighand)) { + __wake_up_parent(p, tracer); p->exit_signal = -1; + } } if (task_detached(p)) { /* Mark it as in the process of being reaped. */ diff --git a/kernel/res_counter.c b/kernel/res_counter.c index e1338f07431..88faec23e83 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) { spin_lock_init(&counter->lock); counter->limit = RESOURCE_MAX; + counter->soft_limit = RESOURCE_MAX; counter->parent = parent; } @@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) } int res_counter_charge(struct res_counter *counter, unsigned long val, - struct res_counter **limit_fail_at) + struct res_counter **limit_fail_at, + struct res_counter **soft_limit_fail_at) { int ret; unsigned long flags; struct res_counter *c, *u; *limit_fail_at = NULL; + if (soft_limit_fail_at) + *soft_limit_fail_at = NULL; local_irq_save(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); ret = res_counter_charge_locked(c, val); + /* + * With soft limits, we return the highest ancestor + * that exceeds its soft limit + */ + if (soft_limit_fail_at && + !res_counter_soft_limit_check_locked(c)) + *soft_limit_fail_at = c; spin_unlock(&c->lock); if (ret < 0) { *limit_fail_at = c; @@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) counter->usage -= val; } -void res_counter_uncharge(struct res_counter *counter, unsigned long val) +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess) { unsigned long flags; struct res_counter *c; @@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) local_irq_save(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); + if (was_soft_limit_excess) + *was_soft_limit_excess = + !res_counter_soft_limit_check_locked(c); res_counter_uncharge_locked(c, val); spin_unlock(&c->lock); } @@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member) return &counter->limit; case RES_FAILCNT: return &counter->failcnt; + case RES_SOFT_LIMIT: + return &counter->soft_limit; }; BUG(); diff --git a/kernel/sched.c b/kernel/sched.c index 2f76e06bea5..ee61f454a98 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void) #endif /* CONFIG_RT_GROUP_SCHED */ int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_constraints(); @@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) } static int -cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *tsk) +cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) @@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, if (tsk->sched_class != &fair_sched_class) return -EINVAL; #endif + return 0; +} +static int +cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup) +{ + int retval = cpu_cgroup_can_attach_task(cgrp, tsk); + if (retval) + return retval; + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + retval = cpu_cgroup_can_attach_task(cgrp, c); + if (retval) { + rcu_read_unlock(); + return retval; + } + } + rcu_read_unlock(); + } return 0; } static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cont, struct task_struct *tsk) + struct cgroup *old_cont, struct task_struct *tsk, + bool threadgroup) { sched_move_task(tsk); + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + sched_move_task(c); + } + rcu_read_unlock(); + } } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ecc637a0d59..4e777b47eed 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) #ifdef CONFIG_SCHED_DEBUG int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; diff --git a/kernel/signal.c b/kernel/signal.c index 64c5deeaca5..6705320784f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) if (why) { /* - * The first thread which returns from finish_stop() + * The first thread which returns from do_signal_stop() * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal_to_deliver(). */ @@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) return send_signal(sig, info, t, 0); } +int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, + bool group) +{ + unsigned long flags; + int ret = -ESRCH; + + if (lock_task_sighand(p, &flags)) { + ret = send_signal(sig, info, p, group); + unlock_task_sighand(p, &flags); + } + + return ret; +} + /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. @@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p) } } -int __fatal_signal_pending(struct task_struct *tsk) -{ - return sigismember(&tsk->pending.signal, SIGKILL); -} -EXPORT_SYMBOL(__fatal_signal_pending); - struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) { struct sighand_struct *sighand; @@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - unsigned long flags; - int ret; + int ret = check_kill_permission(sig, info, p); - ret = check_kill_permission(sig, info, p); - - if (!ret && sig) { - ret = -ESRCH; - if (lock_task_sighand(p, &flags)) { - ret = __group_send_sig_info(sig, info, p); - unlock_task_sighand(p, &flags); - } - } + if (!ret && sig) + ret = do_send_sig_info(sig, info, p, true); return ret; } @@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) * These are for backward compatibility with the rest of the kernel source. */ -/* - * The caller must ensure the task can't exit. - */ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - int ret; - unsigned long flags; - /* * Make sure legacy kernel users don't send in bad values * (normal paths check this in check_kill_permission). @@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) if (!valid_signal(sig)) return -EINVAL; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = specific_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); - return ret; + return do_send_sig_info(sig, info, p, false); } #define __si_special(priv) \ @@ -1383,15 +1374,6 @@ ret: } /* - * Wake up any threads in the parent blocked in wait* syscalls. - */ -static inline void __wake_up_parent(struct task_struct *p, - struct task_struct *parent) -{ - wake_up_interruptible_sync(&parent->signal->wait_chldexit); -} - -/* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. * @@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code) spin_unlock_irq(¤t->sighand->siglock); } -static void -finish_stop(int stop_count) -{ - /* - * If there are no other threads in the group, or if there is - * a group stop in progress and we are the last to stop, - * report to the parent. When ptraced, every thread reports itself. - */ - if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, CLD_STOPPED); - read_unlock(&tasklist_lock); - } - - do { - schedule(); - } while (try_to_freeze()); - /* - * Now we don't run again until continued. - */ - current->exit_code = 0; -} - /* * This performs the stopping for SIGSTOP and other stop signals. * We have to stop all threads in the thread group. @@ -1705,15 +1664,9 @@ finish_stop(int stop_count) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int stop_count; + int notify; - if (sig->group_stop_count > 0) { - /* - * There is a group stop in progress. We don't need to - * start another one. - */ - stop_count = --sig->group_stop_count; - } else { + if (!sig->group_stop_count) { struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; - stop_count = 0; + sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) /* * Setting state to TASK_STOPPED for a group @@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr) */ if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { - stop_count++; + sig->group_stop_count++; signal_wake_up(t, 0); } - sig->group_stop_count = stop_count; } + /* + * If there are no other threads in the group, or if there is + * a group stop in progress and we are the last to stop, report + * to the parent. When ptraced, every thread reports itself. + */ + notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; + notify = tracehook_notify_jctl(notify, CLD_STOPPED); + /* + * tracehook_notify_jctl() can drop and reacquire siglock, so + * we keep ->group_stop_count != 0 before the call. If SIGCONT + * or SIGKILL comes in between ->group_stop_count == 0. + */ + if (sig->group_stop_count) { + if (!--sig->group_stop_count) + sig->flags = SIGNAL_STOP_STOPPED; + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + } + spin_unlock_irq(¤t->sighand->siglock); - if (stop_count == 0) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); + if (notify) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, notify); + read_unlock(&tasklist_lock); + } + + /* Now we don't run again until woken by SIGCONT or SIGKILL */ + do { + schedule(); + } while (try_to_freeze()); + + tracehook_finish_jctl(); + current->exit_code = 0; - spin_unlock_irq(¤t->sighand->siglock); - finish_stop(stop_count); return 1; } @@ -1815,14 +1793,15 @@ relock: int why = (signal->flags & SIGNAL_STOP_CONTINUED) ? CLD_CONTINUED : CLD_STOPPED; signal->flags &= ~SIGNAL_CLD_MASK; - spin_unlock_irq(&sighand->siglock); - if (unlikely(!tracehook_notify_jctl(1, why))) - goto relock; + why = tracehook_notify_jctl(why, CLD_CONTINUED); + spin_unlock_irq(&sighand->siglock); - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); + if (why) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); + } goto relock; } @@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk) if (unlikely(tsk->signal->group_stop_count) && !--tsk->signal->group_stop_count) { tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = 1; + group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); } out: spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) { + if (unlikely(group_stop)) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(tsk, CLD_STOPPED); + do_notify_parent_cldstop(tsk, group_stop); read_unlock(&tasklist_lock); } } @@ -2290,7 +2269,6 @@ static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { struct task_struct *p; - unsigned long flags; int error = -ESRCH; rcu_read_lock(); @@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. - * - * If lock_task_sighand() fails we pretend the task dies - * after receiving the signal. The window is tiny, and the - * signal is private anyway. */ - if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, info, p); - unlock_task_sighand(p, &flags); + if (!error && sig) { + error = do_send_sig_info(sig, info, p, false); + /* + * If lock_task_sighand() failed we pretend the task + * dies after receiving the signal. The window is tiny, + * and the signal is private anyway. + */ + if (unlikely(error == -ESRCH)) + error = 0; } } rcu_read_unlock(); diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 09d7519557d..0d31135efbf 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); #ifdef CONFIG_SYSCTL -static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *, +static int slow_work_min_threads_sysctl(struct ctl_table *, int, void __user *, size_t *, loff_t *); -static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *, +static int slow_work_max_threads_sysctl(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif @@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data) * Handle adjustment of the minimum number of threads */ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { @@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, * Handle adjustment of the maximum number of threads */ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 88796c33083..81324d12eb3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void) EXPORT_SYMBOL(touch_all_softlockup_watchdogs); int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { touch_all_softlockup_watchdogs(); - return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } /* diff --git a/kernel/sys.c b/kernel/sys.c index ebcb1561172..255475d163e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, current->timer_slack_ns = arg2; error = 0; break; + case PR_MCE_KILL: + if (arg4 | arg5) + return -EINVAL; + switch (arg2) { + case 0: + if (arg3 != 0) + return -EINVAL; + current->flags &= ~PF_MCE_PROCESS; + break; + case 1: + current->flags |= PF_MCE_PROCESS; + if (arg3 != 0) + current->flags |= PF_MCE_EARLY; + else + current->flags &= ~PF_MCE_EARLY; + break; + default: + return -EINVAL; + } + error = 0; + break; + default: error = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7f4f57bea4c..0d949c51741 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,6 +76,7 @@ extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; +extern unsigned int core_pipe_limit; extern int pid_max; extern int min_free_kbytes; extern int pid_max_min, pid_max_max; @@ -162,9 +163,9 @@ extern int max_lock_depth; #endif #ifdef CONFIG_PROC_SYSCTL -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -423,6 +424,14 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "core_pipe_limit", + .data = &core_pipe_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -1389,6 +1398,31 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &scan_unevictable_handler, }, +#ifdef CONFIG_MEMORY_FAILURE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "memory_failure_early_kill", + .data = &sysctl_memory_failure_early_kill, + .maxlen = sizeof(sysctl_memory_failure_early_kill), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "memory_failure_recovery", + .data = &sysctl_memory_failure_recovery, + .maxlen = sizeof(sysctl_memory_failure_recovery), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif + /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt @@ -2217,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head) #ifdef CONFIG_PROC_SYSCTL static int _proc_do_string(void* data, int maxlen, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { size_t len; @@ -2278,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write, * proc_dostring - read a string sysctl * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2292,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write, * * Returns 0 on success. */ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return _proc_do_string(table->data, table->maxlen, write, filp, + return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, ppos); } @@ -2320,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, - int write, struct file *filp, void __user *buffer, + int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -2427,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, #undef TMPBUFLEN } -static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, +static int do_proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { - return __do_proc_dointvec(table->data, table, write, filp, + return __do_proc_dointvec(table->data, table, write, buffer, lenp, ppos, conv, data); } @@ -2441,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * proc_dointvec - read a vector of integers * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2451,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * * Returns 0 on success. */ -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, NULL,NULL); } @@ -2462,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, * Taint values can only be increased * This means we can safely use a temporary. */ -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -2474,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp, t = *table; t.data = &tmptaint; - err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); if (err < 0) return err; @@ -2526,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * proc_dointvec_minmax - read a vector of integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2539,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { .min = (int *) table->extra1, .max = (int *) table->extra2, }; - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_minmax_conv, ¶m); } static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, @@ -2656,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { return __do_proc_doulongvec_minmax(table->data, table, write, - filp, buffer, lenp, ppos, convmul, convdiv); + buffer, lenp, ppos, convmul, convdiv); } /** * proc_doulongvec_minmax - read a vector of long integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2683,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); } /** * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2708,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp * Returns 0 on success. */ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, HZ, 1000l); } @@ -2788,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * proc_dointvec_jiffies - read a vector of integers as seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2800,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); } @@ -2811,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: pointer to the file position @@ -2823,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_userhz_jiffies_conv,NULL); } @@ -2834,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2847,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; @@ -2863,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp tmp = pid_vnr(cad_pid); - r = __do_proc_dointvec(&tmp, table, write, filp, buffer, + r = __do_proc_dointvec(&tmp, table, write, buffer, lenp, ppos, NULL, NULL); if (r || !write) return r; @@ -2878,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp #else /* CONFIG_PROC_FS */ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 0b0a6366c9d..ee266620b06 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,4 +1,4 @@ -obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o +obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c new file mode 100644 index 00000000000..86628e755f3 --- /dev/null +++ b/kernel/time/timeconv.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * Contributed by Paul Eggert (eggert@twinsun.com). + * + * The GNU C Library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * The GNU C Library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with the GNU C Library; see the file COPYING.LIB. If not, + * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Converts the calendar time to broken-down time representation + * Based on code from glibc-2.6 + * + * 2009-7-14: + * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com> + */ + +#include <linux/time.h> +#include <linux/module.h> + +/* + * Nonzero if YEAR is a leap year (every 4 years, + * except every 100th isn't, and every 400th is). + */ +static int __isleap(long year) +{ + return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0); +} + +/* do a mathdiv for long type */ +static long math_div(long a, long b) +{ + return a / b - (a % b < 0); +} + +/* How many leap years between y1 and y2, y1 must less or equal to y2 */ +static long leaps_between(long y1, long y2) +{ + long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100) + + math_div(y1 - 1, 400); + long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100) + + math_div(y2 - 1, 400); + return leaps2 - leaps1; +} + +/* How many days come before each month (0-12). */ +static const unsigned short __mon_yday[2][13] = { + /* Normal years. */ + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + /* Leap years. */ + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} +}; + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/** + * time_to_tm - converts the calendar time to local broken-down time + * + * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970, + * Coordinated Universal Time (UTC). + * @offset offset seconds adding to totalsecs. + * @result pointer to struct tm variable to receive broken-down time + */ +void time_to_tm(time_t totalsecs, int offset, struct tm *result) +{ + long days, rem, y; + const unsigned short *ip; + + days = totalsecs / SECS_PER_DAY; + rem = totalsecs % SECS_PER_DAY; + rem += offset; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + + result->tm_hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + result->tm_min = rem / 60; + result->tm_sec = rem % 60; + + /* January 1, 1970 was a Thursday. */ + result->tm_wday = (4 + days) % 7; + if (result->tm_wday < 0) + result->tm_wday += 7; + + y = 1970; + + while (days < 0 || days >= (__isleap(y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long yg = y + math_div(days, 365); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= (yg - y) * 365 + leaps_between(y, yg); + y = yg; + } + + result->tm_year = y - 1900; + + result->tm_yday = days; + + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < ip[y]; y--) + continue; + days -= ip[y]; + + result->tm_mon = y; + result->tm_mday = days + 1; +} +EXPORT_SYMBOL(time_to_tm); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 23df7771c93..a142579765b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0f6facb050a..8504ac71e4e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = { int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&stack_sysctl_mutex); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_stack_tracer_enabled == !!stack_tracer_enabled)) diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 92359cc747a..69eae358a72 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which) * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? */ -static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, +static int proc_do_uts_string(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table uts_table; int r; memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); - r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos); + r = proc_dostring(&uts_table,write,buffer,lenp, ppos); put_uts(table, write, uts_table.data); return r; } diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 68dfce59c1b..fc686c7a0a0 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -27,6 +27,11 @@ #define GZIP_IOBUF_SIZE (16*1024) +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, int(*fill)(void*, unsigned int), @@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len, goto gunzip_nomem4; } + if (!fill) + fill = nofill; + if (len == 0) len = fill(zbuf, GZIP_IOBUF_SIZE); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0b954e04bd3..ca82fde81c8 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -82,6 +82,11 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Called twice: once at startup and once in rc_normalize() */ static void INIT rc_read(struct rc *rc) { @@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc, int (*fill)(void*, unsigned int), char *buffer, int buffer_size) { - rc->fill = fill; + if (fill) + rc->fill = fill; + else + rc->fill = nofill; rc->buffer = (uint8_t *)buffer; rc->buffer_size = buffer_size; rc->buffer_end = rc->buffer + rc->buffer_size; diff --git a/mm/Kconfig b/mm/Kconfig index 71eb0b4cce8..24776072959 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR /proc/sys/vm/mmap_min_addr tunable. +config MEMORY_FAILURE + depends on MMU + depends on X86_MCE + bool "Enable recovery from hardware memory errors" + help + Enables code to recover from some memory failures on systems + with MCA recovery. This allows a system to continue running + even when some of its memory has uncorrected errors. This requires + special hardware support and typically ECC memory. + +config HWPOISON_INJECT + tristate "Poison pages injector" + depends on MEMORY_FAILURE && DEBUG_KERNEL + config NOMMU_INITIAL_TRIM_EXCESS int "Turn on mmap() excess space trimming before booting" depends on !MMU diff --git a/mm/Makefile b/mm/Makefile index 88193d73cd1..ebf849042ed 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,14 +5,14 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ - vmalloc.o + vmalloc.o pagewalk.o obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o \ - pagewalk.o $(mmu-y) + $(mmu-y) obj-y += init-mm.o obj-$(CONFIG_BOUNCE) += bounce.o @@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o +obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o diff --git a/mm/filemap.c b/mm/filemap.c index bcc7372aebb..6c84e598b4a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -58,7 +58,7 @@ /* * Lock ordering: * - * ->i_mmap_lock (vmtruncate) + * ->i_mmap_lock (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock @@ -104,6 +104,10 @@ * * ->task->proc_lock * ->dcache_lock (proc_pid_lookup) + * + * (code doesn't rely on that order, so you could switch it around) + * ->tasklist_lock (memory_failure, collect_procs_ao) + * ->i_mmap_lock */ /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 815dbd4a6dc..6f048fcc749 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) #ifdef CONFIG_SYSCTL int hugetlb_sysctl_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) h->max_huge_pages = set_max_huge_pages(h, tmp); @@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, } int hugetlb_treat_movable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (hugepages_treat_as_movable) htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; else @@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write, } int hugetlb_overcommit_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) { spin_lock(&hugetlb_lock); diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c new file mode 100644 index 00000000000..e1d85137f08 --- /dev/null +++ b/mm/hwpoison-inject.c @@ -0,0 +1,41 @@ +/* Inject a hwpoison memory failure on a arbitary pfn */ +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +static struct dentry *hwpoison_dir, *corrupt_pfn; + +static int hwpoison_inject(void *data, u64 val) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val); + return __memory_failure(val, 18, 0); +} + +DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n"); + +static void pfn_inject_exit(void) +{ + if (hwpoison_dir) + debugfs_remove_recursive(hwpoison_dir); +} + +static int pfn_inject_init(void) +{ + hwpoison_dir = debugfs_create_dir("hwpoison", NULL); + if (hwpoison_dir == NULL) + return -ENOMEM; + corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, + NULL, &hwpoison_fops); + if (corrupt_pfn == NULL) { + pfn_inject_exit(); + return -ENOMEM; + } + return 0; +} + +module_init(pfn_inject_init); +module_exit(pfn_inject_exit); +MODULE_LICENSE("GPL"); @@ -30,6 +30,7 @@ #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/mmu_notifier.h> +#include <linux/swap.h> #include <linux/ksm.h> #include <asm/tlbflush.h> @@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared; static unsigned long ksm_rmap_items; /* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages = 2000; +static unsigned long ksm_max_kernel_pages; /* Number of pages ksmd should scan in one batch */ -static unsigned int ksm_thread_pages_to_scan = 200; +static unsigned int ksm_thread_pages_to_scan = 100; /* Milliseconds ksmd should sleep between batches */ static unsigned int ksm_thread_sleep_millisecs = 20; @@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20; #define KSM_RUN_STOP 0 #define KSM_RUN_MERGE 1 #define KSM_RUN_UNMERGE 2 -static unsigned int ksm_run = KSM_RUN_MERGE; +static unsigned int ksm_run = KSM_RUN_STOP; static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); static DEFINE_MUTEX(ksm_thread_mutex); @@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock); sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL) +static void __init ksm_init_max_kernel_pages(void) +{ + ksm_max_kernel_pages = nr_free_buffer_pages() / 4; +} + static int __init ksm_slab_init(void) { rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); @@ -1667,6 +1673,8 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; + ksm_init_max_kernel_pages(); + err = ksm_slab_init(); if (err) goto out; diff --git a/mm/madvise.c b/mm/madvise.c index d9ae2067952..35b1479b7c9 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma, return error; } +#ifdef CONFIG_MEMORY_FAILURE +/* + * Error injection support for memory error handling. + */ +static int madvise_hwpoison(unsigned long start, unsigned long end) +{ + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + for (; start < end; start += PAGE_SIZE) { + struct page *p; + int ret = get_user_pages(current, current->mm, start, 1, + 0, 0, &p, NULL); + if (ret != 1) + return ret; + printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", + page_to_pfn(p), start); + /* Ignore return value for now */ + __memory_failure(page_to_pfn(p), 0, 1); + put_page(p); + } + return ret; +} +#endif + static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) @@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) int write; size_t len; +#ifdef CONFIG_MEMORY_FAILURE + if (behavior == MADV_HWPOISON) + return madvise_hwpoison(start, start+len_in); +#endif if (!madvise_behavior_valid(behavior)) return error; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9b10d875378..e2b98a6875c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -29,6 +29,7 @@ #include <linux/rcupdate.h> #include <linux/limits.h> #include <linux/mutex.h> +#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/spinlock.h> @@ -43,6 +44,7 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 +struct mem_cgroup *root_mem_cgroup __read_mostly; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ @@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ #endif static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ +#define SOFTLIMIT_EVENTS_THRESH (1000) /* * Statistics for memory cgroup. @@ -66,6 +69,8 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, }; @@ -78,6 +83,20 @@ struct mem_cgroup_stat { struct mem_cgroup_stat_cpu cpustat[0]; }; +static inline void +__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat, + enum mem_cgroup_stat_index idx) +{ + stat->count[idx] = 0; +} + +static inline s64 +__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat, + enum mem_cgroup_stat_index idx) +{ + return stat->count[idx]; +} + /* * For accounting under irq disable, no need for increment preempt count. */ @@ -117,6 +136,12 @@ struct mem_cgroup_per_zone { unsigned long count[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; + struct rb_node tree_node; /* RB tree node */ + unsigned long long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + struct mem_cgroup *mem; /* Back pointer, we cannot */ + /* use container_of */ }; /* Macro for accessing counter */ #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) @@ -130,6 +155,26 @@ struct mem_cgroup_lru_info { }; /* + * Cgroups above their limits are maintained in a RB-Tree, independent of + * their hierarchy representation + */ + +struct mem_cgroup_tree_per_zone { + struct rb_root rb_root; + spinlock_t lock; +}; + +struct mem_cgroup_tree_per_node { + struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; +}; + +struct mem_cgroup_tree { + struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; +}; + +static struct mem_cgroup_tree soft_limit_tree __read_mostly; + +/* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, @@ -186,6 +231,13 @@ struct mem_cgroup { struct mem_cgroup_stat stat; }; +/* + * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft + * limit reclaim to prevent infinite loops, if they ever occur. + */ +#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) +#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) + enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, @@ -200,13 +252,8 @@ enum charge_type { #define PCGF_CACHE (1UL << PCG_CACHE) #define PCGF_USED (1UL << PCG_USED) #define PCGF_LOCK (1UL << PCG_LOCK) -static const unsigned long -pcg_default_flags[NR_CHARGE_TYPE] = { - PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */ - PCGF_USED | PCGF_LOCK, /* Anon */ - PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */ - 0, /* FORCE */ -}; +/* Not used, but added here for completeness */ +#define PCGF_ACCT (1UL << PCG_ACCT) /* for encoding cft->private value on file */ #define _MEM (0) @@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = { #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) +/* + * Reclaim flags for mem_cgroup_hierarchical_reclaim + */ +#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0 +#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT) +#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1 +#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT) +#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 +#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) + static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); +static struct mem_cgroup_per_zone * +mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) +{ + return &mem->info.nodeinfo[nid]->zoneinfo[zid]; +} + +static struct mem_cgroup_per_zone * +page_cgroup_zoneinfo(struct page_cgroup *pc) +{ + struct mem_cgroup *mem = pc->mem_cgroup; + int nid = page_cgroup_nid(pc); + int zid = page_cgroup_zid(pc); + + if (!mem) + return NULL; + + return mem_cgroup_zoneinfo(mem, nid, zid); +} + +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_node_zone(int nid, int zid) +{ + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_from_page(struct page *page) +{ + int nid = page_to_nid(page); + int zid = page_zonenum(page); + + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static void +__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + struct rb_node **p = &mctz->rb_root.rb_node; + struct rb_node *parent = NULL; + struct mem_cgroup_per_zone *mz_node; + + if (mz->on_tree) + return; + + mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); + while (*p) { + parent = *p; + mz_node = rb_entry(parent, struct mem_cgroup_per_zone, + tree_node); + if (mz->usage_in_excess < mz_node->usage_in_excess) + p = &(*p)->rb_left; + /* + * We can't avoid mem cgroups that are over their soft + * limit by the same amount + */ + else if (mz->usage_in_excess >= mz_node->usage_in_excess) + p = &(*p)->rb_right; + } + rb_link_node(&mz->tree_node, parent, p); + rb_insert_color(&mz->tree_node, &mctz->rb_root); + mz->on_tree = true; +} + +static void +__mem_cgroup_remove_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + if (!mz->on_tree) + return; + rb_erase(&mz->tree_node, &mctz->rb_root); + mz->on_tree = false; +} + +static void +mem_cgroup_insert_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + spin_lock(&mctz->lock); + __mem_cgroup_insert_exceeded(mem, mz, mctz); + spin_unlock(&mctz->lock); +} + +static void +mem_cgroup_remove_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + spin_lock(&mctz->lock); + __mem_cgroup_remove_exceeded(mem, mz, mctz); + spin_unlock(&mctz->lock); +} + +static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) +{ + bool ret = false; + int cpu; + s64 val; + struct mem_cgroup_stat_cpu *cpustat; + + cpu = get_cpu(); + cpustat = &mem->stat.cpustat[cpu]; + val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS); + if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) { + __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS); + ret = true; + } + put_cpu(); + return ret; +} + +static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) +{ + unsigned long long prev_usage_in_excess, new_usage_in_excess; + bool updated_tree = false; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + + mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); + mctz = soft_limit_tree_from_page(page); + + /* + * We do updates in lazy mode, mem's are removed + * lazily from the per-zone, per-node rb tree + */ + prev_usage_in_excess = mz->usage_in_excess; + + new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); + if (prev_usage_in_excess) { + mem_cgroup_remove_exceeded(mem, mz, mctz); + updated_tree = true; + } + if (!new_usage_in_excess) + goto done; + mem_cgroup_insert_exceeded(mem, mz, mctz); + +done: + if (updated_tree) { + spin_lock(&mctz->lock); + mz->usage_in_excess = new_usage_in_excess; + spin_unlock(&mctz->lock); + } +} + +static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) +{ + int node, zone; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + + for_each_node_state(node, N_POSSIBLE) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + mz = mem_cgroup_zoneinfo(mem, node, zone); + mctz = soft_limit_tree_node_zone(node, zone); + mem_cgroup_remove_exceeded(mem, mz, mctz); + } + } +} + +static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem) +{ + return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT; +} + +static struct mem_cgroup_per_zone * +__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct rb_node *rightmost = NULL; + struct mem_cgroup_per_zone *mz = NULL; + +retry: + rightmost = rb_last(&mctz->rb_root); + if (!rightmost) + goto done; /* Nothing to reclaim from */ + + mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); + /* + * Remove the node now but someone else can add it back, + * we will to add it back at the end of reclaim to its correct + * position in the tree. + */ + __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); + if (!res_counter_soft_limit_excess(&mz->mem->res) || + !css_tryget(&mz->mem->css)) + goto retry; +done: + return mz; +} + +static struct mem_cgroup_per_zone * +mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct mem_cgroup_per_zone *mz; + + spin_lock(&mctz->lock); + mz = __mem_cgroup_largest_soft_limit_node(mctz); + spin_unlock(&mctz->lock); + return mz; +} + +static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, + bool charge) +{ + int val = (charge) ? 1 : -1; + struct mem_cgroup_stat *stat = &mem->stat; + struct mem_cgroup_stat_cpu *cpustat; + int cpu = get_cpu(); + + cpustat = &stat->cpustat[cpu]; + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val); + put_cpu(); +} + static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, struct page_cgroup *pc, bool charge) { - int val = (charge)? 1 : -1; + int val = (charge) ? 1 : -1; struct mem_cgroup_stat *stat = &mem->stat; struct mem_cgroup_stat_cpu *cpustat; int cpu = get_cpu(); @@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, else __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1); put_cpu(); } -static struct mem_cgroup_per_zone * -mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) -{ - return &mem->info.nodeinfo[nid]->zoneinfo[zid]; -} - -static struct mem_cgroup_per_zone * -page_cgroup_zoneinfo(struct page_cgroup *pc) -{ - struct mem_cgroup *mem = pc->mem_cgroup; - int nid = page_cgroup_nid(pc); - int zid = page_cgroup_zid(pc); - - if (!mem) - return NULL; - - return mem_cgroup_zoneinfo(mem, nid, zid); -} - static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, enum lru_list idx) { @@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, return ret; } +static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) +{ + return (mem == root_mem_cgroup); +} + /* * Following LRU functions are allowed to be used without PCG_LOCK. * Operations are called by routine of global LRU independently from memcg. @@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) { struct page_cgroup *pc; - struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); /* can happen while we handle swapcache. */ - if (list_empty(&pc->lru) || !pc->mem_cgroup) + if (!TestClearPageCgroupAcctLRU(pc)) return; + VM_BUG_ON(!pc->mem_cgroup); /* * We don't check PCG_USED bit. It's cleared when the "page" is finally * removed from global LRU. */ mz = page_cgroup_zoneinfo(pc); - mem = pc->mem_cgroup; MEM_CGROUP_ZSTAT(mz, lru) -= 1; + if (mem_cgroup_is_root(pc->mem_cgroup)) + return; + VM_BUG_ON(list_empty(&pc->lru)); list_del_init(&pc->lru); return; } @@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) * For making pc->mem_cgroup visible, insert smp_rmb() here. */ smp_rmb(); - /* unused page is not rotated. */ - if (!PageCgroupUsed(pc)) + /* unused or root page is not rotated. */ + if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup)) return; mz = page_cgroup_zoneinfo(pc); list_move(&pc->lru, &mz->lists[lru]); @@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); + VM_BUG_ON(PageCgroupAcctLRU(pc)); /* * Used bit is set without atomic ops but after smp_wmb(). * For making pc->mem_cgroup visible, insert smp_rmb() here. @@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) mz = page_cgroup_zoneinfo(pc); MEM_CGROUP_ZSTAT(mz, lru) += 1; + SetPageCgroupAcctLRU(pc); + if (mem_cgroup_is_root(pc->mem_cgroup)) + return; list_add(&pc->lru, &mz->lists[lru]); } @@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); /* link when the page is linked to LRU but page_cgroup isn't */ - if (PageLRU(page) && list_empty(&pc->lru)) + if (PageLRU(page) && !PageCgroupAcctLRU(pc)) mem_cgroup_add_lru_list(page, page_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } @@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) * If shrink==true, for avoiding to free too much, this returns immedieately. */ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, - gfp_t gfp_mask, bool noswap, bool shrink) + struct zone *zone, + gfp_t gfp_mask, + unsigned long reclaim_options) { struct mem_cgroup *victim; int ret, total = 0; int loop = 0; + bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; + bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; + bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; + unsigned long excess = mem_cgroup_get_excess(root_mem); /* If memsw_is_minimum==1, swap-out is of-no-use. */ if (root_mem->memsw_is_minimum) noswap = true; - while (loop < 2) { + while (1) { victim = mem_cgroup_select_victim(root_mem); - if (victim == root_mem) + if (victim == root_mem) { loop++; + if (loop >= 2) { + /* + * If we have not been able to reclaim + * anything, it might because there are + * no reclaimable pages under this hierarchy + */ + if (!check_soft || !total) { + css_put(&victim->css); + break; + } + /* + * We want to do more targetted reclaim. + * excess >> 2 is not to excessive so as to + * reclaim too much, nor too less that we keep + * coming back to reclaim from this cgroup + */ + if (total >= (excess >> 2) || + (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) { + css_put(&victim->css); + break; + } + } + } if (!mem_cgroup_local_usage(&victim->stat)) { /* this cgroup's local usage == 0 */ css_put(&victim->css); continue; } /* we use swappiness of local cgroup */ - ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap, - get_swappiness(victim)); + if (check_soft) + ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, + noswap, get_swappiness(victim), zone, + zone->zone_pgdat->node_id); + else + ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, + noswap, get_swappiness(victim)); css_put(&victim->css); /* * At shrinking usage, we can't check we should stop here or @@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (shrink) return ret; total += ret; - if (mem_cgroup_check_under_limit(root_mem)) + if (check_soft) { + if (res_counter_check_under_soft_limit(&root_mem->res)) + return total; + } else if (mem_cgroup_check_under_limit(root_mem)) return 1 + total; } return total; @@ -965,11 +1268,11 @@ done: */ static int __mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcg, - bool oom) + bool oom, struct page *page) { - struct mem_cgroup *mem, *mem_over_limit; + struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct res_counter *fail_res; + struct res_counter *fail_res, *soft_fail_res = NULL; if (unlikely(test_thread_flag(TIF_MEMDIE))) { /* Don't account this! */ @@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, VM_BUG_ON(css_is_removed(&mem->css)); while (1) { - int ret; - bool noswap = false; + int ret = 0; + unsigned long flags = 0; - ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); + if (mem_cgroup_is_root(mem)) + goto done; + ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, + &soft_fail_res); if (likely(!ret)) { if (!do_swap_account) break; ret = res_counter_charge(&mem->memsw, PAGE_SIZE, - &fail_res); + &fail_res, NULL); if (likely(!ret)) break; /* mem+swap counter fails */ - res_counter_uncharge(&mem->res, PAGE_SIZE); - noswap = true; + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + flags |= MEM_CGROUP_RECLAIM_NOSWAP; mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); } else @@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (!(gfp_mask & __GFP_WAIT)) goto nomem; - ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, - noswap, false); + ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, + gfp_mask, flags); if (ret) continue; @@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, goto nomem; } } + /* + * Insert just the ancestor, we should trickle down to the correct + * cgroup for reclaim, since the other nodes will be below their + * soft limit + */ + if (soft_fail_res) { + mem_over_soft_limit = + mem_cgroup_from_res_counter(soft_fail_res, res); + if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) + mem_cgroup_update_tree(mem_over_soft_limit, page); + } +done: return 0; nomem: css_put(&mem->css); return -ENOMEM; } - /* * A helper function to get mem_cgroup from ID. must be called under * rcu_read_lock(). The caller must check css_is_removed() or some if @@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, + NULL); + } css_put(&mem->css); return; } + pc->mem_cgroup = mem; + /* + * We access a page_cgroup asynchronously without lock_page_cgroup(). + * Especially when a page_cgroup is taken from a page, pc->mem_cgroup + * is accessed after testing USED bit. To make pc->mem_cgroup visible + * before USED bit, we need memory barrier here. + * See mem_cgroup_add_lru_list(), etc. + */ smp_wmb(); - pc->flags = pcg_default_flags[ctype]; + switch (ctype) { + case MEM_CGROUP_CHARGE_TYPE_CACHE: + case MEM_CGROUP_CHARGE_TYPE_SHMEM: + SetPageCgroupCache(pc); + SetPageCgroupUsed(pc); + break; + case MEM_CGROUP_CHARGE_TYPE_MAPPED: + ClearPageCgroupCache(pc); + SetPageCgroupUsed(pc); + break; + default: + break; + } mem_cgroup_charge_statistics(mem, pc, true); @@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, if (pc->mem_cgroup != from) goto out; - res_counter_uncharge(&from->res, PAGE_SIZE); + if (!mem_cgroup_is_root(from)) + res_counter_uncharge(&from->res, PAGE_SIZE, NULL); mem_cgroup_charge_statistics(from, pc, false); page = pc->page; @@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, 1); } - if (do_swap_account) - res_counter_uncharge(&from->memsw, PAGE_SIZE); + if (do_swap_account && !mem_cgroup_is_root(from)) + res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); css_put(&from->css); css_get(&to->css); @@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); if (ret || !parent) return ret; @@ -1268,9 +1609,11 @@ uncharge: /* drop extra refcnt by try_charge() */ css_put(&parent->css); /* uncharge if move fails */ - res_counter_uncharge(&parent->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&parent->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(parent)) { + res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); + } return ret; } @@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, prefetchw(pc); mem = memcg; - ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page); if (ret || !mem) return ret; @@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, if (!mem) goto charge_cur_mm; *ptr = mem; - ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); + ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page); /* drop extra refcnt from tryget */ css_put(&mem->css); return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, ptr, true); + return __mem_cgroup_try_charge(mm, mask, ptr, true, page); } static void @@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, * This recorded memcg can be obsolete one. So, avoid * calling css_tryget */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(memcg)) + res_counter_uncharge(&memcg->memsw, PAGE_SIZE, + NULL); + mem_cgroup_swap_statistics(memcg, false); mem_cgroup_put(memcg); } rcu_read_unlock(); @@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) return; if (!mem) return; - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); + } css_put(&mem->css); } @@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) struct page_cgroup *pc; struct mem_cgroup *mem = NULL; struct mem_cgroup_per_zone *mz; + bool soft_limit_excess = false; if (mem_cgroup_disabled()) return NULL; @@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); + if (do_swap_account && + (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); + } + if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) + mem_cgroup_swap_statistics(mem, true); mem_cgroup_charge_statistics(mem, pc, false); ClearPageCgroupUsed(pc); @@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) mz = page_cgroup_zoneinfo(pc); unlock_page_cgroup(pc); + if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) + mem_cgroup_update_tree(mem, page); /* at swapout, this memcg will be accessed to record to swap */ if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) css_put(&mem->css); @@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) * We uncharge this because swap is freed. * This memcg can be obsolete one. We avoid calling css_tryget */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(memcg)) + res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); + mem_cgroup_swap_statistics(memcg, false); mem_cgroup_put(memcg); } rcu_read_unlock(); @@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) unlock_page_cgroup(pc); if (mem) { - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, + page); css_put(&mem->css); } *ptr = mem; @@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, if (!ret) break; - progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, - false, true); + progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, + GFP_KERNEL, + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, if (!ret) break; - mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true); + mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, + MEM_CGROUP_RECLAIM_NOSWAP | + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid) +{ + unsigned long nr_reclaimed = 0; + struct mem_cgroup_per_zone *mz, *next_mz = NULL; + unsigned long reclaimed; + int loop = 0; + struct mem_cgroup_tree_per_zone *mctz; + + if (order > 0) + return 0; + + mctz = soft_limit_tree_node_zone(nid, zid); + /* + * This loop can run a while, specially if mem_cgroup's continuously + * keep exceeding their soft limit and putting the system under + * pressure + */ + do { + if (next_mz) + mz = next_mz; + else + mz = mem_cgroup_largest_soft_limit_node(mctz); + if (!mz) + break; + + reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, + gfp_mask, + MEM_CGROUP_RECLAIM_SOFT); + nr_reclaimed += reclaimed; + spin_lock(&mctz->lock); + + /* + * If we failed to reclaim anything from this memory cgroup + * it is time to move on to the next cgroup + */ + next_mz = NULL; + if (!reclaimed) { + do { + /* + * Loop until we find yet another one. + * + * By the time we get the soft_limit lock + * again, someone might have aded the + * group back on the RB tree. Iterate to + * make sure we get a different mem. + * mem_cgroup_largest_soft_limit_node returns + * NULL if no other cgroup is present on + * the tree + */ + next_mz = + __mem_cgroup_largest_soft_limit_node(mctz); + if (next_mz == mz) { + css_put(&next_mz->mem->css); + next_mz = NULL; + } else /* next_mz == NULL or other memcg */ + break; + } while (1); + } + mz->usage_in_excess = + res_counter_soft_limit_excess(&mz->mem->res); + __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); + /* + * One school of thought says that we should not add + * back the node to the tree if reclaim returns 0. + * But our reclaim could return 0, simply because due + * to priority we are exposing a smaller subset of + * memory to reclaim from. Consider this as a longer + * term TODO. + */ + if (mz->usage_in_excess) + __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); + spin_unlock(&mctz->lock); + css_put(&mz->mem->css); + loop++; + /* + * Could not reclaim anything and there are no more + * mem cgroups to try or we seem to be looping without + * reclaiming anything. + */ + if (!nr_reclaimed && + (next_mz == NULL || + loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) + break; + } while (!nr_reclaimed); + if (next_mz) + css_put(&next_mz->mem->css); + return nr_reclaimed; +} + /* * This routine traverse page_cgroup in given list and drop them all. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. @@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, return retval; } +struct mem_cgroup_idx_data { + s64 val; + enum mem_cgroup_stat_index idx; +}; + +static int +mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) +{ + struct mem_cgroup_idx_data *d = data; + d->val += mem_cgroup_read_stat(&mem->stat, d->idx); + return 0; +} + +static void +mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, + enum mem_cgroup_stat_index idx, s64 *val) +{ + struct mem_cgroup_idx_data d; + d.idx = idx; + d.val = 0; + mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); + *val = d.val; +} + static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - u64 val = 0; + u64 idx_val, val; int type, name; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (type) { case _MEM: - val = res_counter_read_u64(&mem->res, name); + if (name == RES_USAGE && mem_cgroup_is_root(mem)) { + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_CACHE, &idx_val); + val = idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_RSS, &idx_val); + val += idx_val; + val <<= PAGE_SHIFT; + } else + val = res_counter_read_u64(&mem->res, name); break; case _MEMSWAP: - val = res_counter_read_u64(&mem->memsw, name); + if (name == RES_USAGE && mem_cgroup_is_root(mem)) { + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_CACHE, &idx_val); + val = idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_RSS, &idx_val); + val += idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_SWAPOUT, &idx_val); + val <<= PAGE_SHIFT; + } else + val = res_counter_read_u64(&mem->memsw, name); break; default: BUG(); @@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, name = MEMFILE_ATTR(cft->private); switch (name) { case RES_LIMIT: + if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ + ret = -EINVAL; + break; + } /* This function does all necessary parse...reuse it */ ret = res_counter_memparse_write_strategy(buffer, &val); if (ret) @@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, else ret = mem_cgroup_resize_memsw_limit(memcg, val); break; + case RES_SOFT_LIMIT: + ret = res_counter_memparse_write_strategy(buffer, &val); + if (ret) + break; + /* + * For memsw, soft limits are hard to implement in terms + * of semantics, for now, we support soft limits for + * control without swap + */ + if (type == _MEM) + ret = res_counter_set_soft_limit(&memcg->res, val); + else + ret = -EINVAL; + break; default: ret = -EINVAL; /* should be BUG() ? */ break; @@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) res_counter_reset_failcnt(&mem->memsw); break; } + return 0; } @@ -2160,6 +2676,7 @@ enum { MCS_MAPPED_FILE, MCS_PGPGIN, MCS_PGPGOUT, + MCS_SWAP, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, @@ -2181,6 +2698,7 @@ struct { {"mapped_file", "total_mapped_file"}, {"pgpgin", "total_pgpgin"}, {"pgpgout", "total_pgpgout"}, + {"swap", "total_swap"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, @@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) s->stat[MCS_PGPGIN] += val; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); s->stat[MCS_PGPGOUT] += val; + if (do_swap_account) { + val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT); + s->stat[MCS_SWAP] += val * PAGE_SIZE; + } /* per zone stat */ val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); @@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_local_stat(mem_cont, &mystat); - for (i = 0; i < NR_MCS_STAT; i++) + for (i = 0; i < NR_MCS_STAT; i++) { + if (i == MCS_SWAP && !do_swap_account) + continue; cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); + } /* Hierarchical information */ { @@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_total_stat(mem_cont, &mystat); - for (i = 0; i < NR_MCS_STAT; i++) + for (i = 0; i < NR_MCS_STAT; i++) { + if (i == MCS_SWAP && !do_swap_account) + continue; cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); - + } #ifdef CONFIG_DEBUG_VM cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); @@ -2345,6 +2872,12 @@ static struct cftype mem_cgroup_files[] = { .read_u64 = mem_cgroup_read, }, { + .name = "soft_limit_in_bytes", + .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), + .write_string = mem_cgroup_write, + .read_u64 = mem_cgroup_read, + }, + { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, @@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) mz = &pn->zoneinfo[zone]; for_each_lru(l) INIT_LIST_HEAD(&mz->lists[l]); + mz->usage_in_excess = 0; + mz->on_tree = false; + mz->mem = mem; } return 0; } @@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem) { int node; + mem_cgroup_remove_from_trees(mem); free_css_id(&mem_cgroup_subsys, &mem->css); for_each_node_state(node, N_POSSIBLE) @@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void) } #endif +static int mem_cgroup_soft_limit_tree_init(void) +{ + struct mem_cgroup_tree_per_node *rtpn; + struct mem_cgroup_tree_per_zone *rtpz; + int tmp, node, zone; + + for_each_node_state(node, N_POSSIBLE) { + tmp = node; + if (!node_state(node, N_NORMAL_MEMORY)) + tmp = -1; + rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); + if (!rtpn) + return 1; + + soft_limit_tree.rb_tree_per_node[node] = rtpn; + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + rtpz = &rtpn->rb_tree_per_zone[zone]; + rtpz->rb_root = RB_ROOT; + spin_lock_init(&rtpz->lock); + } + } + return 0; +} + static struct cgroup_subsys_state * __ref mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; + /* root ? */ if (cont->parent == NULL) { enable_swap_cgroup(); parent = NULL; + root_mem_cgroup = mem; + if (mem_cgroup_soft_limit_tree_init()) + goto free_out; + } else { parent = mem_cgroup_from_cont(cont->parent); mem->use_hierarchy = parent->use_hierarchy; @@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) return &mem->css; free_out: __mem_cgroup_free(mem); + root_mem_cgroup = NULL; return ERR_PTR(error); } @@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss, static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, - struct task_struct *p) + struct task_struct *p, + bool threadgroup) { mutex_lock(&memcg_tasklist); /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c new file mode 100644 index 00000000000..729d4b15b64 --- /dev/null +++ b/mm/memory-failure.c @@ -0,0 +1,832 @@ +/* + * Copyright (C) 2008, 2009 Intel Corporation + * Authors: Andi Kleen, Fengguang Wu + * + * This software may be redistributed and/or modified under the terms of + * the GNU General Public License ("GPL") version 2 only as published by the + * Free Software Foundation. + * + * High level machine check handler. Handles pages reported by the + * hardware as being corrupted usually due to a 2bit ECC memory or cache + * failure. + * + * Handles page cache pages in various states. The tricky part + * here is that we can access any page asynchronous to other VM + * users, because memory failures could happen anytime and anywhere, + * possibly violating some of their assumptions. This is why this code + * has to be extremely careful. Generally it tries to use normal locking + * rules, as in get the standard locks, even if that means the + * error handling takes potentially a long time. + * + * The operation to map back from RMAP chains to processes has to walk + * the complete process list and has non linear complexity with the number + * mappings. In short it can be quite slow. But since memory corruptions + * are rare we hope to get away with this. + */ + +/* + * Notebook: + * - hugetlb needs more code + * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages + * - pass bad pages to kdump next kernel + */ +#define DEBUG 1 /* remove me in 2.6.34 */ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/rmap.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/backing-dev.h> +#include "internal.h" + +int sysctl_memory_failure_early_kill __read_mostly = 0; + +int sysctl_memory_failure_recovery __read_mostly = 1; + +atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); + +/* + * Send all the processes who have the page mapped an ``action optional'' + * signal. + */ +static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, + unsigned long pfn) +{ + struct siginfo si; + int ret; + + printk(KERN_ERR + "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", + pfn, t->comm, t->pid); + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_MCEERR_AO; + si.si_addr = (void *)addr; +#ifdef __ARCH_SI_TRAPNO + si.si_trapno = trapno; +#endif + si.si_addr_lsb = PAGE_SHIFT; + /* + * Don't use force here, it's convenient if the signal + * can be temporarily blocked. + * This could cause a loop when the user sets SIGBUS + * to SIG_IGN, but hopefully noone will do that? + */ + ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ + if (ret < 0) + printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", + t->comm, t->pid, ret); + return ret; +} + +/* + * Kill all processes that have a poisoned page mapped and then isolate + * the page. + * + * General strategy: + * Find all processes having the page mapped and kill them. + * But we keep a page reference around so that the page is not + * actually freed yet. + * Then stash the page away + * + * There's no convenient way to get back to mapped processes + * from the VMAs. So do a brute-force search over all + * running processes. + * + * Remember that machine checks are not common (or rather + * if they are common you have other problems), so this shouldn't + * be a performance issue. + * + * Also there are some races possible while we get from the + * error detection to actually handle it. + */ + +struct to_kill { + struct list_head nd; + struct task_struct *tsk; + unsigned long addr; + unsigned addr_valid:1; +}; + +/* + * Failure handling: if we can't find or can't kill a process there's + * not much we can do. We just print a message and ignore otherwise. + */ + +/* + * Schedule a process for later kill. + * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM. + * TBD would GFP_NOIO be enough? + */ +static void add_to_kill(struct task_struct *tsk, struct page *p, + struct vm_area_struct *vma, + struct list_head *to_kill, + struct to_kill **tkc) +{ + struct to_kill *tk; + + if (*tkc) { + tk = *tkc; + *tkc = NULL; + } else { + tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); + if (!tk) { + printk(KERN_ERR + "MCE: Out of memory while machine check handling\n"); + return; + } + } + tk->addr = page_address_in_vma(p, vma); + tk->addr_valid = 1; + + /* + * In theory we don't have to kill when the page was + * munmaped. But it could be also a mremap. Since that's + * likely very rare kill anyways just out of paranoia, but use + * a SIGKILL because the error is not contained anymore. + */ + if (tk->addr == -EFAULT) { + pr_debug("MCE: Unable to find user space address %lx in %s\n", + page_to_pfn(p), tsk->comm); + tk->addr_valid = 0; + } + get_task_struct(tsk); + tk->tsk = tsk; + list_add_tail(&tk->nd, to_kill); +} + +/* + * Kill the processes that have been collected earlier. + * + * Only do anything when DOIT is set, otherwise just free the list + * (this is used for clean pages which do not need killing) + * Also when FAIL is set do a force kill because something went + * wrong earlier. + */ +static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, + int fail, unsigned long pfn) +{ + struct to_kill *tk, *next; + + list_for_each_entry_safe (tk, next, to_kill, nd) { + if (doit) { + /* + * In case something went wrong with munmaping + * make sure the process doesn't catch the + * signal and then access the memory. Just kill it. + * the signal handlers + */ + if (fail || tk->addr_valid == 0) { + printk(KERN_ERR + "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", + pfn, tk->tsk->comm, tk->tsk->pid); + force_sig(SIGKILL, tk->tsk); + } + + /* + * In theory the process could have mapped + * something else on the address in-between. We could + * check for that, but we need to tell the + * process anyways. + */ + else if (kill_proc_ao(tk->tsk, tk->addr, trapno, + pfn) < 0) + printk(KERN_ERR + "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", + pfn, tk->tsk->comm, tk->tsk->pid); + } + put_task_struct(tk->tsk); + kfree(tk); + } +} + +static int task_early_kill(struct task_struct *tsk) +{ + if (!tsk->mm) + return 0; + if (tsk->flags & PF_MCE_PROCESS) + return !!(tsk->flags & PF_MCE_EARLY); + return sysctl_memory_failure_early_kill; +} + +/* + * Collect processes when the error hit an anonymous page. + */ +static void collect_procs_anon(struct page *page, struct list_head *to_kill, + struct to_kill **tkc) +{ + struct vm_area_struct *vma; + struct task_struct *tsk; + struct anon_vma *av; + + read_lock(&tasklist_lock); + av = page_lock_anon_vma(page); + if (av == NULL) /* Not actually mapped anymore */ + goto out; + for_each_process (tsk) { + if (!task_early_kill(tsk)) + continue; + list_for_each_entry (vma, &av->head, anon_vma_node) { + if (!page_mapped_in_vma(page, vma)) + continue; + if (vma->vm_mm == tsk->mm) + add_to_kill(tsk, page, vma, to_kill, tkc); + } + } + page_unlock_anon_vma(av); +out: + read_unlock(&tasklist_lock); +} + +/* + * Collect processes when the error hit a file mapped page. + */ +static void collect_procs_file(struct page *page, struct list_head *to_kill, + struct to_kill **tkc) +{ + struct vm_area_struct *vma; + struct task_struct *tsk; + struct prio_tree_iter iter; + struct address_space *mapping = page->mapping; + + /* + * A note on the locking order between the two locks. + * We don't rely on this particular order. + * If you have some other code that needs a different order + * feel free to switch them around. Or add a reverse link + * from mm_struct to task_struct, then this could be all + * done without taking tasklist_lock and looping over all tasks. + */ + + read_lock(&tasklist_lock); + spin_lock(&mapping->i_mmap_lock); + for_each_process(tsk) { + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + if (!task_early_kill(tsk)) + continue; + + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, + pgoff) { + /* + * Send early kill signal to tasks where a vma covers + * the page but the corrupted page is not necessarily + * mapped it in its pte. + * Assume applications who requested early kill want + * to be informed of all such data corruptions. + */ + if (vma->vm_mm == tsk->mm) + add_to_kill(tsk, page, vma, to_kill, tkc); + } + } + spin_unlock(&mapping->i_mmap_lock); + read_unlock(&tasklist_lock); +} + +/* + * Collect the processes who have the corrupted page mapped to kill. + * This is done in two steps for locking reasons. + * First preallocate one tokill structure outside the spin locks, + * so that we can kill at least one process reasonably reliable. + */ +static void collect_procs(struct page *page, struct list_head *tokill) +{ + struct to_kill *tk; + + if (!page->mapping) + return; + + tk = kmalloc(sizeof(struct to_kill), GFP_NOIO); + if (!tk) + return; + if (PageAnon(page)) + collect_procs_anon(page, tokill, &tk); + else + collect_procs_file(page, tokill, &tk); + kfree(tk); +} + +/* + * Error handlers for various types of pages. + */ + +enum outcome { + FAILED, /* Error handling failed */ + DELAYED, /* Will be handled later */ + IGNORED, /* Error safely ignored */ + RECOVERED, /* Successfully recovered */ +}; + +static const char *action_name[] = { + [FAILED] = "Failed", + [DELAYED] = "Delayed", + [IGNORED] = "Ignored", + [RECOVERED] = "Recovered", +}; + +/* + * Error hit kernel page. + * Do nothing, try to be lucky and not touch this instead. For a few cases we + * could be more sophisticated. + */ +static int me_kernel(struct page *p, unsigned long pfn) +{ + return DELAYED; +} + +/* + * Already poisoned page. + */ +static int me_ignore(struct page *p, unsigned long pfn) +{ + return IGNORED; +} + +/* + * Page in unknown state. Do nothing. + */ +static int me_unknown(struct page *p, unsigned long pfn) +{ + printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn); + return FAILED; +} + +/* + * Free memory + */ +static int me_free(struct page *p, unsigned long pfn) +{ + return DELAYED; +} + +/* + * Clean (or cleaned) page cache page. + */ +static int me_pagecache_clean(struct page *p, unsigned long pfn) +{ + int err; + int ret = FAILED; + struct address_space *mapping; + + if (!isolate_lru_page(p)) + page_cache_release(p); + + /* + * For anonymous pages we're done the only reference left + * should be the one m_f() holds. + */ + if (PageAnon(p)) + return RECOVERED; + + /* + * Now truncate the page in the page cache. This is really + * more like a "temporary hole punch" + * Don't do this for block devices when someone else + * has a reference, because it could be file system metadata + * and that's not safe to truncate. + */ + mapping = page_mapping(p); + if (!mapping) { + /* + * Page has been teared down in the meanwhile + */ + return FAILED; + } + + /* + * Truncation is a bit tricky. Enable it per file system for now. + * + * Open: to take i_mutex or not for this? Right now we don't. + */ + if (mapping->a_ops->error_remove_page) { + err = mapping->a_ops->error_remove_page(mapping, p); + if (err != 0) { + printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n", + pfn, err); + } else if (page_has_private(p) && + !try_to_release_page(p, GFP_NOIO)) { + pr_debug("MCE %#lx: failed to release buffers\n", pfn); + } else { + ret = RECOVERED; + } + } else { + /* + * If the file system doesn't support it just invalidate + * This fails on dirty or anything with private pages + */ + if (invalidate_inode_page(p)) + ret = RECOVERED; + else + printk(KERN_INFO "MCE %#lx: Failed to invalidate\n", + pfn); + } + return ret; +} + +/* + * Dirty cache page page + * Issues: when the error hit a hole page the error is not properly + * propagated. + */ +static int me_pagecache_dirty(struct page *p, unsigned long pfn) +{ + struct address_space *mapping = page_mapping(p); + + SetPageError(p); + /* TBD: print more information about the file. */ + if (mapping) { + /* + * IO error will be reported by write(), fsync(), etc. + * who check the mapping. + * This way the application knows that something went + * wrong with its dirty file data. + * + * There's one open issue: + * + * The EIO will be only reported on the next IO + * operation and then cleared through the IO map. + * Normally Linux has two mechanisms to pass IO error + * first through the AS_EIO flag in the address space + * and then through the PageError flag in the page. + * Since we drop pages on memory failure handling the + * only mechanism open to use is through AS_AIO. + * + * This has the disadvantage that it gets cleared on + * the first operation that returns an error, while + * the PageError bit is more sticky and only cleared + * when the page is reread or dropped. If an + * application assumes it will always get error on + * fsync, but does other operations on the fd before + * and the page is dropped inbetween then the error + * will not be properly reported. + * + * This can already happen even without hwpoisoned + * pages: first on metadata IO errors (which only + * report through AS_EIO) or when the page is dropped + * at the wrong time. + * + * So right now we assume that the application DTRT on + * the first EIO, but we're not worse than other parts + * of the kernel. + */ + mapping_set_error(mapping, EIO); + } + + return me_pagecache_clean(p, pfn); +} + +/* + * Clean and dirty swap cache. + * + * Dirty swap cache page is tricky to handle. The page could live both in page + * cache and swap cache(ie. page is freshly swapped in). So it could be + * referenced concurrently by 2 types of PTEs: + * normal PTEs and swap PTEs. We try to handle them consistently by calling + * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs, + * and then + * - clear dirty bit to prevent IO + * - remove from LRU + * - but keep in the swap cache, so that when we return to it on + * a later page fault, we know the application is accessing + * corrupted data and shall be killed (we installed simple + * interception code in do_swap_page to catch it). + * + * Clean swap cache pages can be directly isolated. A later page fault will + * bring in the known good data from disk. + */ +static int me_swapcache_dirty(struct page *p, unsigned long pfn) +{ + int ret = FAILED; + + ClearPageDirty(p); + /* Trigger EIO in shmem: */ + ClearPageUptodate(p); + + if (!isolate_lru_page(p)) { + page_cache_release(p); + ret = DELAYED; + } + + return ret; +} + +static int me_swapcache_clean(struct page *p, unsigned long pfn) +{ + int ret = FAILED; + + if (!isolate_lru_page(p)) { + page_cache_release(p); + ret = RECOVERED; + } + delete_from_swap_cache(p); + return ret; +} + +/* + * Huge pages. Needs work. + * Issues: + * No rmap support so we cannot find the original mapper. In theory could walk + * all MMs and look for the mappings, but that would be non atomic and racy. + * Need rmap for hugepages for this. Alternatively we could employ a heuristic, + * like just walking the current process and hoping it has it mapped (that + * should be usually true for the common "shared database cache" case) + * Should handle free huge pages and dequeue them too, but this needs to + * handle huge page accounting correctly. + */ +static int me_huge_page(struct page *p, unsigned long pfn) +{ + return FAILED; +} + +/* + * Various page states we can handle. + * + * A page state is defined by its current page->flags bits. + * The table matches them in order and calls the right handler. + * + * This is quite tricky because we can access page at any time + * in its live cycle, so all accesses have to be extremly careful. + * + * This is not complete. More states could be added. + * For any missing state don't attempt recovery. + */ + +#define dirty (1UL << PG_dirty) +#define sc (1UL << PG_swapcache) +#define unevict (1UL << PG_unevictable) +#define mlock (1UL << PG_mlocked) +#define writeback (1UL << PG_writeback) +#define lru (1UL << PG_lru) +#define swapbacked (1UL << PG_swapbacked) +#define head (1UL << PG_head) +#define tail (1UL << PG_tail) +#define compound (1UL << PG_compound) +#define slab (1UL << PG_slab) +#define buddy (1UL << PG_buddy) +#define reserved (1UL << PG_reserved) + +static struct page_state { + unsigned long mask; + unsigned long res; + char *msg; + int (*action)(struct page *p, unsigned long pfn); +} error_states[] = { + { reserved, reserved, "reserved kernel", me_ignore }, + { buddy, buddy, "free kernel", me_free }, + + /* + * Could in theory check if slab page is free or if we can drop + * currently unused objects without touching them. But just + * treat it as standard kernel for now. + */ + { slab, slab, "kernel slab", me_kernel }, + +#ifdef CONFIG_PAGEFLAGS_EXTENDED + { head, head, "huge", me_huge_page }, + { tail, tail, "huge", me_huge_page }, +#else + { compound, compound, "huge", me_huge_page }, +#endif + + { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty }, + { sc|dirty, sc, "swapcache", me_swapcache_clean }, + + { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty}, + { unevict, unevict, "unevictable LRU", me_pagecache_clean}, + +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT + { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty }, + { mlock, mlock, "mlocked LRU", me_pagecache_clean }, +#endif + + { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty }, + { lru|dirty, lru, "clean LRU", me_pagecache_clean }, + { swapbacked, swapbacked, "anonymous", me_pagecache_clean }, + + /* + * Catchall entry: must be at end. + */ + { 0, 0, "unknown page state", me_unknown }, +}; + +#undef lru + +static void action_result(unsigned long pfn, char *msg, int result) +{ + struct page *page = NULL; + if (pfn_valid(pfn)) + page = pfn_to_page(pfn); + + printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n", + pfn, + page && PageDirty(page) ? "dirty " : "", + msg, action_name[result]); +} + +static int page_action(struct page_state *ps, struct page *p, + unsigned long pfn, int ref) +{ + int result; + + result = ps->action(p, pfn); + action_result(pfn, ps->msg, result); + if (page_count(p) != 1 + ref) + printk(KERN_ERR + "MCE %#lx: %s page still referenced by %d users\n", + pfn, ps->msg, page_count(p) - 1); + + /* Could do more checks here if page looks ok */ + /* + * Could adjust zone counters here to correct for the missing page. + */ + + return result == RECOVERED ? 0 : -EBUSY; +} + +#define N_UNMAP_TRIES 5 + +/* + * Do all that is necessary to remove user space mappings. Unmap + * the pages and send SIGBUS to the processes if the data was dirty. + */ +static void hwpoison_user_mappings(struct page *p, unsigned long pfn, + int trapno) +{ + enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + struct address_space *mapping; + LIST_HEAD(tokill); + int ret; + int i; + int kill = 1; + + if (PageReserved(p) || PageCompound(p) || PageSlab(p)) + return; + + if (!PageLRU(p)) + lru_add_drain_all(); + + /* + * This check implies we don't kill processes if their pages + * are in the swap cache early. Those are always late kills. + */ + if (!page_mapped(p)) + return; + + if (PageSwapCache(p)) { + printk(KERN_ERR + "MCE %#lx: keeping poisoned page in swap cache\n", pfn); + ttu |= TTU_IGNORE_HWPOISON; + } + + /* + * Propagate the dirty bit from PTEs to struct page first, because we + * need this to decide if we should kill or just drop the page. + */ + mapping = page_mapping(p); + if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) { + if (page_mkclean(p)) { + SetPageDirty(p); + } else { + kill = 0; + ttu |= TTU_IGNORE_HWPOISON; + printk(KERN_INFO + "MCE %#lx: corrupted page was clean: dropped without side effects\n", + pfn); + } + } + + /* + * First collect all the processes that have the page + * mapped in dirty form. This has to be done before try_to_unmap, + * because ttu takes the rmap data structures down. + * + * Error handling: We ignore errors here because + * there's nothing that can be done. + */ + if (kill) + collect_procs(p, &tokill); + + /* + * try_to_unmap can fail temporarily due to races. + * Try a few times (RED-PEN better strategy?) + */ + for (i = 0; i < N_UNMAP_TRIES; i++) { + ret = try_to_unmap(p, ttu); + if (ret == SWAP_SUCCESS) + break; + pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); + } + + if (ret != SWAP_SUCCESS) + printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", + pfn, page_mapcount(p)); + + /* + * Now that the dirty bit has been propagated to the + * struct page and all unmaps done we can decide if + * killing is needed or not. Only kill when the page + * was dirty, otherwise the tokill list is merely + * freed. When there was a problem unmapping earlier + * use a more force-full uncatchable kill to prevent + * any accesses to the poisoned memory. + */ + kill_procs_ao(&tokill, !!PageDirty(p), trapno, + ret != SWAP_SUCCESS, pfn); +} + +int __memory_failure(unsigned long pfn, int trapno, int ref) +{ + struct page_state *ps; + struct page *p; + int res; + + if (!sysctl_memory_failure_recovery) + panic("Memory failure from trap %d on page %lx", trapno, pfn); + + if (!pfn_valid(pfn)) { + action_result(pfn, "memory outside kernel control", IGNORED); + return -EIO; + } + + p = pfn_to_page(pfn); + if (TestSetPageHWPoison(p)) { + action_result(pfn, "already hardware poisoned", IGNORED); + return 0; + } + + atomic_long_add(1, &mce_bad_pages); + + /* + * We need/can do nothing about count=0 pages. + * 1) it's a free page, and therefore in safe hand: + * prep_new_page() will be the gate keeper. + * 2) it's part of a non-compound high order page. + * Implies some kernel user: cannot stop them from + * R/W the page; let's pray that the page has been + * used and will be freed some time later. + * In fact it's dangerous to directly bump up page count from 0, + * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. + */ + if (!get_page_unless_zero(compound_head(p))) { + action_result(pfn, "free or high order kernel", IGNORED); + return PageBuddy(compound_head(p)) ? 0 : -EBUSY; + } + + /* + * Lock the page and wait for writeback to finish. + * It's very difficult to mess with pages currently under IO + * and in many cases impossible, so we just avoid it here. + */ + lock_page_nosync(p); + wait_on_page_writeback(p); + + /* + * Now take care of user space mappings. + */ + hwpoison_user_mappings(p, pfn, trapno); + + /* + * Torn down by someone else? + */ + if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + action_result(pfn, "already truncated LRU", IGNORED); + res = 0; + goto out; + } + + res = -EBUSY; + for (ps = error_states;; ps++) { + if ((p->flags & ps->mask) == ps->res) { + res = page_action(ps, p, pfn, ref); + break; + } + } +out: + unlock_page(p); + return res; +} +EXPORT_SYMBOL_GPL(__memory_failure); + +/** + * memory_failure - Handle memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * + * This function is called by the low level machine check code + * of an architecture when it detects hardware memory corruption + * of a page. It tries its best to recover, which includes + * dropping pages, killing processes etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber) + * + * Must run in process context (e.g. a work queue) with interrupts + * enabled and no spinlocks hold. + */ +void memory_failure(unsigned long pfn, int trapno) +{ + __memory_failure(pfn, trapno, 0); +} diff --git a/mm/memory.c b/mm/memory.c index b1443ac07c0..7e91b5f9f69 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr = vma->vm_start; /* - * Hide vma from rmap and vmtruncate before freeing pgtables + * Hide vma from rmap and truncate_pagecache before freeing + * pgtables */ anon_vma_unlink(vma); unlink_file_vma(vma); @@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) return i ? i : -ENOMEM; - else if (ret & VM_FAULT_SIGBUS) + if (ret & + (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; BUG(); } @@ -2407,7 +2409,7 @@ restart: * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE - * boundary. Note that this is different from vmtruncate(), which + * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded @@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @offset: file offset to start truncating - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - if (inode->i_size < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - i_size_write(inode, offset); - } else { - struct address_space *mapping = inode->i_mapping; - - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - i_size_write(inode, offset); - - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; - -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -} -EXPORT_SYMBOL(vmtruncate); - int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) { struct address_space *mapping = inode->i_mapping; @@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out; entry = pte_to_swp_entry(orig_pte); - if (is_migration_entry(entry)) { - migration_entry_wait(mm, pmd, address); + if (unlikely(non_swap_entry(entry))) { + if (is_migration_entry(entry)) { + migration_entry_wait(mm, pmd, address); + } else if (is_hwpoison_entry(entry)) { + ret = VM_FAULT_HWPOISON; + } else { + print_bad_pte(vma, address, orig_pte, NULL); + ret = VM_FAULT_OOM; + } goto out; } delayacct_set_flag(DELAYACCT_PF_SWAPIN); @@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); + } else if (PageHWPoison(page)) { + ret = VM_FAULT_HWPOISON; + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); + goto out; } lock_page(page); @@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; + if (unlikely(PageHWPoison(vmf.page))) { + if (ret & VM_FAULT_LOCKED) + unlock_page(vmf.page); + return VM_FAULT_HWPOISON; + } + /* * For consistency in subsequent calls, make the faulted page always * locked. diff --git a/mm/migrate.c b/mm/migrate.c index 16052e80aaa..1a4bf481378 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, } /* Establish migration ptes or remove ptes */ - try_to_unmap(page, 1); + try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); skip_unmap: if (!page_mapped(page)) diff --git a/mm/mremap.c b/mm/mremap.c index 20a07dba6be..97bff254771 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before - * moving file-based ptes, we must lock vmtruncate out, - * since it might clean the dst vma before the src vma, + * moving file-based ptes, we must lock truncate_pagecache + * out, since it might clean the dst vma before the src vma, * and we propagate stale pages into the dst afterward. */ mapping = vma->vm_file->f_mapping; diff --git a/mm/nommu.c b/mm/nommu.c index 8d484241d03..c73aa4753d7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = { }; /* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode *inode, loff_t offset) -{ - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - i_size_write(inode, offset); - - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - i_size_write(inode, offset); - -out_truncate: - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -EXPORT_SYMBOL(vmtruncate); - -/* * Return the total memory allocated for this pointer, not * just what the caller asked for. * @@ -866,7 +826,7 @@ static int validate_mmap_request(struct file *file, int ret; /* do the simple checks first */ - if (flags & MAP_FIXED || addr) { + if (flags & MAP_FIXED) { printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n", current->pid); @@ -1074,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1091,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) */ static int do_mmap_private(struct vm_area_struct *vma, struct vm_region *region, - unsigned long len) + unsigned long len, + unsigned long capabilities) { struct page *pages; unsigned long total, point, n, rlen; @@ -1102,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma, * shared mappings on devices or memory * - VM_MAYSHARE will be set if it may attempt to share */ - if (vma->vm_file) { + if (capabilities & BDI_CAP_MAP_DIRECT) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1221,9 +1182,6 @@ unsigned long do_mmap_pgoff(struct file *file, kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); - if (!(flags & MAP_FIXED)) - addr = round_hint_to_min(addr); - /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, @@ -1233,6 +1191,9 @@ unsigned long do_mmap_pgoff(struct file *file, return ret; } + /* we ignore the address hint */ + addr = 0; + /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ vm_flags = determine_vm_flags(file, prot, flags, capabilities); @@ -1346,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file, * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping */ - if (file && file->f_op->get_unmapped_area) { + if (capabilities & BDI_CAP_MAP_DIRECT) { addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR((void *) addr)) { @@ -1370,15 +1331,17 @@ unsigned long do_mmap_pgoff(struct file *file, } vma->vm_region = region; - add_nommu_region(region); - /* set up the mapping */ + /* set up the mapping + * - the region is filled in if BDI_CAP_MAP_DIRECT is still set + */ if (file && vma->vm_flags & VM_SHARED) ret = do_mmap_shared_file(vma); else - ret = do_mmap_private(vma, region, len); + ret = do_mmap_private(vma, region, len, capabilities); if (ret < 0) - goto error_put_region; + goto error_just_free; + add_nommu_region(region); /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; @@ -1396,19 +1359,6 @@ share: kleave(" = %lx", result); return result; -error_put_region: - __put_nommu_region(region); - if (vma) { - if (vma->vm_file) { - fput(vma->vm_file); - if (vma->vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(vma->vm_mm); - } - kmem_cache_free(vm_area_cachep, vma); - } - kleave(" = %d [pr]", ret); - return ret; - error_just_free: up_write(&nommu_region_sem); error: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5f378dd5880..d99664e8607 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -155,37 +155,37 @@ static void update_completion_period(void) } int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; } int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { update_completion_period(); vm_dirty_bytes = 0; @@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write, int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { update_completion_period(); vm_dirty_ratio = 0; @@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); return 0; } @@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) EXPORT_SYMBOL(redirty_page_for_writepage); /* + * Dirty a page. + * + * For pages with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. + * * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5717f27a070..bf720550b44 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -234,6 +234,12 @@ static void bad_page(struct page *page) static unsigned long nr_shown; static unsigned long nr_unshown; + /* Don't complain about poisoned pages */ + if (PageHWPoison(page)) { + __ClearPageBuddy(page); + return; + } + /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. @@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page, /* * This page is about to be returned from the page allocator */ -static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +static inline int check_new_page(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | @@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) bad_page(page); return 1; } + return 0; +} + +static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +{ + int i; + + for (i = 0; i < (1 << order); i++) { + struct page *p = page + i; + if (unlikely(check_new_page(p))) + return 1; + } set_page_private(page, 0); set_page_refcounted(page); @@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order); * sysctl handler for numa_zonelist_order */ int numa_zonelist_order_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos) { char saved_string[NUMA_ZONELIST_ORDER_LEN]; @@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, if (write) strncpy(saved_string, (char*)table->data, NUMA_ZONELIST_ORDER_LEN); - ret = proc_dostring(table, write, file, buffer, length, ppos); + ret = proc_dostring(table, write, buffer, length, ppos); if (ret) return ret; if (write) { @@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min) * changes. */ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (write) setup_per_zone_wmarks(); return 0; @@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, } int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, * if in function of the boot time zone sizes. */ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); setup_per_zone_lowmem_reserve(); return 0; } @@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, */ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; unsigned int cpu; int ret; - ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { diff --git a/mm/rmap.c b/mm/rmap.c index 720fc03a7bc..28aafe2b530 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -36,6 +36,11 @@ * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within inode_lock in __sync_single_inode) + * + * (code doesn't rely on that order so it could be switched around) + * ->tasklist_lock + * anon_vma->lock (memory_failure, collect_procs_anon) + * pte map lock */ #include <linux/mm.h> @@ -191,7 +196,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma rely on RCU to guard against the races. */ -static struct anon_vma *page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma; unsigned long anon_mapping; @@ -211,7 +216,7 @@ out: return NULL; } -static void page_unlock_anon_vma(struct anon_vma *anon_vma) +void page_unlock_anon_vma(struct anon_vma *anon_vma) { spin_unlock(&anon_vma->lock); rcu_read_unlock(); @@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, * if the page is not mapped into the page tables of this VMA. Only * valid for normal file or anonymous VMAs. */ -static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) { unsigned long address; pte_t *pte; @@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page) * repeatedly from either try_to_unmap_anon or try_to_unmap_file. */ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, - int migration) + enum ttu_flags flags) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If it's recently referenced (perhaps page_referenced * skipped over this mm) then we should reactivate it. */ - if (!migration) { + if (!(flags & TTU_IGNORE_MLOCK)) { if (vma->vm_flags & VM_LOCKED) { ret = SWAP_MLOCK; goto out_unmap; } + } + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pte)) { ret = SWAP_FAIL; goto out_unmap; @@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* Update high watermark before we lower rss */ update_hiwater_rss(mm); - if (PageAnon(page)) { + if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { + if (PageAnon(page)) + dec_mm_counter(mm, anon_rss); + else + dec_mm_counter(mm, file_rss); + set_pte_at(mm, address, pte, + swp_entry_to_pte(make_hwpoison_entry(page))); + } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(page) }; if (PageSwapCache(page)) { @@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ - BUG_ON(!migration); + BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); entry = make_migration_entry(page, pte_write(pteval)); } set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); BUG_ON(pte_file(*pte)); - } else if (PAGE_MIGRATION && migration) { + } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) { /* Establish migration entry for a file page */ swp_entry_t entry; entry = make_migration_entry(page, pte_write(pteval)); @@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) * vm_flags for that VMA. That should be OK, because that vma shouldn't be * 'LOCKED. */ -static int try_to_unmap_anon(struct page *page, int unlock, int migration) +static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) { struct anon_vma *anon_vma; struct vm_area_struct *vma; unsigned int mlocked = 0; int ret = SWAP_AGAIN; + int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; if (MLOCK_PAGES && unlikely(unlock)) ret = SWAP_SUCCESS; /* default for try_to_munlock() */ @@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) continue; /* must visit all unlocked vmas */ ret = SWAP_MLOCK; /* saw at least one mlocked vma */ } else { - ret = try_to_unmap_one(page, vma, migration); + ret = try_to_unmap_one(page, vma, flags); if (ret == SWAP_FAIL || !page_mapped(page)) break; } @@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) /** * try_to_unmap_file - unmap/unlock file page using the object-based rmap method * @page: the page to unmap/unlock - * @unlock: request for unlock rather than unmap [unlikely] - * @migration: unmapping for migration - ignored if @unlock + * @flags: action and flags * * Find all the mappings of a page using the mapping pointer and the vma chains * contained in the address_space struct it points to. @@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) * vm_flags for that VMA. That should be OK, because that vma shouldn't be * 'LOCKED. */ -static int try_to_unmap_file(struct page *page, int unlock, int migration) +static int try_to_unmap_file(struct page *page, enum ttu_flags flags) { struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) unsigned long max_nl_size = 0; unsigned int mapcount; unsigned int mlocked = 0; + int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; if (MLOCK_PAGES && unlikely(unlock)) ret = SWAP_SUCCESS; /* default for try_to_munlock() */ @@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) continue; /* must visit all vmas */ ret = SWAP_MLOCK; } else { - ret = try_to_unmap_one(page, vma, migration); + ret = try_to_unmap_one(page, vma, flags); if (ret == SWAP_FAIL || !page_mapped(page)) goto out; } @@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ret = SWAP_MLOCK; /* leave mlocked == 0 */ goto out; /* no need to look further */ } - if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) + if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && + (vma->vm_flags & VM_LOCKED)) continue; cursor = (unsigned long) vma->vm_private_data; if (cursor > max_nl_cursor) @@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) do { list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) { - if (!MLOCK_PAGES && !migration && + if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && (vma->vm_flags & VM_LOCKED)) continue; cursor = (unsigned long) vma->vm_private_data; @@ -1177,7 +1193,7 @@ out: /** * try_to_unmap - try to remove all page table mappings to a page * @page: the page to get unmapped - * @migration: migration flag + * @flags: action and flags * * Tries to remove all the page table entries which are mapping this * page, used in the pageout path. Caller must hold the page lock. @@ -1188,16 +1204,16 @@ out: * SWAP_FAIL - the page is unswappable * SWAP_MLOCK - page is mlocked. */ -int try_to_unmap(struct page *page, int migration) +int try_to_unmap(struct page *page, enum ttu_flags flags) { int ret; BUG_ON(!PageLocked(page)); if (PageAnon(page)) - ret = try_to_unmap_anon(page, 0, migration); + ret = try_to_unmap_anon(page, flags); else - ret = try_to_unmap_file(page, 0, migration); + ret = try_to_unmap_file(page, flags); if (ret != SWAP_MLOCK && !page_mapped(page)) ret = SWAP_SUCCESS; return ret; @@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page) VM_BUG_ON(!PageLocked(page) || PageLRU(page)); if (PageAnon(page)) - return try_to_unmap_anon(page, 1, 0); + return try_to_unmap_anon(page, TTU_MUNLOCK); else - return try_to_unmap_file(page, 1, 0); + return try_to_unmap_file(page, TTU_MUNLOCK); } diff --git a/mm/shmem.c b/mm/shmem.c index b206a7a32e2..98631c26c20 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping, if (pos + copied > inode->i_size) i_size_write(inode, pos + copied); - unlock_page(page); set_page_dirty(page); + unlock_page(page); page_cache_release(page); return copied; @@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s iput(inode); return error; } - unlock_page(page); inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_symlink_inode_operations; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len); kunmap_atomic(kaddr, KM_USER0); set_page_dirty(page); + unlock_page(page); page_cache_release(page); } if (dir->i_mode & S_ISGID) @@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = { .write_end = shmem_write_end, #endif .migratepage = migrate_page, + .error_remove_page = generic_error_remove_page, }; static const struct file_operations shmem_file_operations = { diff --git a/mm/swapfile.c b/mm/swapfile.c index f1bf19daadc..4de7f02f820 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry) struct swap_info_struct *p; struct page *page = NULL; - if (is_migration_entry(entry)) + if (non_swap_entry(entry)) return 1; p = swap_info_get(entry); @@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache) int count; bool has_cache; - if (is_migration_entry(entry)) + if (non_swap_entry(entry)) return -EINVAL; type = swp_type(entry); diff --git a/mm/truncate.c b/mm/truncate.c index ccc3ecf7cb9..450cebdabfc 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page); * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void +static int truncate_complete_page(struct address_space *mapping, struct page *page) { if (page->mapping != mapping) - return; + return -EIO; if (page_has_private(page)) do_invalidatepage(page, 0); @@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) remove_from_page_cache(page); ClearPageMappedToDisk(page); page_cache_release(page); /* pagecache ref */ + return 0; } /* @@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) return ret; } +int truncate_inode_page(struct address_space *mapping, struct page *page) +{ + if (page_mapped(page)) { + unmap_mapping_range(mapping, + (loff_t)page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, 0); + } + return truncate_complete_page(mapping, page); +} + +/* + * Used to get rid of pages on hardware memory corruption. + */ +int generic_error_remove_page(struct address_space *mapping, struct page *page) +{ + if (!mapping) + return -EINVAL; + /* + * Only punch for normal data pages for now. + * Handling other types like directories would need more auditing. + */ + if (!S_ISREG(mapping->host->i_mode)) + return -EIO; + return truncate_inode_page(mapping, page); +} +EXPORT_SYMBOL(generic_error_remove_page); + +/* + * Safely invalidate one page from its pagecache mapping. + * It only drops clean, unused pages. The page must be locked. + * + * Returns 1 if the page is successfully invalidated, otherwise 0. + */ +int invalidate_inode_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + if (!mapping) + return 0; + if (PageDirty(page) || PageWriteback(page)) + return 0; + if (page_mapped(page)) + return 0; + return invalidate_complete_page(mapping, page); +} + /** * truncate_inode_pages - truncate range of pages specified by start & end byte offsets * @mapping: mapping to truncate @@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping, unlock_page(page); continue; } - if (page_mapped(page)) { - unmap_mapping_range(mapping, - (loff_t)page_index<<PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); - } - truncate_complete_page(mapping, page); + truncate_inode_page(mapping, page); unlock_page(page); } pagevec_release(&pvec); @@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping, break; lock_page(page); wait_on_page_writeback(page); - if (page_mapped(page)) { - unmap_mapping_range(mapping, - (loff_t)page->index<<PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); - } + truncate_inode_page(mapping, page); if (page->index > next) next = page->index; next++; - truncate_complete_page(mapping, page); unlock_page(page); } pagevec_release(&pvec); @@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, if (lock_failed) continue; - if (PageDirty(page) || PageWriteback(page)) - goto unlock; - if (page_mapped(page)) - goto unlock; - ret += invalidate_complete_page(mapping, page); -unlock: + ret += invalidate_inode_page(page); + unlock_page(page); if (next > end) break; @@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping) return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); + +/** + * truncate_pagecache - unmap and remove pagecache that has been truncated + * @inode: inode + * @old: old file offset + * @new: new file offset + * + * inode's new i_size must already be written before truncate_pagecache + * is called. + * + * This function should typically be called before the filesystem + * releases resources associated with the freed range (eg. deallocates + * blocks). This way, pagecache will always stay logically coherent + * with on-disk format, and the filesystem would not have to deal with + * situations such as writepage being called for a page that has already + * had its underlying blocks deallocated. + */ +void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) +{ + if (new < old) { + struct address_space *mapping = inode->i_mapping; + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, new); + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + } +} +EXPORT_SYMBOL(truncate_pagecache); + +/** + * vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * NOTE! We have to be ready to update the memory sharing + * between the file and the memory map for a potential last + * incomplete page. Ugly, but necessary. + */ +int vmtruncate(struct inode *inode, loff_t offset) +{ + loff_t oldsize; + int error; + + error = inode_newsize_ok(inode, offset); + if (error) + return error; + oldsize = inode->i_size; + i_size_write(inode, offset); + truncate_pagecache(inode, oldsize, offset); + if (inode->i_op->truncate) + inode->i_op->truncate(inode); + + return error; +} +EXPORT_SYMBOL(vmtruncate); diff --git a/mm/vmscan.c b/mm/vmscan.c index 613e89f471d..1219ceb8a9b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page) && mapping) { - switch (try_to_unmap(page, 0)) { + switch (try_to_unmap(page, TTU_UNMAP)) { case SWAP_FAIL: goto activate_locked; case SWAP_AGAIN: @@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR +unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, int nid) +{ + struct scan_control sc = { + .may_writepage = !laptop_mode, + .may_unmap = 1, + .may_swap = !noswap, + .swap_cluster_max = SWAP_CLUSTER_MAX, + .swappiness = swappiness, + .order = 0, + .mem_cgroup = mem, + .isolate_pages = mem_cgroup_isolate_pages, + }; + nodemask_t nm = nodemask_of_node(nid); + + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); + sc.nodemask = &nm; + sc.nr_reclaimed = 0; + sc.nr_scanned = 0; + /* + * NOTE: Although we can get the priority field, using it + * here is not a good idea, since it limits the pages we can scan. + * if we don't reclaim here, the shrink_zone from balance_pgdat + * will pick up pages from other mem cgroup's as well. We hack + * the priority and make it zero. + */ + shrink_zone(0, zone, &sc); + return sc.nr_reclaimed; +} + unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask, bool noswap, unsigned int swappiness) { + struct zonelist *zonelist; struct scan_control sc = { .may_writepage = !laptop_mode, .may_unmap = 1, @@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .isolate_pages = mem_cgroup_isolate_pages, .nodemask = NULL, /* we don't care the placement */ }; - struct zonelist *zonelist; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -1974,6 +2007,7 @@ loop_again: for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; int nr_slab; + int nid, zid; if (!populated_zone(zone)) continue; @@ -1988,6 +2022,15 @@ loop_again: temp_priority[i] = priority; sc.nr_scanned = 0; note_zone_scanning_priority(zone, priority); + + nid = pgdat->node_id; + zid = zone_idx(zone); + /* + * Call soft limit reclaim before calling shrink_zone. + * For now we ignore the return value + */ + mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, + nid, zid); /* * We put equal pressure on every zone, unless one * zone has way too many pages free already. @@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void) unsigned long scan_unevictable_pages; int scan_unevictable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write && *(unsigned long *)table->data) scan_all_zones_unevictable_pages(); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 907a82e9023..a16a2342f6b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, +int brnf_sysctl_call_tables(ctl_table * ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 1c6a5bb6f0c..6e1f085db06 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, struct file *, +static int dn_forwarding_proc(ctl_table *, int, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, @@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } static int dn_forwarding_proc(ctl_table *table, int write, - struct file *filep, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, dn_db = dev->dn_ptr; old = dn_db->parms.forwarding; - err = proc_dointvec(table, write, filep, buffer, lenp, ppos); + err = proc_dointvec(table, write, buffer, lenp, ppos); if ((err >= 0) && write) { if (dn_db->parms.forwarding < 0) diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 5bcd592ae6d..26b0ab1e9f5 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table, } static int dn_node_address_handler(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table, static int dn_def_dev_handler(ctl_table *table, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 07336c6201f..e92f1fd28aa 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net) } static int devinet_conf_proc(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table, } static int devinet_sysctl_forward(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; @@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); struct net *net = ctl->extra2; if (write && *valp != val) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df934731453..bb419925202 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) #ifdef CONFIG_SYSCTL static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { @@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, memcpy(&ctl, __ctl, sizeof(ctl)); ctl.data = &flush_delay; - proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(&ctl, write, buffer, lenp, ppos); net = (struct net *)__ctl->extra1; rt_cache_flush(net, flush_delay); @@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old) } static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); rt_secret_reschedule(old); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4710d219f06..2dcf04d9b00 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,7 +36,7 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, +static int ipv4_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, }; inet_get_local_port_range(range, range + 1); - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) @@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, +static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; @@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * tcp_get_default_congestion_control(val); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_default_congestion_control(val); return ret; @@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, } static int proc_tcp_available_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, if (!tbl.data) return -ENOMEM; tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); kfree(tbl.data); return ret; } static int proc_allowed_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return -ENOMEM; tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 55f486d89c8..1fd0a3d775d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_forward(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); @@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_disable(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7015478797f..498b9b0b0fa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, } } -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) +int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = proc_dointvec_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = proc_dointvec_ms_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else ret = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 77aecbe8ff6..d6fe7646a8f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; int delay = net->ipv6.sysctl.flush_delay; if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(ctl, write, buffer, lenp, ppos); fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 57f8817c397..5c86567e5a7 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, struct file *filp, +static int do_devname(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dostring(table, write, filp, buffer, lenp, ppos); + ret = proc_dostring(table, write, buffer, lenp, ppos); if (ret == 0 && write) { struct ias_value *val; @@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp, } -static int do_discovery(ctl_table *table, int write, struct file *filp, +static int do_discovery(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fba2892b99e..446e9bd4b4b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void) static int -proc_do_defense_mode(ctl_table *table, int write, struct file * filp, +proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int val = *valp; int rc; - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (*valp != val)) { if ((*valp < 0) || (*valp > 3)) { /* Restore the correct value */ @@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp, static int -proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, +proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, /* backup the value first */ memcpy(val, valp, sizeof(val)); - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { /* Restore the correct value */ memcpy(valp, val, sizeof(val)); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4e620305f28..c93494fef8e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; -static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, +static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; @@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, table->data = "NONE"; else table->data = logger->name; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); mutex_unlock(&nf_log_mutex); } diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 7b5749ee276..2220f332232 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, struct file *filp, +static int proc_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &local_port_range_max, }; - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a417d5ab5dd..38829e20500 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async); /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it - * @ops: RPC call ops + * @req: RPC request + * @tk_ops: RPC call ops */ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) + const struct rpc_call_ops *tk_ops) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5231f7aaac0..42f9748ae09 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -56,7 +56,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, struct file *file, +static int proc_do_xprt(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, } static int -proc_dodebug(ctl_table *table, int write, struct file *file, +proc_dodebug(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 87101177825..35fb68b9c8e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep; * current value. */ static int read_reset_stat(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { atomic_t *stat = (atomic_t *)table->data; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index b8186bac8b7..6cf8fd2b79e 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) struct cgroup_subsys devices_subsys; static int devcgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, struct task_struct *task) + struct cgroup *new_cgroup, struct task_struct *task, + bool threadgroup) { if (current != task && !capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 500aad0ebd6..3bb90b6f1dd 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab, char *name1, char *name2) { if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%pI6", name1, addr); + audit_log_format(ab, " %s=%pI6c", name1, addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } diff --git a/security/min_addr.c b/security/min_addr.c index 14cc7b3b8d0..c844eed7915 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -28,12 +28,12 @@ static void update_mmap_min_addr(void) * sysctl handler which just sets dac_mmap_min_addr = the new value and then * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly */ -int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); update_mmap_min_addr(); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 417f7c99452..bb230d5d708 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) /* Wake up the parent if it is waiting so that it can recheck * wait permission to the new task SID. */ read_lock(&tasklist_lock); - wake_up_interruptible(¤t->real_parent->signal->wait_chldexit); + __wake_up_parent(current, current->real_parent); read_unlock(&tasklist_lock); } diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c index 4e34d19ddbc..b4b48afb6de 100644 --- a/sound/arm/pxa2xx-ac97.c +++ b/sound/arm/pxa2xx-ac97.c @@ -137,9 +137,9 @@ static int pxa2xx_ac97_do_resume(struct snd_card *card) return 0; } -static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state) +static int pxa2xx_ac97_suspend(struct device *dev) { - struct snd_card *card = platform_get_drvdata(dev); + struct snd_card *card = dev_get_drvdata(dev); int ret = 0; if (card) @@ -148,9 +148,9 @@ static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state) return ret; } -static int pxa2xx_ac97_resume(struct platform_device *dev) +static int pxa2xx_ac97_resume(struct device *dev) { - struct snd_card *card = platform_get_drvdata(dev); + struct snd_card *card = dev_get_drvdata(dev); int ret = 0; if (card) @@ -159,9 +159,10 @@ static int pxa2xx_ac97_resume(struct platform_device *dev) return ret; } -#else -#define pxa2xx_ac97_suspend NULL -#define pxa2xx_ac97_resume NULL +static struct dev_pm_ops pxa2xx_ac97_pm_ops = { + .suspend = pxa2xx_ac97_suspend, + .resume = pxa2xx_ac97_resume, +}; #endif static int __devinit pxa2xx_ac97_probe(struct platform_device *dev) @@ -241,11 +242,12 @@ static int __devexit pxa2xx_ac97_remove(struct platform_device *dev) static struct platform_driver pxa2xx_ac97_driver = { .probe = pxa2xx_ac97_probe, .remove = __devexit_p(pxa2xx_ac97_remove), - .suspend = pxa2xx_ac97_suspend, - .resume = pxa2xx_ac97_resume, .driver = { .name = "pxa2xx-ac97", .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &pxa2xx_ac97_pm_ops, +#endif }, }; |