From d4d84fef6d0366b585b7de13527a0faeca84d9ce Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 2 Jun 2011 10:19:41 -0400 Subject: slub: always align cpu_slab to honor cmpxchg_double requirement On an architecture without CMPXCHG_LOCAL but with DEBUG_VM enabled, the VM_BUG_ON() in __pcpu_double_call_return_bool() will cause an early panic during boot unless we always align cpu_slab properly. In principle we could remove the alignment-testing VM_BUG_ON() for architectures that don't have CMPXCHG_LOCAL, but leaving it in means that new code will tend not to break x86 even if it is introduced on another platform, and it's low cost to require alignment. Acked-by: David Rientjes Acked-by: Christoph Lameter Signed-off-by: Chris Metcalf Signed-off-by: Pekka Enberg --- include/linux/percpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8b97308e65d..9ca008f0c54 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -259,6 +259,9 @@ extern void __bad_size_call_parameter(void); * Special handling for cmpxchg_double. cmpxchg_double is passed two * percpu variables. The first has to be aligned to a double word * boundary and the second has to follow directly thereafter. + * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. */ #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ ({ \ -- cgit v1.2.3-70-g09d2 From fb04883371f2cb7867d24783e7d590036dc9b548 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 15:44:27 +0200 Subject: netfilter: add more values to enum ip_conntrack_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following error is raised (and other similar ones) : net/ipv4/netfilter/nf_nat_standalone.c: In function ‘nf_nat_fn’: net/ipv4/netfilter/nf_nat_standalone.c:119:2: warning: case value ‘4’ not in enumerated type ‘enum ip_conntrack_info’ gcc barfs on adding two enum values and getting a not enumerated result : case IP_CT_RELATED+IP_CT_IS_REPLY: Add missing enum values Signed-off-by: Eric Dumazet CC: David Miller Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 3 +++ net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 10 ++++------ net/netfilter/nf_conntrack_irc.c | 3 +-- net/netfilter/nf_conntrack_pptp.c | 3 +-- net/netfilter/nf_conntrack_sane.c | 2 +- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/xt_socket.c | 4 ++-- 16 files changed, 26 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 50cdc2559a5..0d3dd66322e 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -18,6 +18,9 @@ enum ip_conntrack_info { /* >= this indicates reply direction */ IP_CT_IS_REPLY, + IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, + IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, + IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, /* Number of distinct IP_CT types (no NEW in reply dirn). */ IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3cb9a..5c9e97c7901 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc74eb..9931152a78b 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02af99..db10075dd88 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b2755ce..3346de5d94d 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c30426480..733c9abc1cb 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf1d45..483b76d042d 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b2256..4111050a9fc 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f7841..0bd56892940 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -1143,7 +1143,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c7dd8..6f5801eac99 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5c8ce..f03c2d4539f 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa31877..4f9390b9869 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 088944824e1..2fd4565144d 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e27734b2a..8501823b3f9 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a2858178..93faf6a3a63 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc46356b57..fe39f7e913d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; -- cgit v1.2.3-70-g09d2 From a9c667f8f0656631ee5438baaf21bf30d5f67375 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 6 Jun 2011 09:51:52 -0400 Subject: ext4: fixed tracepoints cleanup While creating fixed tracepoints for ext3, basically by porting them from ext4, I found a lot of useless retyping, wrong type usage, useless variable passing and other inconsistencies in the ext4 fixed tracepoint code. This patch cleans the fixed tracepoint code for ext4 and also simplify some of them. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 6 +- include/trace/events/ext4.h | 179 +++++++++++++++++++------------------------- 3 files changed, 80 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a5763e3505b..e3126c05100 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; - trace_ext4_writepage(inode, page); + trace_ext4_writepage(page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index eb71dd59f2e..6ed859d5685 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, free += next - bit; trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); - trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, - grp_blk_start + bit, next - bit); + trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, + next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ext4_group_t group; ext4_grpblk_t bit; - trace_ext4_mb_release_group_pa(sb, pa); + trace_ext4_mb_release_group_pa(pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index e09592d2f91..5ce2b2f5f52 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -26,7 +26,7 @@ TRACE_EVENT(ext4_free_inode, __field( umode_t, mode ) __field( uid_t, uid ) __field( gid_t, gid ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -40,9 +40,8 @@ TRACE_EVENT(ext4_free_inode, TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, - (unsigned long long) __entry->blocks) + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid, __entry->blocks) ); TRACE_EVENT(ext4_request_inode, @@ -178,7 +177,7 @@ TRACE_EVENT(ext4_begin_ordered_truncate, TP_printk("dev %d,%d ino %lu new_size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (long long) __entry->new_size) + __entry->new_size) ); DECLARE_EVENT_CLASS(ext4__write_begin, @@ -204,7 +203,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin, __entry->flags = flags; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) @@ -248,7 +247,7 @@ DECLARE_EVENT_CLASS(ext4__write_end, __entry->copied = copied; ), - TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", + TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->copied) @@ -286,29 +285,6 @@ DEFINE_EVENT(ext4__write_end, ext4_da_write_end, TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_writepage, - TP_PROTO(struct inode *inode, struct page *page), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->index = page->index; - ), - - TP_printk("dev %d,%d ino %lu page_index %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->index) -); - TRACE_EVENT(ext4_da_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc), @@ -341,7 +317,7 @@ TRACE_EVENT(ext4_da_writepages, ), TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " - "range_start %llu range_end %llu sync_mode %d" + "range_start %lld range_end %lld sync_mode %d" "for_kupdate %d range_cyclic %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, @@ -449,7 +425,14 @@ DECLARE_EVENT_CLASS(ext4__page_op, TP_printk("dev %d,%d ino %lu page_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->index) + (unsigned long) __entry->index) +); + +DEFINE_EVENT(ext4__page_op, ext4_writepage, + + TP_PROTO(struct page *page), + + TP_ARGS(page) ); DEFINE_EVENT(ext4__page_op, ext4_readpage, @@ -489,7 +472,7 @@ TRACE_EVENT(ext4_invalidatepage, TP_printk("dev %d,%d ino %lu page_index %lu offset %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->index, __entry->offset) + (unsigned long) __entry->index, __entry->offset) ); TRACE_EVENT(ext4_discard_blocks, @@ -562,12 +545,10 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, ); TRACE_EVENT(ext4_mb_release_inode_pa, - TP_PROTO(struct super_block *sb, - struct inode *inode, - struct ext4_prealloc_space *pa, + TP_PROTO(struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), - TP_ARGS(sb, inode, pa, block, count), + TP_ARGS(pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) @@ -578,8 +559,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->ino = inode->i_ino; + __entry->dev = pa->pa_inode->i_sb->s_dev; + __entry->ino = pa->pa_inode->i_ino; __entry->block = block; __entry->count = count; ), @@ -591,10 +572,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ); TRACE_EVENT(ext4_mb_release_group_pa, - TP_PROTO(struct super_block *sb, - struct ext4_prealloc_space *pa), + TP_PROTO(struct ext4_prealloc_space *pa), - TP_ARGS(sb, pa), + TP_ARGS(pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -604,7 +584,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = pa->pa_inode->i_sb->s_dev; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), @@ -666,10 +646,10 @@ TRACE_EVENT(ext4_request_blocks, __field( ino_t, ino ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -687,17 +667,13 @@ TRACE_EVENT(ext4_request_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu " - "lleft %llu lright %llu pleft %llu pright %llu ", + TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu " + "lleft %u lright %u pleft %llu pright %llu ", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->logical, __entry->goal, + __entry->lleft, __entry->lright, __entry->pleft, + __entry->pright) ); TRACE_EVENT(ext4_allocate_blocks, @@ -711,10 +687,10 @@ TRACE_EVENT(ext4_allocate_blocks, __field( __u64, block ) __field( unsigned int, flags ) __field( unsigned int, len ) - __field( __u64, logical ) + __field( __u32, logical ) + __field( __u32, lleft ) + __field( __u32, lright ) __field( __u64, goal ) - __field( __u64, lleft ) - __field( __u64, lright ) __field( __u64, pleft ) __field( __u64, pright ) ), @@ -733,17 +709,13 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu " - "goal %llu lleft %llu lright %llu pleft %llu pright %llu", + TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u " + "goal %llu lleft %u lright %u pleft %llu pright %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->flags, __entry->len, __entry->block, - (unsigned long long) __entry->logical, - (unsigned long long) __entry->goal, - (unsigned long long) __entry->lleft, - (unsigned long long) __entry->lright, - (unsigned long long) __entry->pleft, - (unsigned long long) __entry->pright) + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->block, __entry->logical, + __entry->goal, __entry->lleft, __entry->lright, + __entry->pleft, __entry->pright) ); TRACE_EVENT(ext4_free_blocks, @@ -755,10 +727,10 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( umode_t, mode ) __field( __u64, block ) __field( unsigned long, count ) - __field( int, flags ) + __field( int, flags ) ), TP_fast_assign( @@ -798,7 +770,7 @@ TRACE_EVENT(ext4_sync_file_enter, __entry->parent = dentry->d_parent->d_inode->i_ino; ), - TP_printk("dev %d,%d ino %ld parent %ld datasync %d ", + TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) @@ -821,7 +793,7 @@ TRACE_EVENT(ext4_sync_file_exit, __entry->dev = inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1005,7 +977,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc, __entry->result_len = len; ), - TP_printk("dev %d,%d inode %lu extent %u/%d/%u ", + TP_printk("dev %d,%d inode %lu extent %u/%d/%d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, @@ -1093,7 +1065,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1127,7 +1099,7 @@ TRACE_EVENT(ext4_da_reserve_space, "reserved_data_blocks %d reserved_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->md_needed, __entry->reserved_data_blocks, __entry->reserved_meta_blocks) ); @@ -1164,7 +1136,7 @@ TRACE_EVENT(ext4_da_release_space, "allocated_meta_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->mode, __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -1239,14 +1211,15 @@ TRACE_EVENT(ext4_direct_IO_enter, __entry->rw = rw; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, __entry->rw) + __entry->pos, __entry->len, __entry->rw) ); TRACE_EVENT(ext4_direct_IO_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret), + TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, + int rw, int ret), TP_ARGS(inode, offset, len, rw, ret), @@ -1268,10 +1241,10 @@ TRACE_EVENT(ext4_direct_IO_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, + __entry->pos, __entry->len, __entry->rw, __entry->ret) ); @@ -1296,15 +1269,15 @@ TRACE_EVENT(ext4_fallocate_enter, __entry->mode = mode; ), - TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d", + TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, - (unsigned long long) __entry->len, __entry->mode) + (unsigned long) __entry->ino, __entry->pos, + __entry->len, __entry->mode) ); TRACE_EVENT(ext4_fallocate_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret), + TP_PROTO(struct inode *inode, loff_t offset, + unsigned int max_blocks, int ret), TP_ARGS(inode, offset, max_blocks, ret), @@ -1312,7 +1285,7 @@ TRACE_EVENT(ext4_fallocate_exit, __field( ino_t, ino ) __field( dev_t, dev ) __field( loff_t, pos ) - __field( unsigned, blocks ) + __field( unsigned int, blocks ) __field( int, ret ) ), @@ -1324,10 +1297,10 @@ TRACE_EVENT(ext4_fallocate_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d", + TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->blocks, + __entry->pos, __entry->blocks, __entry->ret) ); @@ -1350,7 +1323,7 @@ TRACE_EVENT(ext4_unlink_enter, __entry->dev = dentry->d_inode->i_sb->s_dev; ), - TP_printk("dev %d,%d ino %ld size %lld parent %ld", + TP_printk("dev %d,%d ino %lu size %lld parent %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->size, (unsigned long) __entry->parent) @@ -1373,7 +1346,7 @@ TRACE_EVENT(ext4_unlink_exit, __entry->ret = ret; ), - TP_printk("dev %d,%d ino %ld ret %d", + TP_printk("dev %d,%d ino %lu ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret) @@ -1387,7 +1360,7 @@ DECLARE_EVENT_CLASS(ext4__truncate, TP_STRUCT__entry( __field( ino_t, ino ) __field( dev_t, dev ) - __field( blkcnt_t, blocks ) + __field( __u64, blocks ) ), TP_fast_assign( @@ -1396,9 +1369,9 @@ DECLARE_EVENT_CLASS(ext4__truncate, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %d,%d ino %lu blocks %lu", + TP_printk("dev %d,%d ino %lu blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, (unsigned long) __entry->blocks) + (unsigned long) __entry->ino, __entry->blocks) ); DEFINE_EVENT(ext4__truncate, ext4_truncate_enter, @@ -1417,7 +1390,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit, DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - unsigned len, unsigned flags), + unsigned int len, unsigned int flags), TP_ARGS(inode, lblk, len, flags), @@ -1425,8 +1398,8 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, __field( ino_t, ino ) __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) - __field( unsigned, len ) - __field( unsigned, flags ) + __field( unsigned int, len ) + __field( unsigned int, flags ) ), TP_fast_assign( @@ -1440,7 +1413,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter, TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, __entry->len, __entry->flags) + __entry->lblk, __entry->len, __entry->flags) ); DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter, @@ -1459,7 +1432,7 @@ DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter, DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_PROTO(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, unsigned len, int ret), + ext4_fsblk_t pblk, unsigned int len, int ret), TP_ARGS(inode, lblk, pblk, len, ret), @@ -1468,7 +1441,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, __field( dev_t, dev ) __field( ext4_lblk_t, lblk ) __field( ext4_fsblk_t, pblk ) - __field( unsigned, len ) + __field( unsigned int, len ) __field( int, ret ) ), @@ -1484,7 +1457,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk, + __entry->lblk, __entry->pblk, __entry->len, __entry->ret) ); @@ -1524,7 +1497,7 @@ TRACE_EVENT(ext4_ext_load_extent, TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - (unsigned) __entry->lblk, (unsigned long long) __entry->pblk) + __entry->lblk, __entry->pblk) ); TRACE_EVENT(ext4_load_inode, -- cgit v1.2.3-70-g09d2 From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an nunnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 19 ++++++------------- include/linux/sunrpc/gss_krb5_enctypes.h | 4 ++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) create mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34..2b1449dd2f4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -189,18 +190,10 @@ static struct file_operations export_features_operations = { .release = single_release, }; -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) { - struct gss_api_mech *k5mech; - - k5mech = gss_mech_get_by_name("krb5"); - if (k5mech == NULL) - goto out; - if (k5mech->gm_upcall_enctypes != NULL) - seq_printf(m, k5mech->gm_upcall_enctypes); - gss_mech_put(k5mech); -out: + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); return 0; } @@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = { .llseek = seq_lseek, .release = single_release, }; -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); @@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, -#ifdef CONFIG_SUNRPC_GSS +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS */ +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h new file mode 100644 index 00000000000..ec6234eee89 --- /dev/null +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -0,0 +1,4 @@ +/* + * Dumb way to share this static piece of information with nfsd + */ +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e46..c3b75333b82 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) -- cgit v1.2.3-70-g09d2 From 3019de124b9f5b1526cb3668b74af14371e21795 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 16:41:33 -0700 Subject: net: Rework netdev_drivername() to avoid warning. This interface uses a temporary buffer, but for no real reason. And now can generate warnings like: net/sched/sch_generic.c: In function dev_watchdog net/sched/sch_generic.c:254:10: warning: unused variable drivername Just return driver->name directly or "". Reported-by: Connor Hansen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 16 +++++----------- net/sched/sch_generic.c | 3 +-- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca333e79e10..54b8b4d7b68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2555,7 +2555,7 @@ extern void netdev_class_remove_file(struct class_attribute *class_attr); extern struct kobj_ns_type_operations net_ns_type_operations; -extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); +extern const char *netdev_drivername(const struct net_device *dev); extern void linkwatch_run_queue(void); diff --git a/net/core/dev.c b/net/core/dev.c index 939307891e7..1af6cb27f67 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d71c27..b4c680900d7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3-70-g09d2 From 13fcb7bd322164c67926ffe272846d4860196dc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2011 22:42:06 -0700 Subject: af_packet: prevent information leak In 2.6.27, commit 393e52e33c6c2 (packet: deliver VLAN TCI to userspace) added a small information leak. Add padding field and make sure its zeroed before copy to user. Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 6d66ce1791a..7b318630139 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -62,6 +62,7 @@ struct tpacket_auxdata { __u16 tp_mac; __u16 tp_net; __u16 tp_vlan_tci; + __u16 tp_padding; }; /* Rx ring - header status */ @@ -101,6 +102,7 @@ struct tpacket2_hdr { __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; + __u16 tp_padding; }; #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba248d93399..c0c3cda1971 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -804,6 +804,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } else { h.h2->tp_vlan_tci = 0; } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -1736,6 +1737,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, } else { aux.tp_vlan_tci = 0; } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3-70-g09d2 From ea2c00095c022846dd8dfd211de05154d3e4e1b8 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 17 May 2011 15:25:37 -0700 Subject: Connector: Set the CN_NETLINK_USERS correctly The CN_NETLINK_USERS must be set to the highest valid index +1. Thanks to Evgeniy for pointing this out. Signed-off-by: K. Y. Srinivasan Acked-by: Evgeniy Polyakov Cc: stable [.39] Signed-off-by: Greg Kroah-Hartman --- include/linux/connector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/connector.h b/include/linux/connector.h index 7c60d0942ad..f696bccd48c 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -44,7 +44,7 @@ #define CN_VAL_DRBD 0x1 #define CN_KVP_IDX 0x9 /* HyperV KVP */ -#define CN_NETLINK_USERS 9 +#define CN_NETLINK_USERS 10 /* Highest index + 1 */ /* * Maximum connector's message size. -- cgit v1.2.3-70-g09d2 From b4c8cc88c18213688268d1d53a51d97ce2f19a64 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Thu, 9 Jun 2011 15:05:48 -0700 Subject: ethtool.h: fix typos Signed-off-by: Yegor Yefremov Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c6a850ab2ec..439b173c588 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -268,7 +268,7 @@ struct ethtool_pauseparam { __u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg - * being true) the user may set 'autonet' here non-zero to have the + * being true) the user may set 'autoneg' here non-zero to have the * pause parameters be auto-negotiated too. In such a case, the * {rx,tx}_pause values below determine what capabilities are * advertised. @@ -811,7 +811,7 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported); * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums * are turned on or off. * @set_tx_csum: Deprecated in favour of generic netdev features. Turn - * transmit checksums on or off. Returns a egative error code or zero. + * transmit checksums on or off. Returns a negative error code or zero. * @get_sg: Deprecated as redundant. Report whether scatter-gather is * enabled. * @set_sg: Deprecated in favour of generic netdev features. Turn @@ -1087,7 +1087,7 @@ struct ethtool_ops { /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the * devices settings, these indicate the current mode and whether - * it was foced up into this mode or autonegotiated. + * it was forced up into this mode or autonegotiated. */ /* The forced speed, 10Mb, 100Mb, gigabit, 2.5Gb, 10GbE. */ -- cgit v1.2.3-70-g09d2 From e5ea3f12d41d96edf4ad9374db2b1725e457acec Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 10 Jun 2011 13:44:49 +0100 Subject: gpio/basic_mmio: add missing include of spinlock_types.h include/linux/basic_mmio_gpio.h uses a spinlock_t without including any of the spinlock headers resulting in this compiler warning. include/linux/basic_mmio_gpio.h:51:2: error: expected specifier-qualifier-list before 'spinlock_t' Explicitly include linux/spinlock_types.h to fix it. Signed-off-by: Jamie Iles Signed-off-by: Grant Likely --- include/linux/basic_mmio_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 1ae12710d73..98999cf107c 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -16,6 +16,7 @@ #include #include #include +#include struct bgpio_pdata { int base; -- cgit v1.2.3-70-g09d2 From 8de111e27688798623b9e9062235bb0cac29f599 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 27 May 2011 16:56:50 -0300 Subject: [media] lirc_dev: store cdev in irctl, up maxdevs Store the cdev pointer in struct irctl, allocated dynamically as needed, rather than having a static array. At the same time, recycle some of the saved memory to nudge the maximum number of lirc devices supported up a ways -- its not that uncommon these days, now that we have the rc-core lirc bridge driver, to see a system with at least 4 raw IR receivers. (consider a mythtv backend with several video capture devices and the possible need for IR transmit hardware). Signed-off-by: Jarod Wilson Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/lirc_dev.c | 37 +++++++++++++++++++++++++++---------- include/media/lirc_dev.h | 2 +- 2 files changed, 28 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index fd237ab120b..27997a9ceb0 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -55,6 +55,8 @@ struct irctl { struct lirc_buffer *buf; unsigned int chunk_size; + struct cdev *cdev; + struct task_struct *task; long jiffies_to_wait; }; @@ -62,7 +64,6 @@ struct irctl { static DEFINE_MUTEX(lirc_dev_lock); static struct irctl *irctls[MAX_IRCTL_DEVICES]; -static struct cdev cdevs[MAX_IRCTL_DEVICES]; /* Only used for sysfs but defined to void otherwise */ static struct class *lirc_class; @@ -167,9 +168,13 @@ static struct file_operations lirc_dev_fops = { static int lirc_cdev_add(struct irctl *ir) { - int retval; + int retval = -ENOMEM; struct lirc_driver *d = &ir->d; - struct cdev *cdev = &cdevs[d->minor]; + struct cdev *cdev; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + goto err_out; if (d->fops) { cdev_init(cdev, d->fops); @@ -180,12 +185,20 @@ static int lirc_cdev_add(struct irctl *ir) } retval = kobject_set_name(&cdev->kobj, "lirc%d", d->minor); if (retval) - return retval; + goto err_out; retval = cdev_add(cdev, MKDEV(MAJOR(lirc_base_dev), d->minor), 1); - if (retval) + if (retval) { kobject_put(&cdev->kobj); + goto err_out; + } + + ir->cdev = cdev; + + return 0; +err_out: + kfree(cdev); return retval; } @@ -214,7 +227,7 @@ int lirc_register_driver(struct lirc_driver *d) if (MAX_IRCTL_DEVICES <= d->minor) { dev_err(d->dev, "lirc_dev: lirc_register_driver: " "\"minor\" must be between 0 and %d (%d)!\n", - MAX_IRCTL_DEVICES-1, d->minor); + MAX_IRCTL_DEVICES - 1, d->minor); err = -EBADRQC; goto out; } @@ -369,7 +382,7 @@ int lirc_unregister_driver(int minor) if (minor < 0 || minor >= MAX_IRCTL_DEVICES) { printk(KERN_ERR "lirc_dev: %s: minor (%d) must be between " - "0 and %d!\n", __func__, minor, MAX_IRCTL_DEVICES-1); + "0 and %d!\n", __func__, minor, MAX_IRCTL_DEVICES - 1); return -EBADRQC; } @@ -380,7 +393,7 @@ int lirc_unregister_driver(int minor) return -ENOENT; } - cdev = &cdevs[minor]; + cdev = ir->cdev; mutex_lock(&lirc_dev_lock); @@ -410,6 +423,7 @@ int lirc_unregister_driver(int minor) } else { lirc_irctl_cleanup(ir); cdev_del(cdev); + kfree(cdev); kfree(ir); irctls[minor] = NULL; } @@ -453,7 +467,7 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) goto error; } - cdev = &cdevs[iminor(inode)]; + cdev = ir->cdev; if (try_module_get(cdev->owner)) { ir->open++; retval = ir->d.set_use_inc(ir->d.data); @@ -484,13 +498,15 @@ EXPORT_SYMBOL(lirc_dev_fop_open); int lirc_dev_fop_close(struct inode *inode, struct file *file) { struct irctl *ir = irctls[iminor(inode)]; - struct cdev *cdev = &cdevs[iminor(inode)]; + struct cdev *cdev; if (!ir) { printk(KERN_ERR "%s: called with invalid irctl\n", __func__); return -EINVAL; } + cdev = ir->cdev; + dev_dbg(ir->d.dev, LOGHEAD "close called\n", ir->d.name, ir->d.minor); WARN_ON(mutex_lock_killable(&lirc_dev_lock)); @@ -503,6 +519,7 @@ int lirc_dev_fop_close(struct inode *inode, struct file *file) lirc_irctl_cleanup(ir); cdev_del(cdev); irctls[ir->d.minor] = NULL; + kfree(cdev); kfree(ir); } diff --git a/include/media/lirc_dev.h b/include/media/lirc_dev.h index 630e702c951..168dd0b1bae 100644 --- a/include/media/lirc_dev.h +++ b/include/media/lirc_dev.h @@ -9,7 +9,7 @@ #ifndef _LINUX_LIRC_DEV_H #define _LINUX_LIRC_DEV_H -#define MAX_IRCTL_DEVICES 4 +#define MAX_IRCTL_DEVICES 8 #define BUFLEN 16 #define mod(n, div) ((n) % (div)) -- cgit v1.2.3-70-g09d2 From c30701130cf7bff4f97a148b1bc96f878c046a40 Mon Sep 17 00:00:00 2001 From: "HeungJun, Kim" Date: Tue, 7 Jun 2011 02:00:58 -0300 Subject: [media] m5mols: Use proper email address format Signed-off-by: HeungJun, Kim Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/m5mols/m5mols.h | 4 ++-- drivers/media/video/m5mols/m5mols_capture.c | 4 ++-- drivers/media/video/m5mols/m5mols_controls.c | 4 ++-- drivers/media/video/m5mols/m5mols_core.c | 4 ++-- drivers/media/video/m5mols/m5mols_reg.h | 4 ++-- include/media/m5mols.h | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/media/video/m5mols/m5mols.h b/drivers/media/video/m5mols/m5mols.h index 9ae17093587..89d09a8914f 100644 --- a/drivers/media/video/m5mols/m5mols.h +++ b/drivers/media/video/m5mols/m5mols.h @@ -2,10 +2,10 @@ * Header for M-5MOLS 8M Pixel camera sensor with ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/video/m5mols/m5mols_capture.c b/drivers/media/video/m5mols/m5mols_capture.c index 751f4593da9..d9471928369 100644 --- a/drivers/media/video/m5mols/m5mols_capture.c +++ b/drivers/media/video/m5mols/m5mols_capture.c @@ -2,10 +2,10 @@ * The Capture code for Fujitsu M-5MOLS ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/video/m5mols/m5mols_controls.c b/drivers/media/video/m5mols/m5mols_controls.c index d392c83fbf0..d135d20d09c 100644 --- a/drivers/media/video/m5mols/m5mols_controls.c +++ b/drivers/media/video/m5mols/m5mols_controls.c @@ -2,10 +2,10 @@ * Controls for M-5MOLS 8M Pixel camera sensor with ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/video/m5mols/m5mols_core.c b/drivers/media/video/m5mols/m5mols_core.c index 9815f2c75e8..43c68f51c5c 100644 --- a/drivers/media/video/m5mols/m5mols_core.c +++ b/drivers/media/video/m5mols/m5mols_core.c @@ -2,10 +2,10 @@ * Driver for M-5MOLS 8M Pixel camera sensor with ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/video/m5mols/m5mols_reg.h b/drivers/media/video/m5mols/m5mols_reg.h index 5f5bdcf608b..c755bd6edfe 100644 --- a/drivers/media/video/m5mols/m5mols_reg.h +++ b/drivers/media/video/m5mols/m5mols_reg.h @@ -2,10 +2,10 @@ * Register map for M-5MOLS 8M Pixel camera sensor with ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/media/m5mols.h b/include/media/m5mols.h index 2d7e7ca2313..aac2c0e06d5 100644 --- a/include/media/m5mols.h +++ b/include/media/m5mols.h @@ -2,10 +2,10 @@ * Driver header for M-5MOLS 8M Pixel camera sensor with ISP * * Copyright (C) 2011 Samsung Electronics Co., Ltd. - * Author: HeungJun Kim, riverful.kim@samsung.com + * Author: HeungJun Kim * * Copyright (C) 2009 Samsung Electronics Co., Ltd. - * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com + * Author: Dongsoo Nathaniel Kim * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by -- cgit v1.2.3-70-g09d2 From 56a210526adb2854d5b7c398a40260720390ee05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 11 Jun 2011 12:29:58 +0100 Subject: linux/seqlock.h should #include asm/processor.h for cpu_relax() It uses cpu_relax(), and so needs Without this patch, I see: CC arch/mn10300/kernel/asm-offsets.s In file included from include/linux/time.h:8, from include/linux/timex.h:56, from include/linux/sched.h:57, from arch/mn10300/kernel/asm-offsets.c:7: include/linux/seqlock.h: In function 'read_seqbegin': include/linux/seqlock.h:91: error: implicit declaration of function 'cpu_relax' whilst building asb2364_defconfig on MN10300. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/seqlock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e9811892844..c6db9fb33c4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -28,6 +28,7 @@ #include #include +#include typedef struct { unsigned sequence; -- cgit v1.2.3-70-g09d2 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header accelleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decided that we need to increment rx_stats for a multicast packet. Hopefully at somepoint we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 25 ++++++++++++++++++--- include/linux/skbuff.h | 5 +++++ net/8021q/vlan_core.c | 60 +++++++++++++++++++++++++++---------------------- net/core/dev.c | 2 +- 4 files changed, 61 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index dc01681fbb4..affa27380b7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -225,7 +225,7 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, } /** - * __vlan_put_tag - regular VLAN tag inserting + * vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_tci: VLAN TCI to insert * @@ -234,8 +234,10 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, u16 vlan_tci) { struct vlan_ethhdr *veth; @@ -255,8 +257,25 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); - skb->protocol = htons(ETH_P_8021Q); + return skb; +} +/** + * __vlan_put_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) +{ + skb = vlan_insert_tag(skb, vlan_tci); + if (skb) + skb->protocol = htons(ETH_P_8021Q); return skb; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8b78ce1447..c0a4f3ab0cc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1256,6 +1256,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len) skb->tail += len; } +static inline void skb_reset_mac_len(struct sk_buff *skb) +{ + skb->mac_len = skb->network_header - skb->mac_header; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c..fcc684678af 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddb..9c58c1ec41a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3-70-g09d2 From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: Delay struct net freeing while there's a sysfs instance refering to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logics has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance refering to that struct net is actual freeing of memory occupied by struct net. Signed-off-by: Al Viro --- fs/sysfs/mount.c | 37 +++++++++++-------------------------- fs/sysfs/sysfs.h | 2 +- include/linux/kobject_ns.h | 10 ++++++---- include/linux/sysfs.h | 7 ------- include/net/net_namespace.h | 10 +++++++++- lib/kobject.c | 26 +++++++++----------------- net/core/net-sysfs.c | 23 +++++++++-------------- net/core/net_namespace.c | 12 ++++++++++-- 8 files changed, 55 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b4..e34f0d99ea4 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) return error; } +static void free_sysfs_super_info(struct sysfs_super_info *info) +{ + int type; + for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) + kobj_ns_drop(type, info->ns[type]); + kfree(info); +} + static struct dentry *sysfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, return ERR_PTR(-ENOMEM); for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) - info->ns[type] = kobj_ns_current(type); + info->ns[type] = kobj_ns_grab_current(type); sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); if (IS_ERR(sb) || sb->s_fs_info != info) - kfree(info); + free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { @@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, static void sysfs_kill_sb(struct super_block *sb) { struct sysfs_super_info *info = sysfs_info(sb); - /* Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing sysfs_super_info. */ kill_anon_super(sb); - kfree(info); + free_sysfs_super_info(info); } static struct file_system_type sysfs_fs_type = { @@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { .kill_sb = sysfs_kill_sb, }; -void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) -{ - struct super_block *sb; - - mutex_lock(&sysfs_mutex); - spin_lock(&sb_lock); - list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { - struct sysfs_super_info *info = sysfs_info(sb); - /* - * If we see a superblock on the fs_supers/s_instances - * list the unmount has not completed and sb->s_fs_info - * points to a valid struct sysfs_super_info. - */ - /* Ignore superblocks with the wrong ns */ - if (info->ns[type] != ns) - continue; - info->ns[type] = NULL; - } - spin_unlock(&sb_lock); - mutex_unlock(&sysfs_mutex); -} - int __init sysfs_init(void) { int err = -ENOMEM; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d86..2ed2404f311 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { * instance). */ struct sysfs_super_info { - const void *ns[KOBJ_NS_TYPES]; + void *ns[KOBJ_NS_TYPES]; }; #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) extern struct sysfs_dirent sysfs_root; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 82cb5bf461f..f66b065a8b5 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -32,15 +32,17 @@ enum kobj_ns_type { /* * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace + * @grab_current_ns: return a new reference to calling task's namespace * @netlink_ns: return namespace to which a sock belongs (right?) * @initial_ns: return the initial namespace (i.e. init_net_ns) + * @drop_ns: drops a reference to namespace */ struct kobj_ns_type_operations { enum kobj_ns_type type; - const void *(*current_ns)(void); + void *(*grab_current_ns)(void); const void *(*netlink_ns)(struct sock *sk); const void *(*initial_ns)(void); + void (*drop_ns)(void *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -48,9 +50,9 @@ int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); -const void *kobj_ns_current(enum kobj_ns_type type); +void *kobj_ns_grab_current(enum kobj_ns_type type); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); +void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c3acda60eee..e2696d76a59 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -177,9 +177,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -/* Called to clear a ns tag when it is no longer valid */ -void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); - int __must_check sysfs_init(void); #else /* CONFIG_SYSFS */ @@ -338,10 +335,6 @@ static inline void sysfs_put(struct sysfs_dirent *sd) { } -static inline void sysfs_exit_ns(int type, const void *tag) -{ -} - static inline int __must_check sysfs_init(void) { return 0; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2bf9ed9ef26..aef430d779b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -35,8 +35,11 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { + atomic_t passive; /* To decided when the network + * namespace should be freed. + */ atomic_t count; /* To decided when the network - * namespace should be freed. + * namespace should be shut down. */ #ifdef NETNS_REFCNT_DEBUG atomic_t use_count; /* To track references we @@ -154,6 +157,9 @@ int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } + +extern void net_drop_ns(void *); + #else static inline struct net *get_net(struct net *net) @@ -175,6 +181,8 @@ int net_eq(const struct net *net1, const struct net *net2) { return 1; } + +#define net_drop_ns NULL #endif diff --git a/lib/kobject.c b/lib/kobject.c index 82dc34c095c..640bd98a4c8 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,14 +948,14 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) } -const void *kobj_ns_current(enum kobj_ns_type type) +void *kobj_ns_grab_current(enum kobj_ns_type type) { - const void *ns = NULL; + void *ns = NULL; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && kobj_ns_ops_tbl[type]) - ns = kobj_ns_ops_tbl[type]->current_ns(); + ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; @@ -987,23 +987,15 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } -/* - * kobj_ns_exit - invalidate a namespace tag - * - * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) - * @ns: the actual namespace being invalidated - * - * This is called when a tag is no longer valid. For instance, - * when a network namespace exits, it uses this helper to - * make sure no sb's sysfs_info points to the now-invalidated - * netns. - */ -void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +void kobj_ns_drop(enum kobj_ns_type type, void *ns) { - sysfs_exit_ns(type, ns); + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) + kobj_ns_ops_tbl[type]->drop_ns(ns); + spin_unlock(&kobj_ns_type_lock); } - EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8..33d2a1fba13 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da1..cdcbc3cb00a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -210,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); -- cgit v1.2.3-70-g09d2 From 08e8138adebdd511e0955e8d6c051904bb4082af Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Jun 2011 10:42:49 +0200 Subject: block: Add __attribute__((format(printf...) and fix fallout Use the compiler to verify format strings and arguments. Fix fallout. Signed-off-by: Joe Perches Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++-- block/cfq-iosched.c | 11 ++++++----- include/linux/blktrace_api.h | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a62be8d0dc1..3689f833afd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q) bio_list_init(&bio_list_on_stack); - throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u", + throtl_log(td, "dispatch nr_queued=%d read=%u write=%u", total_nr_queued(td), td->nr_queued[READ], td->nr_queued[WRITE]); @@ -1204,7 +1204,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) } queue_bio: - throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu" + throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" " iodisp=%u iops=%u queued=%d/%d", rw == READ ? 'R' : 'W', tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 545b8d4c829..f3799432676 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -988,9 +988,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" - " sect=%u", used_sl, cfqq->slice_dispatch, charge, - iops_mode(cfqd), cfqq->nr_sectors); + cfq_log_cfqq(cfqq->cfqd, cfqq, + "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", + used_sl, cfqq->slice_dispatch, charge, + iops_mode(cfqd), cfqq->nr_sectors); cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, unaccounted_sl); cfq_blkiocg_set_start_empty_time(&cfqg->blkg); @@ -2023,8 +2024,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) */ if (sample_valid(cic->ttime_samples) && (cfqq->slice_end - jiffies < cic->ttime_mean)) { - cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d", - cic->ttime_mean); + cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", + cic->ttime_mean); return; } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index b22fb0d3db0..8c7c2de7631 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -169,7 +169,8 @@ extern void blk_trace_shutdown(struct request_queue *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts); -extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); +extern __attribute__((format(printf, 2, 3))) +void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream -- cgit v1.2.3-70-g09d2 From e9c039052be59753e6bcc7c8b59763899dc1161c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Jun 2011 19:05:58 +0100 Subject: ASoC: Remove unused and about to be broken SND_SOC_CUSTOM I/O bus This will be removed in -next so let's drop it from mainline as soon as we can in order to minimise surprises. Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +-- sound/soc/soc-cache.c | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index f1de3e0c75b..3a4bd3a3c68 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -248,8 +248,7 @@ typedef int (*hw_write_t)(void *,const char* ,int); extern struct snd_ac97_bus_ops soc_ac97_ops; enum snd_soc_control_type { - SND_SOC_CUSTOM = 1, - SND_SOC_I2C, + SND_SOC_I2C = 1, SND_SOC_SPI, }; diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c index c005ceb70c9..039b9532b27 100644 --- a/sound/soc/soc-cache.c +++ b/sound/soc/soc-cache.c @@ -409,9 +409,6 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, codec->bulk_write_raw = snd_soc_hw_bulk_write_raw; switch (control) { - case SND_SOC_CUSTOM: - break; - case SND_SOC_I2C: #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE)) codec->hw_write = (hw_write_t)i2c_master_send; -- cgit v1.2.3-70-g09d2 From de1b794130b130e77ffa975bb58cb843744f9ae5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jun 2011 15:38:22 -0400 Subject: jbd2: Fix oops in jbd2_journal_remove_journal_head() jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 jbd2_journal_commit_transaction() ... processing t_forget list __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); jbd2_journal_try_to_free_buffers() jbd2_journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() jbd2_journal_put_journal_head(jh) jbd2_journal_remove_journal_head(bh); jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jdb2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 28 +++++++++------- fs/jbd2/commit.c | 33 +++++++++++-------- fs/jbd2/journal.c | 91 ++++++++++++++++----------------------------------- fs/jbd2/transaction.c | 67 +++++++++++++++++++------------------ include/linux/jbd2.h | 2 -- 5 files changed, 99 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a3..2c62c5aae82 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { + /* + * Get our reference so that bh cannot be freed before + * we unlock it + */ + get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -223,8 +227,8 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + get_bh(bh); if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -243,7 +247,6 @@ restart: */ released = __jbd2_journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } @@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, int ret = 0; if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); + get_bh(bh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; + get_bh(bh); J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); __brelse(bh); } else { /* @@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* * journal_clean_one_cp_list * - * Find all the written-back checkpoint buffers in the given list and release them. + * Find all the written-back checkpoint buffers in the given list and + * release them. * * Called with the journal locked. * Called with j_list_lock held. @@ -663,8 +667,8 @@ out: * checkpoint lists. * * The function returns 1 if it frees the transaction, 0 otherwise. + * The function can free jh and bh. * - * This function is called with the journal locked. * This function is called with j_list_lock held. * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ @@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) } journal = transaction->t_journal; + JBUFFER_TRACE(jh, "removing from transaction"); __buffer_unlink(jh); jh->b_cp_transaction = NULL; + jbd2_journal_put_journal_head(jh); if (transaction->t_checkpoint_list != NULL || transaction->t_checkpoint_io_list != NULL) goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); /* * There is one special case to worry about: if we have just pulled the @@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) * The locking here around t_state is a bit sleazy. * See the comment at the end of jbd2_journal_commit_transaction(). */ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction->t_state != T_FINISHED) goto out; - } /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ @@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) wake_up(&journal->j_wait_logspace); ret = 1; out: - JBUFFER_TRACE(jh, "exit"); return ret; } @@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + /* Get reference for checkpointing transaction */ + jbd2_journal_grab_journal_head(jh2bh(jh)); jh->b_cp_transaction = transaction; if (!transaction->t_checkpoint_list) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf9..eef6979821a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -848,10 +848,16 @@ restart_loop: while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; + int try_to_free = 0; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); + /* + * Get a reference so that bh cannot be freed before we are + * done with it. + */ + get_bh(bh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); @@ -914,28 +920,27 @@ restart_loop: __jbd2_journal_insert_checkpoint(jh, commit_transaction); if (is_journal_aborted(journal)) clear_buffer_jbddirty(bh); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means + /* + * The buffer on BJ_Forget list and not jbddirty means * it has been freed by this transaction and hence it * could not have been reallocated until this * transaction has committed. *BUT* it could be * reallocated once we have written all the data to * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __jbd2_journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - jbd2_journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); + * list. + */ + if (!jh->b_next_transaction) + try_to_free = 1; } + JBUFFER_TRACE(jh, "refile or unfile buffer"); + __jbd2_journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + if (try_to_free) + release_buffer_page(bh); /* Drops bh reference */ + else + __brelse(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a782699030..0dfa5b598e6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = jbd2_journal_add_journal_head(bh); * ... + * (Get another reference for transaction) + * jbd2_journal_grab_journal_head(bh); * jh->b_transaction = xxx; + * (Put original reference) * jbd2_journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) @@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __func__); - jbd2_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __func__); - jbd2_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd2_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd2_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void jbd2_journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. */ void jbd2_journal_put_journal_head(struct journal_head *jh) @@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 547b101049e..2d7109414cd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -30,6 +30,7 @@ #include static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); +static void __jbd2_journal_unfile_buffer(struct journal_head *jh); /* * jbd2_get_transaction: obtain a new transaction_t object. @@ -764,7 +765,6 @@ repeat: if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); @@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) * committed and so it's safe to clear the dirty bit. */ clear_buffer_dirty(jh2bh(jh)); - jh->b_transaction = transaction; - /* first access by this transaction */ jh->b_modified = 0; @@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); } else { __jbd2_journal_unfile_buffer(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } -void __jbd2_journal_unfile_buffer(struct journal_head *jh) +/* + * Remove buffer from all transactions. + * + * Called with bh_state lock and j_list_lock + * + * jh and bh may be already freed when this function returns. + */ +static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; + jbd2_journal_put_journal_head(jh); } void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { - jbd_lock_bh_state(jh2bh(jh)); + struct buffer_head *bh = jh2bh(jh); + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); __jbd2_journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + jbd_unlock_bh_state(bh); + __brelse(bh); } /* @@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); __jbd2_journal_remove_checkpoint(jh); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); } } spin_unlock(&journal->j_list_lock); @@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * jbd2_journal_remove_journal_head() and * jbd2_journal_put_journal_head(). */ jh = jbd2_journal_grab_journal_head(bh); @@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __jbd2_journal_unfile_buffer(jh); - if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + __jbd2_journal_temp_unlink_buffer(jh); /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in @@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - jbd2_journal_remove_journal_head(bh); - __brelse(bh); + __jbd2_journal_unfile_buffer(jh); } return may_free; } @@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, if (jh->b_transaction) __jbd2_journal_temp_unlink_buffer(jh); + else + jbd2_journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { @@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, * already started to be used by a subsequent transaction, refile the * buffer on that transaction's metadata list. * - * Called under journal->j_list_lock - * + * Called under j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)) + * + * jh and bh may be already free when this function returns */ void __jbd2_journal_refile_buffer(struct journal_head *jh) { @@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); + /* + * We set b_transaction here because b_next_transaction will inherit + * our jh reference and thus __jbd2_journal_file_buffer() must not + * take a new one. + */ jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) @@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) } /* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. - * - * __jbd2_journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** + * __jbd2_journal_refile_buffer() with necessary locking added. We take our + * bh reference so that we can safely unlock bh. + * + * The jh and bh may be freed by this call. */ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - jbd2_journal_remove_journal_head(bh); - spin_unlock(&journal->j_list_lock); __brelse(bh); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4ecb7b16b27..d087c2e7b2a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1024,7 +1024,6 @@ struct journal_s /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1165,7 +1164,6 @@ extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_in */ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); -void jbd2_journal_remove_journal_head(struct buffer_head *bh); void jbd2_journal_put_journal_head(struct journal_head *jh); /* -- cgit v1.2.3-70-g09d2 From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced performance regression. In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed high rate of context switch which is caused by this. Out test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switch per second which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. McKenney --- Documentation/filesystems/proc.txt | 1 + include/linux/interrupt.h | 1 + include/trace/events/irq.h | 3 ++- kernel/rcutree.c | 23 +++++++++++++++++++---- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 9 +++++++++ kernel/softirq.c | 2 +- tools/perf/util/trace-event-parse.c | 1 + 8 files changed, 35 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f4817802406..db3b1aba32a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. TASKLET: 0 0 0 290 SCHED: 27035 26983 26971 26746 HRTIMER: 0 0 0 0 + RCU: 1678 1769 2178 2250 1.3 IDE devices in /proc/ide diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6c12989839d..f6efed0039e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,6 +414,7 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ae045ca7d35..1c09820df58 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,7 +20,8 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER)) + softirq_name(HRTIMER), \ + softirq_name(RCU)) /** * irq_handler_entry - called immediately before the irq action handler diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b2e20..ae5c9ea6866 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_cpu_kthread(void); +static void __invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rsp, rdp); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + __invoke_rcu_cpu_kthread(); +} + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); } /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(void) +static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void invoke_rcu_cpu_kthread(void) +static void __invoke_rcu_cpu_kthread(void) { unsigned long flags; @@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); } +static void invoke_rcu_cpu_kthread(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) *workp = 0; local_irq_restore(flags); if (work) - rcu_process_callbacks(); + rcu_kthread_do_work(); local_bh_enable(); if (*workp != 0) spincnt++; @@ -2387,6 +2401,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b4aae..0fed6b934d2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); +static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ea2e2fb79e8..38d09c5f2b4 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +static void rcu_preempt_do_callbacks(void) +{ + rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); +} + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void) { } +static void rcu_preempt_do_callbacks(void) +{ +} + /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad..40cf63ddd4b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER" + "TASKLET", "SCHED", "HRTIMER", "RCU" }; /* diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1e88485c16a..0a7ed5b5e28 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2187,6 +2187,7 @@ static const struct flag flags[] = { { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, + { "RCU_SOFTIRQ", 9 }, { "HRTIMER_NORESTART", 0 }, { "HRTIMER_RESTART", 1 }, -- cgit v1.2.3-70-g09d2 From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 May 2011 15:15:34 -0400 Subject: NLM: Don't hang forever on NLM unlock requests If the NLM daemon is killed on the NFS server, we can currently end up hanging forever on an 'unlock' request, instead of aborting. Basically, if the rpcbind request fails, or the server keeps returning garbage, we really want to quit instead of retrying. Tested-by: Vasily Averin Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/lockd/clntproc.c | 8 +++++++- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 3 +++ net/sunrpc/sched.c | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index adb45ec9038..e374050a911 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); - goto retry_rebind; + switch (task->tk_status) { + case -EACCES: + case -EIO: + goto die; + default: + goto retry_rebind; + } } if (status == NLM_LCK_DENIED_GRACE_PERIOD) { rpc_delay(task, NLMCLNT_GRACE_WAIT); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f73c482ec9c..fe2d8e6b923 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -84,7 +84,8 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2; + tk_cred_retry : 2, + tk_rebind_retry : 2; }; #define tk_xprt tk_client->cl_xprt diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b84d7395535..566bcfd067f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1175,6 +1175,9 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } + if (task->tk_rebind_retry == 0) + break; + task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6b43ee7221d..a27406b1654 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -792,6 +792,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; -- cgit v1.2.3-70-g09d2 From c9c30dd5f73dccaa326a54dfcf490316946aea87 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sat, 11 Jun 2011 17:08:39 -0400 Subject: NFSv4.1: deprecate headerpadsz in CREATE_SESSION We don't support header padding yet so better off ditching it Reported-by: Sid Moore Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ---- fs/nfs/nfs4xdr.c | 10 ++++++---- include/linux/nfs_xdr.h | 1 - 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d2c4b59c896..3ca44978954 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5098,7 +5098,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) if (mxresp_sz == 0) mxresp_sz = NFS_MAX_FILE_IO_SIZE; /* Fore channel attributes */ - args->fc_attrs.headerpadsz = 0; args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; @@ -5111,7 +5110,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ - args->bc_attrs.headerpadsz = 0; args->bc_attrs.max_rqst_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz_cached = 0; @@ -5131,8 +5129,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args struct nfs4_channel_attrs *sent = &args->fc_attrs; struct nfs4_channel_attrs *rcvd = &session->fc_attrs; - if (rcvd->headerpadsz > sent->headerpadsz) - return -EINVAL; if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d869a5e5464..c4b7d6c0494 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->flags); /*flags */ /* Fore Channel */ - *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(0); /* header padding size */ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ @@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ /* Back Channel */ - *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ + *p++ = cpu_to_be32(0); /* header padding size */ *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ @@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr, struct nfs4_channel_attrs *attrs) { __be32 *p; - u32 nr_attrs; + u32 nr_attrs, val; p = xdr_inline_decode(xdr, 28); if (unlikely(!p)) goto out_overflow; - attrs->headerpadsz = be32_to_cpup(p++); + val = be32_to_cpup(p++); /* headerpadsz */ + if (val) + return -EINVAL; /* no support for header padding yet */ attrs->max_rqst_sz = be32_to_cpup(p++); attrs->max_resp_sz = be32_to_cpup(p++); attrs->max_resp_sz_cached = be32_to_cpup(p++); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5e8444a11ad..00848d86ffb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -158,7 +158,6 @@ struct nfs_seqid; /* nfs41 sessions channel attributes */ struct nfs4_channel_attrs { - u32 headerpadsz; u32 max_rqst_sz; u32 max_resp_sz; u32 max_resp_sz_cached; -- cgit v1.2.3-70-g09d2 From a59ec1e7ff98cc4365d5b1bff4e7102e86b5716b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: backlight: new driver for the ADP8870 backlight devices Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../testing/sysfs-class-backlight-driver-adp8870 | 56 ++ drivers/video/backlight/Kconfig | 12 + drivers/video/backlight/Makefile | 1 + drivers/video/backlight/adp8870_bl.c | 1011 ++++++++++++++++++++ include/linux/i2c/adp8870.h | 153 +++ 5 files changed, 1233 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 create mode 100644 drivers/video/backlight/adp8870_bl.c create mode 100644 include/linux/i2c/adp8870.h (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 new file mode 100644 index 00000000000..aa11dbdd794 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 @@ -0,0 +1,56 @@ +What: /sys/class/backlight//_max +What: /sys/class/backlight//l1_daylight_max +What: /sys/class/backlight//l2_bright_max +What: /sys/class/backlight//l3_office_max +What: /sys/class/backlight//l4_indoor_max +What: /sys/class/backlight//l5_dark_max +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the maximum brightness for + on this . Values are between 0 and 127. This file + will also show the brightness level stored for this + . + +What: /sys/class/backlight//_dim +What: /sys/class/backlight//l2_bright_dim +What: /sys/class/backlight//l3_office_dim +What: /sys/class/backlight//l4_indoor_dim +What: /sys/class/backlight//l5_dark_dim +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the dim brightness for + on this . Values are between 0 and 127, typically + set to 0. Full off when the backlight is disabled. + This file will also show the dim brightness level stored for + this . + +What: /sys/class/backlight//ambient_light_level +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get conversion value of the light sensor. + This value is updated every 80 ms (when the light sensor + is enabled). Returns integer between 0 (dark) and + 8000 (max ambient brightness) + +What: /sys/class/backlight//ambient_light_zone +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get/Set current ambient light zone. Reading returns + integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark). + Writing a value between 1..5 forces the backlight controller + to enter the corresponding ambient light zone. + Writing 0 returns to normal/automatic ambient light level + operation. The ambient light sensing feature on these devices + is an extension to the API documented in + Documentation/ABI/stable/sysfs-class-backlight. + It can be enabled by writing the value stored in + /sys/class/backlight//max_brightness to + /sys/class/backlight//brightness. \ No newline at end of file diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 0c9373bedd1..2d93c8d61ad 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -302,6 +302,18 @@ config BACKLIGHT_ADP8860 To compile this driver as a module, choose M here: the module will be called adp8860_bl. +config BACKLIGHT_ADP8870 + tristate "Backlight Driver for ADP8870 using WLED" + depends on BACKLIGHT_CLASS_DEVICE && I2C + select NEW_LEDS + select LEDS_CLASS + help + If you have a LCD backlight connected to the ADP8870, + say Y here to enable this driver. + + To compile this driver as a module, choose M here: the module will + be called adp8870_bl. + config BACKLIGHT_88PM860X tristate "Backlight Driver for 88PM8606 using WLED" depends on MFD_88PM860X diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index b9ca8490df8..ee72adb8786 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o obj-$(CONFIG_BACKLIGHT_ADX) += adx_bl.o obj-$(CONFIG_BACKLIGHT_ADP5520) += adp5520_bl.o obj-$(CONFIG_BACKLIGHT_ADP8860) += adp8860_bl.o +obj-$(CONFIG_BACKLIGHT_ADP8870) += adp8870_bl.o obj-$(CONFIG_BACKLIGHT_88PM860X) += 88pm860x_bl.o obj-$(CONFIG_BACKLIGHT_PCF50633) += pcf50633-backlight.o diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c new file mode 100644 index 00000000000..e320b62576e --- /dev/null +++ b/drivers/video/backlight/adp8870_bl.c @@ -0,0 +1,1011 @@ +/* + * Backlight driver for Analog Devices ADP8870 Backlight Devices + * + * Copyright 2009-2011 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#define ADP8870_EXT_FEATURES +#define ADP8870_USE_LEDS + + +#define ADP8870_MFDVID 0x00 /* Manufacturer and device ID */ +#define ADP8870_MDCR 0x01 /* Device mode and status */ +#define ADP8870_INT_STAT 0x02 /* Interrupts status */ +#define ADP8870_INT_EN 0x03 /* Interrupts enable */ +#define ADP8870_CFGR 0x04 /* Configuration register */ +#define ADP8870_BLSEL 0x05 /* Sink enable backlight or independent */ +#define ADP8870_PWMLED 0x06 /* PWM Enable Selection Register */ +#define ADP8870_BLOFF 0x07 /* Backlight off timeout */ +#define ADP8870_BLDIM 0x08 /* Backlight dim timeout */ +#define ADP8870_BLFR 0x09 /* Backlight fade in and out rates */ +#define ADP8870_BLMX1 0x0A /* Backlight (Brightness Level 1-daylight) maximum current */ +#define ADP8870_BLDM1 0x0B /* Backlight (Brightness Level 1-daylight) dim current */ +#define ADP8870_BLMX2 0x0C /* Backlight (Brightness Level 2-bright) maximum current */ +#define ADP8870_BLDM2 0x0D /* Backlight (Brightness Level 2-bright) dim current */ +#define ADP8870_BLMX3 0x0E /* Backlight (Brightness Level 3-office) maximum current */ +#define ADP8870_BLDM3 0x0F /* Backlight (Brightness Level 3-office) dim current */ +#define ADP8870_BLMX4 0x10 /* Backlight (Brightness Level 4-indoor) maximum current */ +#define ADP8870_BLDM4 0x11 /* Backlight (Brightness Level 4-indoor) dim current */ +#define ADP8870_BLMX5 0x12 /* Backlight (Brightness Level 5-dark) maximum current */ +#define ADP8870_BLDM5 0x13 /* Backlight (Brightness Level 5-dark) dim current */ +#define ADP8870_ISCLAW 0x1A /* Independent sink current fade law register */ +#define ADP8870_ISCC 0x1B /* Independent sink current control register */ +#define ADP8870_ISCT1 0x1C /* Independent Sink Current Timer Register LED[7:5] */ +#define ADP8870_ISCT2 0x1D /* Independent Sink Current Timer Register LED[4:1] */ +#define ADP8870_ISCF 0x1E /* Independent sink current fade register */ +#define ADP8870_ISC1 0x1F /* Independent Sink Current LED1 */ +#define ADP8870_ISC2 0x20 /* Independent Sink Current LED2 */ +#define ADP8870_ISC3 0x21 /* Independent Sink Current LED3 */ +#define ADP8870_ISC4 0x22 /* Independent Sink Current LED4 */ +#define ADP8870_ISC5 0x23 /* Independent Sink Current LED5 */ +#define ADP8870_ISC6 0x24 /* Independent Sink Current LED6 */ +#define ADP8870_ISC7 0x25 /* Independent Sink Current LED7 (Brightness Level 1-daylight) */ +#define ADP8870_ISC7_L2 0x26 /* Independent Sink Current LED7 (Brightness Level 2-bright) */ +#define ADP8870_ISC7_L3 0x27 /* Independent Sink Current LED7 (Brightness Level 3-office) */ +#define ADP8870_ISC7_L4 0x28 /* Independent Sink Current LED7 (Brightness Level 4-indoor) */ +#define ADP8870_ISC7_L5 0x29 /* Independent Sink Current LED7 (Brightness Level 5-dark) */ +#define ADP8870_CMP_CTL 0x2D /* ALS Comparator Control Register */ +#define ADP8870_ALS1_EN 0x2E /* Main ALS comparator level enable */ +#define ADP8870_ALS2_EN 0x2F /* Second ALS comparator level enable */ +#define ADP8870_ALS1_STAT 0x30 /* Main ALS Comparator Status Register */ +#define ADP8870_ALS2_STAT 0x31 /* Second ALS Comparator Status Register */ +#define ADP8870_L2TRP 0x32 /* L2 comparator reference */ +#define ADP8870_L2HYS 0x33 /* L2 hysteresis */ +#define ADP8870_L3TRP 0x34 /* L3 comparator reference */ +#define ADP8870_L3HYS 0x35 /* L3 hysteresis */ +#define ADP8870_L4TRP 0x36 /* L4 comparator reference */ +#define ADP8870_L4HYS 0x37 /* L4 hysteresis */ +#define ADP8870_L5TRP 0x38 /* L5 comparator reference */ +#define ADP8870_L5HYS 0x39 /* L5 hysteresis */ +#define ADP8870_PH1LEVL 0x40 /* First phototransistor ambient light level-low byte register */ +#define ADP8870_PH1LEVH 0x41 /* First phototransistor ambient light level-high byte register */ +#define ADP8870_PH2LEVL 0x42 /* Second phototransistor ambient light level-low byte register */ +#define ADP8870_PH2LEVH 0x43 /* Second phototransistor ambient light level-high byte register */ + +#define ADP8870_MANUFID 0x3 /* Analog Devices AD8870 Manufacturer and device ID */ +#define ADP8870_DEVID(x) ((x) & 0xF) +#define ADP8870_MANID(x) ((x) >> 4) + +/* MDCR Device mode and status */ +#define D7ALSEN (1 << 7) +#define INT_CFG (1 << 6) +#define NSTBY (1 << 5) +#define DIM_EN (1 << 4) +#define GDWN_DIS (1 << 3) +#define SIS_EN (1 << 2) +#define CMP_AUTOEN (1 << 1) +#define BLEN (1 << 0) + +/* ADP8870_ALS1_EN Main ALS comparator level enable */ +#define L5_EN (1 << 3) +#define L4_EN (1 << 2) +#define L3_EN (1 << 1) +#define L2_EN (1 << 0) + +#define CFGR_BLV_SHIFT 3 +#define CFGR_BLV_MASK 0x7 +#define ADP8870_FLAG_LED_MASK 0xFF + +#define FADE_VAL(in, out) ((0xF & (in)) | ((0xF & (out)) << 4)) +#define BL_CFGR_VAL(law, blv) ((((blv) & CFGR_BLV_MASK) << CFGR_BLV_SHIFT) | ((0x3 & (law)) << 1)) +#define ALS_CMPR_CFG_VAL(filt) ((0x7 & (filt)) << 1) + +struct adp8870_bl { + struct i2c_client *client; + struct backlight_device *bl; + struct adp8870_led *led; + struct adp8870_backlight_platform_data *pdata; + struct mutex lock; + unsigned long cached_daylight_max; + int id; + int revid; + int current_brightness; +}; + +struct adp8870_led { + struct led_classdev cdev; + struct work_struct work; + struct i2c_client *client; + enum led_brightness new_brightness; + int id; + int flags; +}; + +static int adp8870_read(struct i2c_client *client, int reg, uint8_t *val) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) { + dev_err(&client->dev, "failed reading at 0x%02x\n", reg); + return ret; + } + + *val = ret; + return 0; +} + + +static int adp8870_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + if (ret) + dev_err(&client->dev, "failed to write\n"); + + return ret; +} + +static int adp8870_set_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && ((reg_val & bit_mask) == 0)) { + reg_val |= bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +static int adp8870_clr_bits(struct i2c_client *client, int reg, uint8_t bit_mask) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + uint8_t reg_val; + int ret; + + mutex_lock(&data->lock); + + ret = adp8870_read(client, reg, ®_val); + + if (!ret && (reg_val & bit_mask)) { + reg_val &= ~bit_mask; + ret = adp8870_write(client, reg, reg_val); + } + + mutex_unlock(&data->lock); + return ret; +} + +/* + * Independent sink / LED + */ +#if defined(ADP8870_USE_LEDS) +static void adp8870_led_work(struct work_struct *work) +{ + struct adp8870_led *led = container_of(work, struct adp8870_led, work); + adp8870_write(led->client, ADP8870_ISC1 + led->id - 1, + led->new_brightness >> 1); +} + +static void adp8870_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct adp8870_led *led; + + led = container_of(led_cdev, struct adp8870_led, cdev); + led->new_brightness = value; + /* + * Use workqueue for IO since I2C operations can sleep. + */ + schedule_work(&led->work); +} + +static int adp8870_led_setup(struct adp8870_led *led) +{ + struct i2c_client *client = led->client; + int ret = 0; + + ret = adp8870_write(client, ADP8870_ISC1 + led->id - 1, 0); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_ISCC, 1 << (led->id - 1)); + if (ret) + return ret; + + if (led->id > 4) + ret = adp8870_set_bits(client, ADP8870_ISCT1, + (led->flags & 0x3) << ((led->id - 5) * 2)); + else + ret = adp8870_set_bits(client, ADP8870_ISCT2, + (led->flags & 0x3) << ((led->id - 1) * 2)); + + return ret; +} + +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + struct adp8870_led *led, *led_dat; + struct led_info *cur_led; + int ret, i; + + + led = kcalloc(pdata->num_leds, sizeof(*led), GFP_KERNEL); + if (led == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + ret = adp8870_write(client, ADP8870_ISCLAW, pdata->led_fade_law); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCT1, + (pdata->led_on_time & 0x3) << 6); + if (ret) + goto err_free; + + ret = adp8870_write(client, ADP8870_ISCF, + FADE_VAL(pdata->led_fade_in, pdata->led_fade_out)); + if (ret) + goto err_free; + + for (i = 0; i < pdata->num_leds; ++i) { + cur_led = &pdata->leds[i]; + led_dat = &led[i]; + + led_dat->id = cur_led->flags & ADP8870_FLAG_LED_MASK; + + if (led_dat->id > 7 || led_dat->id < 1) { + dev_err(&client->dev, "Invalid LED ID %d\n", + led_dat->id); + goto err; + } + + if (pdata->bl_led_assign & (1 << (led_dat->id - 1))) { + dev_err(&client->dev, "LED %d used by Backlight\n", + led_dat->id); + goto err; + } + + led_dat->cdev.name = cur_led->name; + led_dat->cdev.default_trigger = cur_led->default_trigger; + led_dat->cdev.brightness_set = adp8870_led_set; + led_dat->cdev.brightness = LED_OFF; + led_dat->flags = cur_led->flags >> FLAG_OFFT_SHIFT; + led_dat->client = client; + led_dat->new_brightness = LED_OFF; + INIT_WORK(&led_dat->work, adp8870_led_work); + + ret = led_classdev_register(&client->dev, &led_dat->cdev); + if (ret) { + dev_err(&client->dev, "failed to register LED %d\n", + led_dat->id); + goto err; + } + + ret = adp8870_led_setup(led_dat); + if (ret) { + dev_err(&client->dev, "failed to write\n"); + i++; + goto err; + } + } + + data->led = led; + + return 0; + + err: + for (i = i - 1; i >= 0; --i) { + led_classdev_unregister(&led[i].cdev); + cancel_work_sync(&led[i].work); + } + + err_free: + kfree(led); + + return ret; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + struct adp8870_bl *data = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pdata->num_leds; i++) { + led_classdev_unregister(&data->led[i].cdev); + cancel_work_sync(&data->led[i].work); + } + + kfree(data->led); + return 0; +} +#else +static int __devinit adp8870_led_probe(struct i2c_client *client) +{ + return 0; +} + +static int __devexit adp8870_led_remove(struct i2c_client *client) +{ + return 0; +} +#endif + +static int adp8870_bl_set(struct backlight_device *bl, int brightness) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + int ret = 0; + + if (data->pdata->en_ambl_sens) { + if ((brightness > 0) && (brightness < ADP8870_MAX_BRIGHTNESS)) { + /* Disable Ambient Light auto adjust */ + ret = adp8870_clr_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } else { + /* + * MAX_BRIGHTNESS -> Enable Ambient Light auto adjust + * restore daylight l1 sysfs brightness + */ + ret = adp8870_write(client, ADP8870_BLMX1, + data->cached_daylight_max); + if (ret) + return ret; + + ret = adp8870_set_bits(client, ADP8870_MDCR, + CMP_AUTOEN); + if (ret) + return ret; + } + } else { + ret = adp8870_write(client, ADP8870_BLMX1, brightness); + if (ret) + return ret; + } + + if (data->current_brightness && brightness == 0) + ret = adp8870_set_bits(client, + ADP8870_MDCR, DIM_EN); + else if (data->current_brightness == 0 && brightness) + ret = adp8870_clr_bits(client, + ADP8870_MDCR, DIM_EN); + + if (!ret) + data->current_brightness = brightness; + + return ret; +} + +static int adp8870_bl_update_status(struct backlight_device *bl) +{ + int brightness = bl->props.brightness; + if (bl->props.power != FB_BLANK_UNBLANK) + brightness = 0; + + if (bl->props.fb_blank != FB_BLANK_UNBLANK) + brightness = 0; + + return adp8870_bl_set(bl, brightness); +} + +static int adp8870_bl_get_brightness(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + + return data->current_brightness; +} + +static const struct backlight_ops adp8870_bl_ops = { + .update_status = adp8870_bl_update_status, + .get_brightness = adp8870_bl_get_brightness, +}; + +static int adp8870_bl_setup(struct backlight_device *bl) +{ + struct adp8870_bl *data = bl_get_data(bl); + struct i2c_client *client = data->client; + struct adp8870_backlight_platform_data *pdata = data->pdata; + int ret = 0; + + ret = adp8870_write(client, ADP8870_BLSEL, ~pdata->bl_led_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_PWMLED, pdata->pwm_assign); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX1, pdata->l1_daylight_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM1, pdata->l1_daylight_dim); + if (ret) + return ret; + + if (pdata->en_ambl_sens) { + data->cached_daylight_max = pdata->l1_daylight_max; + ret = adp8870_write(client, ADP8870_BLMX2, + pdata->l2_bright_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM2, + pdata->l2_bright_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX3, + pdata->l3_office_max); + if (ret) + return ret; + ret = adp8870_write(client, ADP8870_BLDM3, + pdata->l3_office_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX4, + pdata->l4_indoor_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM4, + pdata->l4_indor_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLMX5, + pdata->l5_dark_max); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLDM5, + pdata->l5_dark_dim); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2TRP, pdata->l2_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L2HYS, pdata->l2_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3TRP, pdata->l3_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L3HYS, pdata->l3_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L4TRP, pdata->l4_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L4HYS, pdata->l4_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5TRP, pdata->l5_trip); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_L5HYS, pdata->l5_hyst); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_ALS1_EN, L5_EN | L4_EN | + L3_EN | L2_EN); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_CMP_CTL, + ALS_CMPR_CFG_VAL(pdata->abml_filt)); + if (ret) + return ret; + } + + ret = adp8870_write(client, ADP8870_CFGR, + BL_CFGR_VAL(pdata->bl_fade_law, 0)); + if (ret) + return ret; + + ret = adp8870_write(client, ADP8870_BLFR, FADE_VAL(pdata->bl_fade_in, + pdata->bl_fade_out)); + if (ret) + return ret; + /* + * ADP8870 Rev0 requires GDWN_DIS bit set + */ + + ret = adp8870_set_bits(client, ADP8870_MDCR, BLEN | DIM_EN | NSTBY | + (data->revid == 0 ? GDWN_DIS : 0)); + + return ret; +} + +static ssize_t adp8870_show(struct device *dev, char *buf, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, reg, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", reg_val); +} + +static ssize_t adp8870_store(struct device *dev, const char *buf, + size_t count, int reg) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + mutex_lock(&data->lock); + adp8870_write(data->client, reg, val); + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t adp8870_bl_l5_dark_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX5); +} + +static ssize_t adp8870_bl_l5_dark_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX5); +} +static DEVICE_ATTR(l5_dark_max, 0664, adp8870_bl_l5_dark_max_show, + adp8870_bl_l5_dark_max_store); + + +static ssize_t adp8870_bl_l4_indoor_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX4); +} + +static ssize_t adp8870_bl_l4_indoor_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX4); +} +static DEVICE_ATTR(l4_indoor_max, 0664, adp8870_bl_l4_indoor_max_show, + adp8870_bl_l4_indoor_max_store); + + +static ssize_t adp8870_bl_l3_office_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX3); +} + +static ssize_t adp8870_bl_l3_office_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX3); +} + +static DEVICE_ATTR(l3_office_max, 0664, adp8870_bl_l3_office_max_show, + adp8870_bl_l3_office_max_store); + +static ssize_t adp8870_bl_l2_bright_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX2); +} + +static ssize_t adp8870_bl_l2_bright_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLMX2); +} +static DEVICE_ATTR(l2_bright_max, 0664, adp8870_bl_l2_bright_max_show, + adp8870_bl_l2_bright_max_store); + +static ssize_t adp8870_bl_l1_daylight_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLMX1); +} + +static ssize_t adp8870_bl_l1_daylight_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int ret = strict_strtoul(buf, 10, &data->cached_daylight_max); + if (ret) + return ret; + + return adp8870_store(dev, buf, count, ADP8870_BLMX1); +} +static DEVICE_ATTR(l1_daylight_max, 0664, adp8870_bl_l1_daylight_max_show, + adp8870_bl_l1_daylight_max_store); + +static ssize_t adp8870_bl_l5_dark_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM5); +} + +static ssize_t adp8870_bl_l5_dark_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM5); +} +static DEVICE_ATTR(l5_dark_dim, 0664, adp8870_bl_l5_dark_dim_show, + adp8870_bl_l5_dark_dim_store); + +static ssize_t adp8870_bl_l4_indoor_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM4); +} + +static ssize_t adp8870_bl_l4_indoor_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM4); +} +static DEVICE_ATTR(l4_indoor_dim, 0664, adp8870_bl_l4_indoor_dim_show, + adp8870_bl_l4_indoor_dim_store); + + +static ssize_t adp8870_bl_l3_office_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM3); +} + +static ssize_t adp8870_bl_l3_office_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM3); +} +static DEVICE_ATTR(l3_office_dim, 0664, adp8870_bl_l3_office_dim_show, + adp8870_bl_l3_office_dim_store); + +static ssize_t adp8870_bl_l2_bright_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM2); +} + +static ssize_t adp8870_bl_l2_bright_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM2); +} +static DEVICE_ATTR(l2_bright_dim, 0664, adp8870_bl_l2_bright_dim_show, + adp8870_bl_l2_bright_dim_store); + +static ssize_t adp8870_bl_l1_daylight_dim_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return adp8870_show(dev, buf, ADP8870_BLDM1); +} + +static ssize_t adp8870_bl_l1_daylight_dim_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return adp8870_store(dev, buf, count, ADP8870_BLDM1); +} +static DEVICE_ATTR(l1_daylight_dim, 0664, adp8870_bl_l1_daylight_dim_show, + adp8870_bl_l1_daylight_dim_store); + +#ifdef ADP8870_EXT_FEATURES +static ssize_t adp8870_bl_ambient_light_level_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + uint16_t ret_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_PH1LEVL, ®_val); + if (error < 0) { + mutex_unlock(&data->lock); + return error; + } + ret_val = reg_val; + error = adp8870_read(data->client, ADP8870_PH1LEVH, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + /* Return 13-bit conversion value for the first light sensor */ + ret_val += (reg_val & 0x1F) << 8; + + return sprintf(buf, "%u\n", ret_val); +} +static DEVICE_ATTR(ambient_light_level, 0444, + adp8870_bl_ambient_light_level_show, NULL); + +static ssize_t adp8870_bl_ambient_light_zone_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + int error; + uint8_t reg_val; + + mutex_lock(&data->lock); + error = adp8870_read(data->client, ADP8870_CFGR, ®_val); + mutex_unlock(&data->lock); + + if (error < 0) + return error; + + return sprintf(buf, "%u\n", + ((reg_val >> CFGR_BLV_SHIFT) & CFGR_BLV_MASK) + 1); +} + +static ssize_t adp8870_bl_ambient_light_zone_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adp8870_bl *data = dev_get_drvdata(dev); + unsigned long val; + uint8_t reg_val; + int ret; + + ret = strict_strtoul(buf, 10, &val); + if (ret) + return ret; + + if (val == 0) { + /* Enable automatic ambient light sensing */ + adp8870_set_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + } else if ((val > 0) && (val < 6)) { + /* Disable automatic ambient light sensing */ + adp8870_clr_bits(data->client, ADP8870_MDCR, CMP_AUTOEN); + + /* Set user supplied ambient light zone */ + mutex_lock(&data->lock); + adp8870_read(data->client, ADP8870_CFGR, ®_val); + reg_val &= ~(CFGR_BLV_MASK << CFGR_BLV_SHIFT); + reg_val |= (val - 1) << CFGR_BLV_SHIFT; + adp8870_write(data->client, ADP8870_CFGR, reg_val); + mutex_unlock(&data->lock); + } + + return count; +} +static DEVICE_ATTR(ambient_light_zone, 0664, + adp8870_bl_ambient_light_zone_show, + adp8870_bl_ambient_light_zone_store); +#endif + +static struct attribute *adp8870_bl_attributes[] = { + &dev_attr_l5_dark_max.attr, + &dev_attr_l5_dark_dim.attr, + &dev_attr_l4_indoor_max.attr, + &dev_attr_l4_indoor_dim.attr, + &dev_attr_l3_office_max.attr, + &dev_attr_l3_office_dim.attr, + &dev_attr_l2_bright_max.attr, + &dev_attr_l2_bright_dim.attr, + &dev_attr_l1_daylight_max.attr, + &dev_attr_l1_daylight_dim.attr, +#ifdef ADP8870_EXT_FEATURES + &dev_attr_ambient_light_level.attr, + &dev_attr_ambient_light_zone.attr, +#endif + NULL +}; + +static const struct attribute_group adp8870_bl_attr_group = { + .attrs = adp8870_bl_attributes, +}; + +static int __devinit adp8870_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct backlight_properties props; + struct backlight_device *bl; + struct adp8870_bl *data; + struct adp8870_backlight_platform_data *pdata = + client->dev.platform_data; + uint8_t reg_val; + int ret; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + if (!pdata) { + dev_err(&client->dev, "no platform data?\n"); + return -EINVAL; + } + + ret = adp8870_read(client, ADP8870_MFDVID, ®_val); + if (ret < 0) + return -EIO; + + if (ADP8870_MANID(reg_val) != ADP8870_MANUFID) { + dev_err(&client->dev, "failed to probe\n"); + return -ENODEV; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + data->revid = ADP8870_DEVID(reg_val); + data->client = client; + data->pdata = pdata; + data->id = id->driver_data; + data->current_brightness = 0; + i2c_set_clientdata(client, data); + + mutex_init(&data->lock); + + memset(&props, 0, sizeof(props)); + props.max_brightness = props.brightness = ADP8870_MAX_BRIGHTNESS; + bl = backlight_device_register(dev_driver_string(&client->dev), + &client->dev, data, &adp8870_bl_ops, &props); + if (IS_ERR(bl)) { + dev_err(&client->dev, "failed to register backlight\n"); + ret = PTR_ERR(bl); + goto out2; + } + + data->bl = bl; + + if (pdata->en_ambl_sens) + ret = sysfs_create_group(&bl->dev.kobj, + &adp8870_bl_attr_group); + + if (ret) { + dev_err(&client->dev, "failed to register sysfs\n"); + goto out1; + } + + ret = adp8870_bl_setup(bl); + if (ret) { + ret = -EIO; + goto out; + } + + backlight_update_status(bl); + + dev_info(&client->dev, "Rev.%d Backlight\n", data->revid); + + if (pdata->num_leds) + adp8870_led_probe(client); + + return 0; + +out: + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); +out1: + backlight_device_unregister(bl); +out2: + i2c_set_clientdata(client, NULL); + kfree(data); + + return ret; +} + +static int __devexit adp8870_remove(struct i2c_client *client) +{ + struct adp8870_bl *data = i2c_get_clientdata(client); + + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + if (data->led) + adp8870_led_remove(client); + + if (data->pdata->en_ambl_sens) + sysfs_remove_group(&data->bl->dev.kobj, + &adp8870_bl_attr_group); + + backlight_device_unregister(data->bl); + i2c_set_clientdata(client, NULL); + kfree(data); + + return 0; +} + +#ifdef CONFIG_PM +static int adp8870_i2c_suspend(struct i2c_client *client, pm_message_t message) +{ + adp8870_clr_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} + +static int adp8870_i2c_resume(struct i2c_client *client) +{ + adp8870_set_bits(client, ADP8870_MDCR, NSTBY); + + return 0; +} +#else +#define adp8870_i2c_suspend NULL +#define adp8870_i2c_resume NULL +#endif + +static const struct i2c_device_id adp8870_id[] = { + { "adp8870", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adp8870_id); + +static struct i2c_driver adp8870_driver = { + .driver = { + .name = KBUILD_MODNAME, + }, + .probe = adp8870_probe, + .remove = __devexit_p(adp8870_remove), + .suspend = adp8870_i2c_suspend, + .resume = adp8870_i2c_resume, + .id_table = adp8870_id, +}; + +static int __init adp8870_init(void) +{ + return i2c_add_driver(&adp8870_driver); +} +module_init(adp8870_init); + +static void __exit adp8870_exit(void) +{ + i2c_del_driver(&adp8870_driver); +} +module_exit(adp8870_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("ADP8870 Backlight driver"); +MODULE_ALIAS("platform:adp8870-backlight"); diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h new file mode 100644 index 00000000000..624dceccbd5 --- /dev/null +++ b/include/linux/i2c/adp8870.h @@ -0,0 +1,153 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8870 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef __LINUX_I2C_ADP8870_H +#define __LINUX_I2C_ADP8870_H + +#define ID_ADP8870 8870 + +#define ADP8870_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8870_LED_ONT_200ms 0 +#define ADP8870_LED_ONT_600ms 1 +#define ADP8870_LED_ONT_800ms 2 +#define ADP8870_LED_ONT_1200ms 3 + +#define ADP8870_LED_D7 (7) +#define ADP8870_LED_D6 (6) +#define ADP8870_LED_D5 (5) +#define ADP8870_LED_D4 (4) +#define ADP8870_LED_D3 (3) +#define ADP8870_LED_D2 (2) +#define ADP8870_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8870_BL_D7 (1 << 6) +#define ADP8870_BL_D6 (1 << 5) +#define ADP8870_BL_D5 (1 << 4) +#define ADP8870_BL_D4 (1 << 3) +#define ADP8870_BL_D3 (1 << 2) +#define ADP8870_BL_D2 (1 << 1) +#define ADP8870_BL_D1 (1 << 0) + +#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8870_FADE_T_600ms 2 +#define ADP8870_FADE_T_900ms 3 +#define ADP8870_FADE_T_1200ms 4 +#define ADP8870_FADE_T_1500ms 5 +#define ADP8870_FADE_T_1800ms 6 +#define ADP8870_FADE_T_2100ms 7 +#define ADP8870_FADE_T_2400ms 8 +#define ADP8870_FADE_T_2700ms 9 +#define ADP8870_FADE_T_3000ms 10 +#define ADP8870_FADE_T_3500ms 11 +#define ADP8870_FADE_T_4000ms 12 +#define ADP8870_FADE_T_4500ms 13 +#define ADP8870_FADE_T_5000ms 14 +#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8870_FADE_LAW_LINEAR 0 +#define ADP8870_FADE_LAW_SQUARE 1 +#define ADP8870_FADE_LAW_CUBIC1 2 +#define ADP8870_FADE_LAW_CUBIC2 3 + +#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8870_BL_AMBL_FILT_160ms 1 +#define ADP8870_BL_AMBL_FILT_320ms 2 +#define ADP8870_BL_AMBL_FILT_640ms 3 +#define ADP8870_BL_AMBL_FILT_1280ms 4 +#define ADP8870_BL_AMBL_FILT_2560ms 5 +#define ADP8870_BL_AMBL_FILT_5120ms 6 +#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..551uA + */ +#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) + +/* + * L4 comparator current 0..275uA + */ +#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) + +/* + * L5 comparator current 0..138uA + */ +#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8870_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + u8 pwm_assign; /* 1 = Enables PWM mode */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l4_hyst; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; +}; + +#endif /* __LINUX_I2C_ADP8870_H */ -- cgit v1.2.3-70-g09d2 From a433658c30974fc87ba3ff52d7e4e6299762aa3d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:13 -0700 Subject: vmscan,memcg: memcg aware swap token Currently, memcg reclaim can disable swap token even if the swap token mm doesn't belong in its memory cgroup. It's slightly risky. If an admin creates very small mem-cgroup and silly guy runs contentious heavy memory pressure workload, every tasks are going to lose swap token and then system may become unresponsive. That's bad. This patch adds 'memcg' parameter into disable_swap_token(). and if the parameter doesn't match swap token, VM doesn't disable it. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++ include/linux/swap.h | 8 ++--- mm/memcontrol.c | 16 ++++----- mm/thrash.c | 87 ++++++++++++++++++++++++++++++++++++---------- mm/vmscan.c | 4 +-- 5 files changed, 85 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9724a38ee69..50940da6adf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -84,6 +84,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) @@ -246,6 +247,11 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } +static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/include/linux/swap.h b/include/linux/swap.h index 384eb5fe530..e7056464703 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -358,6 +358,7 @@ struct backing_dev_info; extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); extern void __put_swap_token(struct mm_struct *); +extern void disable_swap_token(struct mem_cgroup *memcg); static inline int has_swap_token(struct mm_struct *mm) { @@ -370,11 +371,6 @@ static inline void put_swap_token(struct mm_struct *mm) __put_swap_token(mm); } -static inline void disable_swap_token(void) -{ - put_swap_token(swap_token_mm); -} - #ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); @@ -500,7 +496,7 @@ static inline int has_swap_token(struct mm_struct *mm) return 0; } -static inline void disable_swap_token(void) +static inline void disable_swap_token(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bd9052a5d3a..e37c44d88d4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -735,7 +735,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) struct mem_cgroup, css); } -static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) { struct mem_cgroup *mem = NULL; @@ -5414,18 +5414,16 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *old_cont, struct task_struct *p) { - struct mm_struct *mm; + struct mm_struct *mm = get_task_mm(p); - if (!mc.to) - /* no need to move charge */ - return; - - mm = get_task_mm(p); if (mm) { - mem_cgroup_move_charge(mm); + if (mc.to) + mem_cgroup_move_charge(mm); + put_swap_token(mm); mmput(mm); } - mem_cgroup_clear_mc(); + if (mc.to) + mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, diff --git a/mm/thrash.c b/mm/thrash.c index 2372d4ed5dd..6cdf8651138 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -21,11 +21,31 @@ #include #include #include +#include static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; +struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + memcg = try_get_mem_cgroup_from_mm(mm); + if (memcg) + css_put(mem_cgroup_css(memcg)); + + return memcg; +} +#else +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + return NULL; +} +#endif + void grab_swap_token(struct mm_struct *mm) { int current_interval; @@ -38,40 +58,69 @@ void grab_swap_token(struct mm_struct *mm) return; /* First come first served */ - if (swap_token_mm == NULL) { - mm->token_priority = mm->token_priority + 2; - swap_token_mm = mm; + if (!swap_token_mm) + goto replace_token; + + if (mm == swap_token_mm) { + mm->token_priority += 2; goto out; } - if (mm != swap_token_mm) { - if (current_interval < mm->last_interval) - mm->token_priority++; - else { - if (likely(mm->token_priority > 0)) - mm->token_priority--; - } - /* Check if we deserve the token */ - if (mm->token_priority > swap_token_mm->token_priority) { - mm->token_priority += 2; - swap_token_mm = mm; - } - } else { - /* Token holder came in again! */ - mm->token_priority += 2; + if (current_interval < mm->last_interval) + mm->token_priority++; + else { + if (likely(mm->token_priority > 0)) + mm->token_priority--; } + /* Check if we deserve the token */ + if (mm->token_priority > swap_token_mm->token_priority) + goto replace_token; + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; spin_unlock(&swap_token_lock); + return; + +replace_token: + mm->token_priority += 2; + swap_token_mm = mm; + swap_token_memcg = swap_token_memcg_from_mm(mm); + goto out; } /* Called on process exit. */ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) + if (likely(mm == swap_token_mm)) { swap_token_mm = NULL; + swap_token_memcg = NULL; + } spin_unlock(&swap_token_lock); } + +static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) +{ + if (!a) + return true; + if (!b) + return true; + if (a == b) + return true; + return false; +} + +void disable_swap_token(struct mem_cgroup *memcg) +{ + /* memcg reclaim don't disable unrelated mm token. */ + if (match_memcg(memcg, swap_token_memcg)) { + spin_lock(&swap_token_lock); + if (match_memcg(memcg, swap_token_memcg)) { + swap_token_mm = NULL; + swap_token_memcg = NULL; + } + spin_unlock(&swap_token_lock); + } +} diff --git a/mm/vmscan.c b/mm/vmscan.c index faa0a088f9c..dbe6ea321df 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2081,7 +2081,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; if (!priority) - disable_swap_token(); + disable_swap_token(sc->mem_cgroup); total_scanned += shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2407,7 +2407,7 @@ loop_again: /* The swap token gets in the way of swapout... */ if (!priority) - disable_swap_token(); + disable_swap_token(NULL); all_zones_ok = 1; balanced = 0; -- cgit v1.2.3-70-g09d2 From 83cd81a34357a632509f7491eec81e62e71d65f7 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:14 -0700 Subject: vmscan: implement swap token trace This is useful for observing swap token activity. example output: zsh-1845 [000] 598.962716: update_swap_token_priority: mm=ffff88015eaf7700 old_prio=1 new_prio=0 memtoy-1830 [001] 602.033900: update_swap_token_priority: mm=ffff880037a45880 old_prio=947 new_prio=949 memtoy-1830 [000] 602.041509: update_swap_token_priority: mm=ffff880037a45880 old_prio=949 new_prio=951 memtoy-1830 [000] 602.051959: update_swap_token_priority: mm=ffff880037a45880 old_prio=951 new_prio=953 memtoy-1830 [000] 602.052188: update_swap_token_priority: mm=ffff880037a45880 old_prio=953 new_prio=955 memtoy-1830 [001] 602.427184: put_swap_token: token_mm=ffff880037a45880 zsh-1789 [000] 602.427281: replace_swap_token: old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018 new_prio=2 zsh-1789 [001] 602.433456: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=2 new_prio=4 zsh-1789 [000] 602.437613: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=4 new_prio=6 zsh-1789 [000] 602.443924: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=6 new_prio=8 zsh-1789 [000] 602.451873: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=8 new_prio=10 zsh-1789 [001] 602.462639: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=10 new_prio=12 Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++++++++++++++++ mm/thrash.c | 11 ++++++- 2 files changed, 87 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ea422aaa23e..1798e0cee2a 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "gfpflags.h" #define RECLAIM_WB_ANON 0x0001u @@ -310,6 +312,81 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, show_reclaim_flags(__entry->reclaim_flags)) ); +TRACE_EVENT(replace_swap_token, + TP_PROTO(struct mm_struct *old_mm, + struct mm_struct *new_mm), + + TP_ARGS(old_mm, new_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, old_mm) + __field(unsigned int, old_prio) + __field(struct mm_struct*, new_mm) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->old_mm = old_mm; + __entry->old_prio = old_mm ? old_mm->token_priority : 0; + __entry->new_mm = new_mm; + __entry->new_prio = new_mm->token_priority; + ), + + TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u", + __entry->old_mm, __entry->old_prio, + __entry->new_mm, __entry->new_prio) +); + +DECLARE_EVENT_CLASS(put_swap_token_template, + TP_PROTO(struct mm_struct *swap_token_mm), + + TP_ARGS(swap_token_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, swap_token_mm) + ), + + TP_fast_assign( + __entry->swap_token_mm = swap_token_mm; + ), + + TP_printk("token_mm=%p", __entry->swap_token_mm) +); + +DEFINE_EVENT(put_swap_token_template, put_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm) +); + +DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm), + TP_CONDITION(swap_token_mm != NULL) +); + +TRACE_EVENT_CONDITION(update_swap_token_priority, + TP_PROTO(struct mm_struct *mm, + unsigned int old_prio), + + TP_ARGS(mm, old_prio), + + TP_CONDITION(mm->token_priority != old_prio), + + TP_STRUCT__entry( + __field(struct mm_struct*, mm) + __field(unsigned int, old_prio) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + ), + + TP_printk("mm=%p old_prio=%u new_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio) +); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 6cdf8651138..17d9e29e4c9 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -23,6 +23,8 @@ #include #include +#include + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; @@ -49,6 +51,7 @@ static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) void grab_swap_token(struct mm_struct *mm) { int current_interval; + unsigned int old_prio = mm->token_priority; global_faults++; @@ -63,7 +66,7 @@ void grab_swap_token(struct mm_struct *mm) if (mm == swap_token_mm) { mm->token_priority += 2; - goto out; + goto update_priority; } if (current_interval < mm->last_interval) @@ -77,6 +80,9 @@ void grab_swap_token(struct mm_struct *mm) if (mm->token_priority > swap_token_mm->token_priority) goto replace_token; +update_priority: + trace_update_swap_token_priority(mm, old_prio); + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; @@ -85,6 +91,7 @@ out: replace_token: mm->token_priority += 2; + trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); goto out; @@ -95,6 +102,7 @@ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); if (likely(mm == swap_token_mm)) { + trace_put_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } @@ -118,6 +126,7 @@ void disable_swap_token(struct mem_cgroup *memcg) if (match_memcg(memcg, swap_token_memcg)) { spin_lock(&swap_token_lock); if (match_memcg(memcg, swap_token_memcg)) { + trace_disable_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } -- cgit v1.2.3-70-g09d2 From d7911ef30cb7bec52234c2b7a5c275ac8f07905a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:15 -0700 Subject: vmscan: implement swap token priority aging While testing for memcg aware swap token, I observed a swap token was often grabbed an intermittent running process (eg init, auditd) and they never release a token. Why? Some processes (eg init, auditd, audispd) wake up when a process exiting. And swap token can be get first page-in process when a process exiting makes no swap token owner. Thus such above intermittent running process often get a token. And currently, swap token priority is only decreased at page fault path. Then, if the process sleep immediately after to grab swap token, the swap token priority never be decreased. That's obviously undesirable. This patch implement very poor (and lightweight) priority aging. It only be affect to the above corner case and doesn't change swap tendency workload performance (eg multi process qsbench load) Signed-off-by: KOSAKI Motohiro Reviewed-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 20 +++++++++++++------- mm/thrash.c | 11 ++++++++++- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 1798e0cee2a..b2c33bd955f 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, TRACE_EVENT_CONDITION(update_swap_token_priority, TP_PROTO(struct mm_struct *mm, - unsigned int old_prio), + unsigned int old_prio, + struct mm_struct *swap_token_mm), - TP_ARGS(mm, old_prio), + TP_ARGS(mm, old_prio, swap_token_mm), TP_CONDITION(mm->token_priority != old_prio), @@ -376,16 +377,21 @@ TRACE_EVENT_CONDITION(update_swap_token_priority, __field(struct mm_struct*, mm) __field(unsigned int, old_prio) __field(unsigned int, new_prio) + __field(struct mm_struct*, swap_token_mm) + __field(unsigned int, swap_token_prio) ), TP_fast_assign( - __entry->mm = mm; - __entry->old_prio = old_prio; - __entry->new_prio = mm->token_priority; + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + __entry->swap_token_mm = swap_token_mm; + __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0; ), - TP_printk("mm=%p old_prio=%u new_prio=%u", - __entry->mm, __entry->old_prio, __entry->new_prio) + TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio, + __entry->swap_token_mm, __entry->swap_token_prio) ); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 17d9e29e4c9..fabf2d0f516 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -25,10 +25,13 @@ #include +#define TOKEN_AGING_INTERVAL (0xFF) + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +static unsigned int last_aging; #ifdef CONFIG_CGROUP_MEM_RES_CTLR static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) @@ -64,6 +67,11 @@ void grab_swap_token(struct mm_struct *mm) if (!swap_token_mm) goto replace_token; + if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { + swap_token_mm->token_priority /= 2; + last_aging = global_faults; + } + if (mm == swap_token_mm) { mm->token_priority += 2; goto update_priority; @@ -81,7 +89,7 @@ void grab_swap_token(struct mm_struct *mm) goto replace_token; update_priority: - trace_update_swap_token_priority(mm, old_prio); + trace_update_swap_token_priority(mm, old_prio, swap_token_mm); out: mm->faultstamp = global_faults; @@ -94,6 +102,7 @@ replace_token: trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); + last_aging = global_faults; goto out; } -- cgit v1.2.3-70-g09d2 From ac5622418bbff9cd3dc607aa57dfb4f62a7f2043 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jun 2011 15:08:17 -0700 Subject: kmsg_dump.h: fix build when CONFIG_PRINTK is disabled Fix when CONFIG_PRINTK is not enabled: include/linux/kmsg_dump.h:56: error: 'EINVAL' undeclared (first use in this function) include/linux/kmsg_dump.h:61: error: 'EINVAL' undeclared (first use in this function) Looks like commit 595dd3d8bf95 ("kmsg_dump: fix build for CONFIG_PRINTK=n") uses EINVAL without having the needed header file(s), but I'm sure that I build tested that patch also. oh well. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2a0d7d651dc..ee0c952188d 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -12,6 +12,7 @@ #ifndef _LINUX_KMSG_DUMP_H #define _LINUX_KMSG_DUMP_H +#include #include enum kmsg_dump_reason { -- cgit v1.2.3-70-g09d2 From 32e45ff43eaf5c17f5a82c9ad358d515622c2562 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:20 -0700 Subject: mm: increase RECLAIM_DISTANCE to 30 Recently, Robert Mueller reported (http://lkml.org/lkml/2010/9/12/236) that zone_reclaim_mode doesn't work properly on his new NUMA server (Dual Xeon E5520 + Intel S5520UR MB). He is using Cyrus IMAPd and it's built on a very traditional single-process model. * a master process which reads config files and manages the other process * multiple imapd processes, one per connection * multiple pop3d processes, one per connection * multiple lmtpd processes, one per connection * periodical "cleanup" processes. There are thousands of independent processes. The problem is, recent Intel motherboard turn on zone_reclaim_mode by default and traditional prefork model software don't work well on it. Unfortunatelly, such models are still typical even in the 21st century. We can't ignore them. This patch raises the zone_reclaim_mode threshold to 30. 30 doesn't have any specific meaning. but 20 means that one-hop QPI/Hypertransport and such relatively cheap 2-4 socket machine are often used for traditional servers as above. The intention is that these machines don't use zone_reclaim_mode. Note: ia64 and Power have arch specific RECLAIM_DISTANCE definitions. This patch doesn't change such high-end NUMA machine behavior. Dave Hansen said: : I know specifically of pieces of x86 hardware that set the information : in the BIOS to '21' *specifically* so they'll get the zone_reclaim_mode : behavior which that implies. : : They've done performance testing and run very large and scary benchmarks : to make sure that they _want_ this turned on. What this means for them : is that they'll probably be de-optimized, at least on newer versions of : the kernel. : : If you want to do this for particular systems, maybe _that_'s what we : should do. Have a list of specific configurations that need the : defaults overridden either because they're buggy, or they have an : unusual hardware configuration not really reflected in the distance : table. And later said: : The original change in the hardware tables was for the benefit of a : benchmark. Said benchmark isn't going to get run on mainline until the : next batch of enterprise distros drops, at which point the hardware where : this was done will be irrelevant for the benchmark. I'm sure any new : hardware will just set this distance to another yet arbitrary value to : make the kernel do what it wants. :) : : Also, when the hardware got _set_ to this initially, I complained. So, I : guess I'm getting my way now, with this patch. I'm cool with it. Reported-by: Robert Mueller Signed-off-by: KOSAKI Motohiro Acked-by: Christoph Lameter Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: "Luck, Tony" Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index b91a40e847d..fc839bfa793 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -60,7 +60,7 @@ int arch_update_cpu_topology(void); * (in whatever arch specific measurement units returned by node_distance()) * then switch on zone reclaim on boot. */ -#define RECLAIM_DISTANCE 20 +#define RECLAIM_DISTANCE 30 #endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) -- cgit v1.2.3-70-g09d2 From ca39599c633fb02aceac31a7e67563612e4fe347 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 15 Jun 2011 15:08:27 -0700 Subject: BUILD_BUG_ON_ZERO: fix sparse breakage BUILD_BUG_ON_ZERO and BUILD_BUG_ON_NULL must return values, even in the CHECKER case otherwise various users of it become syntactically invalid. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fb0e7329fee..953352a8833 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -671,8 +671,8 @@ struct sysinfo { #ifdef __CHECKER__ #define BUILD_BUG_ON_NOT_POWER_OF_2(n) -#define BUILD_BUG_ON_ZERO(e) -#define BUILD_BUG_ON_NULL(e) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON(condition) #else /* __CHECKER__ */ -- cgit v1.2.3-70-g09d2 From bd5dc17be87b3a3073d50b23802647db3ae3fa8e Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:28 -0700 Subject: uts: make default hostname configurable, rather than always using "(none)" The "hostname" tool falls back to setting the hostname to "localhost" if /etc/hostname does not exist. Distribution init scripts have the same fallback. However, if userspace never calls sethostname, such as when booting with init=/bin/sh, or otherwise booting a minimal system without the usual init scripts, the default hostname of "(none)" remains, unhelpfully appearing in various places such as prompts ("root@(none):~#") and logs. Furthermore, "(none)" doesn't typically resolve to anything useful. Make the default hostname configurable. This removes the need for the standard fallback, provides a useful default for systems that never call sethostname, and makes minimal systems that much more useful with less configuration. Distributions could choose to use "localhost" here to avoid the fallback, while embedded systems may wish to use a specific target hostname. Signed-off-by: Josh Triplett Acked-by: Linus Torvalds Acked-by: David Miller Cc: Serge Hallyn Cc: Kel Modderman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uts.h | 2 +- init/Kconfig | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uts.h b/include/linux/uts.h index 73eb1ed36ec..6ddbd86377d 100644 --- a/include/linux/uts.h +++ b/include/linux/uts.h @@ -9,7 +9,7 @@ #endif #ifndef UTS_NODENAME -#define UTS_NODENAME "(none)" /* set by sethostname() */ +#define UTS_NODENAME CONFIG_DEFAULT_HOSTNAME /* set by sethostname() */ #endif #ifndef UTS_DOMAINNAME diff --git a/init/Kconfig b/init/Kconfig index ebafac4231e..e0a22e42d39 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -204,6 +204,15 @@ config KERNEL_LZO endchoice +config DEFAULT_HOSTNAME + string "Default hostname" + default "(none)" + help + This option determines the default system hostname before userspace + calls sethostname(2). The kernel traditionally uses "(none)" here, + but you may wish to use a different default here to make a minimal + system more usable with less configuration. + config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK -- cgit v1.2.3-70-g09d2 From 49b24d6b41c576ba43153fc94695f871cce139a5 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 15 Jun 2011 15:08:34 -0700 Subject: include/asm-generic/pgtable.h: fix unbalanced parenthesis Signed-off-by: Nicolas Kaiser Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e9b8e5926be..76bff2bff15 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -88,7 +88,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, pmd_t pmd = *pmdp; pmd_clear(mm, address, pmdp); return pmd; -}) +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -- cgit v1.2.3-70-g09d2 From 4a9a8b71e12d41abb71c4e741bff524f016cfef4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 14 Jun 2011 06:13:55 +0000 Subject: drm/radeon: workaround a hw bug on some radeon chipsets with all-0 EDIDs. Some RS690 chipsets seem to end up with floating connectors, either a DVI connector isn't actually populated, or an add-in HDMI card is available but not installed. In this case we seem to get a NULL byte response for each byte of the i2c transaction, so we detect this case and if we see it we don't do anymore DDC transactions on this connector. I've tested this on my RS690 without the HDMI card installed and it seems to work fine. Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher --- drivers/gpu/drm/drm_edid.c | 15 +++++++++++++++ drivers/gpu/drm/radeon/radeon_connectors.c | 7 +++++++ include/drm/drm_crtc.h | 2 ++ 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3618d29c79a..09292193daf 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -258,6 +258,17 @@ drm_do_probe_ddc_edid(struct i2c_adapter *adapter, unsigned char *buf, return ret == 2 ? 0 : -1; } +static bool drm_edid_is_zero(u8 *in_edid, int length) +{ + int i; + u32 *raw_edid = (u32 *)in_edid; + + for (i = 0; i < length / 4; i++) + if (*(raw_edid + i) != 0) + return false; + return true; +} + static u8 * drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter) { @@ -273,6 +284,10 @@ drm_do_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter) goto out; if (drm_edid_block_valid(block)) break; + if (i == 0 && drm_edid_is_zero(block, EDID_LENGTH)) { + connector->null_edid_counter++; + goto carp; + } } if (i == 4) goto carp; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 9c2929c7e79..c04e18ee8a8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -836,6 +836,13 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) if (!radeon_connector->edid) { DRM_ERROR("%s: probed a monitor but no|invalid EDID\n", drm_get_connector_name(connector)); + /* rs690 seems to have a problem with connectors not existing and always + * return a block of 0's. If we see this just stop polling on this output */ + if ((rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) && radeon_connector->base.null_edid_counter) { + ret = connector_status_disconnected; + DRM_ERROR("%s: detected RS690 floating bus bug, stopping ddc detect\n", drm_get_connector_name(connector)); + radeon_connector->ddc_bus = NULL; + } } else { radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 9573e0ce312..33d12f87f0e 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -520,6 +520,8 @@ struct drm_connector { uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; uint32_t force_encoder_id; struct drm_encoder *encoder; /* currently active encoder */ + + int null_edid_counter; /* needed to workaround some HW bugs where we get all 0s */ }; /** -- cgit v1.2.3-70-g09d2 From c001fb72a7b705f902bdfdd05b5d2408efe6f848 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 17:05:11 -0700 Subject: gpio: add GPIOF_ values regardless on kconfig settings Make GPIOF_ defined values available even when GPIOLIB nor GENERIC_GPIO is enabled by moving them to . Fixes these build errors in linux-next: sound/soc/codecs/ak4641.c:524: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) sound/soc/codecs/wm8915.c:2921: error: 'GPIOF_OUT_INIT_LOW' undeclared (first use in this function) Signed-off-by: Randy Dunlap Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 10 ---------- include/linux/gpio.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index fcdcb5d5c99..d494001b122 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -170,16 +170,6 @@ extern int __gpio_cansleep(unsigned gpio); extern int __gpio_to_irq(unsigned gpio); -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) - /** * struct gpio - a structure describing a GPIO with configuration * @gpio: the GPIO number diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32d47e71066..17b5a0d80e4 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -3,6 +3,17 @@ /* see Documentation/gpio.txt */ +/* make these flag values available regardless of GPIO kconfig options */ +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) + +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) + +#define GPIOF_IN (GPIOF_DIR_IN) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) + #ifdef CONFIG_GENERIC_GPIO #include -- cgit v1.2.3-70-g09d2 From 42c1edd345c8412d96e7a362ee06feb7be73bb6c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 16 Jun 2011 17:29:22 +0200 Subject: netfilter: nf_nat: avoid double seq_adjust for loopback Avoid double seq adjustment for loopback traffic because it causes silent repetition of TCP data. One example is passive FTP with DNAT rule and difference in the length of IP addresses. This patch adds check if packet is sent and received via loopback device. As the same conntrack is used both for outgoing and incoming direction, we restrict seq adjustment to happen only in POSTROUTING. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c7c42e7acc3..5d4f8e586e3 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -307,6 +307,12 @@ static inline int nf_ct_is_untracked(const struct nf_conn *ct) return test_bit(IPS_UNTRACKED_BIT, &ct->status); } +/* Packet is received from loopback */ +static inline bool nf_is_loopback_packet(const struct sk_buff *skb) +{ + return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; +} + extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index db10075dd88..de9da21113a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); -- cgit v1.2.3-70-g09d2 From b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jun 2011 16:22:08 +0200 Subject: clocksource: Make watchdog robust vs. interruption The clocksource watchdog code is interruptible and it has been observed that this can trigger false positives which disable the TSC. The reason is that an interrupt storm or a long running interrupt handler between the read of the watchdog source and the read of the TSC brings the two far enough apart that the delta is larger than the unstable treshold. Move both reads into a short interrupt disabled region to avoid that. Reported-and-tested-by: Vernon Mauery Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d4646b48dc4..18a1baf31f2 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -188,6 +188,7 @@ struct clocksource { #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; + cycle_t cs_last; cycle_t wd_last; #endif } ____cacheline_aligned; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd67732..e0980f0d9a0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -185,7 +185,6 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); -static cycle_t watchdog_last; static int watchdog_running; static int clocksource_watchdog_kthread(void *data); @@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) if (!watchdog_running) goto out; - wdnow = watchdog->read(watchdog); - wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, - watchdog->mult, watchdog->shift); - watchdog_last = wdnow; - list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) continue; } + local_irq_disable(); csnow = cs->read(cs); + wdnow = watchdog->read(watchdog); + local_irq_enable(); /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; - cs->wd_last = csnow; + cs->wd_last = wdnow; + cs->cs_last = csnow; continue; } - /* Check the deviation from the watchdog clocksource. */ - cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, + watchdog->mult, watchdog->shift); + + cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & cs->mask, cs->mult, cs->shift); - cs->wd_last = csnow; + cs->cs_last = csnow; + cs->wd_last = wdnow; + + /* Check the deviation from the watchdog clocksource. */ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; @@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) return; init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; - watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; -- cgit v1.2.3-70-g09d2 From c16d51a32bbb61ac8fd96f78b5ce2fccfe0fb4c3 Mon Sep 17 00:00:00 2001 From: Shreshtha Kumar Sahu Date: Mon, 13 Jun 2011 10:11:33 +0200 Subject: amba pl011: workaround for uart registers lockup This workaround aims to break the deadlock situation which raises during continuous transfer of data for long duration over uart with hardware flow control. It is observed that CTS interrupt cannot be cleared in uart interrupt register (ICR). Hence further transfer over uart gets blocked. It is seen that during such deadlock condition ICR don't get cleared even on multiple write. This leads pass_counter to decrease and finally reach zero. This can be taken as trigger point to run this UART_BT_WA. Workaround backups the register configuration, does soft reset of UART using BIT-0 of PRCC_K_SOFTRST_SET/CLEAR registers and restores the registers. This patch also provides support for uart init and exit function calls if present. Signed-off-by: Shreshtha Kumar Sahu Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 123 +++++++++++++++++++++++++++++++++++++++- include/linux/amba/serial.h | 3 + 2 files changed, 125 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8dc0541feec..f5f6831b0a6 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,30 @@ #define UART_DR_ERROR (UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE) #define UART_DUMMY_DR_RX (1 << 16) + +#define UART_WA_SAVE_NR 14 + +static void pl011_lockup_wa(unsigned long data); +static const u32 uart_wa_reg[UART_WA_SAVE_NR] = { + ST_UART011_DMAWM, + ST_UART011_TIMEOUT, + ST_UART011_LCRH_RX, + UART011_IBRD, + UART011_FBRD, + ST_UART011_LCRH_TX, + UART011_IFLS, + ST_UART011_XFCR, + ST_UART011_XON1, + ST_UART011_XON2, + ST_UART011_XOFF1, + ST_UART011_XOFF2, + UART011_CR, + UART011_IMSC +}; + +static u32 uart_wa_regdata[UART_WA_SAVE_NR]; +static DECLARE_TASKLET(pl011_lockup_tlet, pl011_lockup_wa, 0); + /* There is by now at least one vendor with differing details, so handle it */ struct vendor_data { unsigned int ifls; @@ -72,6 +97,7 @@ struct vendor_data { unsigned int lcrh_tx; unsigned int lcrh_rx; bool oversampling; + bool interrupt_may_hang; /* vendor-specific */ bool dma_threshold; }; @@ -90,9 +116,12 @@ static struct vendor_data vendor_st = { .lcrh_tx = ST_UART011_LCRH_TX, .lcrh_rx = ST_UART011_LCRH_RX, .oversampling = true, + .interrupt_may_hang = true, .dma_threshold = true, }; +static struct uart_amba_port *amba_ports[UART_NR]; + /* Deals with DMA transactions */ struct pl011_sgbuf { @@ -132,6 +161,7 @@ struct uart_amba_port { unsigned int lcrh_rx; /* vendor-specific */ bool autorts; char type[12]; + bool interrupt_may_hang; /* vendor-specific */ #ifdef CONFIG_DMA_ENGINE /* DMA stuff */ bool using_tx_dma; @@ -1008,6 +1038,68 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) #endif +/* + * pl011_lockup_wa + * This workaround aims to break the deadlock situation + * when after long transfer over uart in hardware flow + * control, uart interrupt registers cannot be cleared. + * Hence uart transfer gets blocked. + * + * It is seen that during such deadlock condition ICR + * don't get cleared even on multiple write. This leads + * pass_counter to decrease and finally reach zero. This + * can be taken as trigger point to run this UART_BT_WA. + * + */ +static void pl011_lockup_wa(unsigned long data) +{ + struct uart_amba_port *uap = amba_ports[0]; + void __iomem *base = uap->port.membase; + struct circ_buf *xmit = &uap->port.state->xmit; + struct tty_struct *tty = uap->port.state->port.tty; + int buf_empty_retries = 200; + int loop; + + /* Stop HCI layer from submitting data for tx */ + tty->hw_stopped = 1; + while (!uart_circ_empty(xmit)) { + if (buf_empty_retries-- == 0) + break; + udelay(100); + } + + /* Backup registers */ + for (loop = 0; loop < UART_WA_SAVE_NR; loop++) + uart_wa_regdata[loop] = readl(base + uart_wa_reg[loop]); + + /* Disable UART so that FIFO data is flushed out */ + writew(0x00, uap->port.membase + UART011_CR); + + /* Soft reset UART module */ + if (uap->port.dev->platform_data) { + struct amba_pl011_data *plat; + + plat = uap->port.dev->platform_data; + if (plat->reset) + plat->reset(); + } + + /* Restore registers */ + for (loop = 0; loop < UART_WA_SAVE_NR; loop++) + writew(uart_wa_regdata[loop] , + uap->port.membase + uart_wa_reg[loop]); + + /* Initialise the old status of the modem signals */ + uap->old_status = readw(uap->port.membase + UART01x_FR) & + UART01x_FR_MODEM_ANY; + + if (readl(base + UART011_MIS) & 0x2) + printk(KERN_EMERG "UART_BT_WA: ***FAILED***\n"); + + /* Start Tx/Rx */ + tty->hw_stopped = 0; +} + static void pl011_stop_tx(struct uart_port *port) { struct uart_amba_port *uap = (struct uart_amba_port *)port; @@ -1158,8 +1250,11 @@ static irqreturn_t pl011_int(int irq, void *dev_id) if (status & UART011_TXIS) pl011_tx_chars(uap); - if (pass_counter-- == 0) + if (pass_counter-- == 0) { + if (uap->interrupt_may_hang) + tasklet_schedule(&pl011_lockup_tlet); break; + } status = readw(uap->port.membase + UART011_MIS); } while (status != 0); @@ -1339,6 +1434,14 @@ static int pl011_startup(struct uart_port *port) writew(uap->im, uap->port.membase + UART011_IMSC); spin_unlock_irq(&uap->port.lock); + if (uap->port.dev->platform_data) { + struct amba_pl011_data *plat; + + plat = uap->port.dev->platform_data; + if (plat->init) + plat->init(); + } + return 0; clk_dis: @@ -1394,6 +1497,15 @@ static void pl011_shutdown(struct uart_port *port) * Shut down the clock producer */ clk_disable(uap->clk); + + if (uap->port.dev->platform_data) { + struct amba_pl011_data *plat; + + plat = uap->port.dev->platform_data; + if (plat->exit) + plat->exit(); + } + } static void @@ -1700,6 +1812,14 @@ static int __init pl011_console_setup(struct console *co, char *options) if (!uap) return -ENODEV; + if (uap->port.dev->platform_data) { + struct amba_pl011_data *plat; + + plat = uap->port.dev->platform_data; + if (plat->init) + plat->init(); + } + uap->port.uartclk = clk_get_rate(uap->clk); if (options) @@ -1774,6 +1894,7 @@ static int pl011_probe(struct amba_device *dev, const struct amba_id *id) uap->lcrh_rx = vendor->lcrh_rx; uap->lcrh_tx = vendor->lcrh_tx; uap->fifosize = vendor->fifosize; + uap->interrupt_may_hang = vendor->interrupt_may_hang; uap->port.dev = &dev->dev; uap->port.mapbase = dev->res.start; uap->port.membase = base; diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 5479fdc849e..514ed45c462 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -201,6 +201,9 @@ struct amba_pl011_data { bool (*dma_filter)(struct dma_chan *chan, void *filter_param); void *dma_rx_param; void *dma_tx_param; + void (*init) (void); + void (*exit) (void); + void (*reset) (void); }; #endif -- cgit v1.2.3-70-g09d2 From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Tue, 29 Mar 2011 12:35:04 -0400 Subject: generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts There is a problem that kdump(2nd kernel) sometimes hangs up due to a pending IPI from 1st kernel. Kernel panic occurs because IPI comes before call_single_queue is initialized. To fix the crash, rename init_call_single_data() to call_function_init() and call it in start_kernel() so that call_single_queue can be initialized before enabling interrupts. The details of the crash are: (1) 2nd kernel boots up (2) A pending IPI from 1st kernel comes when irqs are first enabled in start_kernel(). (3) Kernel tries to handle the interrupt, but call_single_queue is not initialized yet at this point. As a result, in the generic_smp_call_function_single_interrupt(), NULL pointer dereference occurs when list_replace_init() tries to access &q->list.next. Therefore this patch changes the name of init_call_single_data() to call_function_init() and calls it before local_irq_enable() in start_kernel(). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Acked-by: Neil Horman Acked-by: Vivek Goyal Acked-by: Peter Zijlstra Cc: Milton Miller Cc: Jens Axboe Cc: Paul E. McKenney Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com Signed-off-by: Ingo Molnar --- include/linux/smp.h | 5 ++++- init/main.c | 1 + kernel/smp.c | 5 +---- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ad824d510a..8cc38d3bab0 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -85,12 +85,15 @@ int smp_call_function_any(const struct cpumask *mask, * Generic and arch helpers */ #ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); void ipi_call_lock(void); void ipi_call_unlock(void); void ipi_call_lock_irq(void); void ipi_call_unlock_irq(void); +#else +static inline void call_function_init(void) { } #endif /* @@ -134,7 +137,7 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) { } +static inline void call_function_init(void) { } static inline int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, diff --git a/init/main.c b/init/main.c index cafba67c13b..d7211faed2a 100644 --- a/init/main.c +++ b/init/main.c @@ -542,6 +542,7 @@ asmlinkage void __init start_kernel(void) timekeeping_init(); time_init(); profile_init(); + call_function_init(); if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); diff --git a/kernel/smp.c b/kernel/smp.c index 73a19519355..fb67dfa8394 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { .notifier_call = hotplug_cfd, }; -static int __cpuinit init_call_single_data(void) +void __init call_function_init(void) { void *cpu = (void *)(long)smp_processor_id(); int i; @@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); register_cpu_notifier(&hotplug_cfd_notifier); - - return 0; } -early_initcall(init_call_single_data); /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources -- cgit v1.2.3-70-g09d2 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/kmod.h | 8 ++++---- kernel/kmod.c | 16 +++++++++------- security/keys/request_key.c | 3 +-- 4 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72f..6075a1e727a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info) +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *rp, *wp; struct fdtable *fdt; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index d4a5c84c503..0da38cf7db7 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -45,7 +45,7 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #endif -struct key; +struct cred; struct file; enum umh_wait { @@ -62,7 +62,7 @@ struct subprocess_info { char **envp; enum umh_wait wait; int retval; - int (*init)(struct subprocess_info *info); + int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; }; @@ -73,7 +73,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data); @@ -87,7 +87,7 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper_fns(char *path, char **argv, char **envp, enum umh_wait wait, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data) { struct subprocess_info *info; diff --git a/kernel/kmod.c b/kernel/kmod.c index ad6a81c58b4..47613dfb7b2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - if (sub_info->init) { - retval = sub_info->init(sub_info); - if (retval) - goto fail; - } - retval = -ENOMEM; new = prepare_kernel_cred(current); if (!new) @@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) new->cap_inheritable); spin_unlock(&umh_sysctl_lock); + if (sub_info->init) { + retval = sub_info->init(sub_info, new); + if (retval) { + abort_creds(new); + goto fail; + } + } + commit_creds(new); retval = kernel_execve(sub_info->path, @@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * context in which call_usermodehelper_exec is called. */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data) { diff --git a/security/keys/request_key.c b/security/keys/request_key.c index d31862e0aa1..8e319a416ee 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -71,9 +71,8 @@ EXPORT_SYMBOL(complete_request_key); * This is called in context of freshly forked kthread before kernel_execve(), * so we can simply install the desired session_keyring at this point. */ -static int umh_keys_init(struct subprocess_info *info) +static int umh_keys_init(struct subprocess_info *info, struct cred *cred) { - struct cred *cred = (struct cred*)current_cred(); struct key *keyring = info->data; return install_session_keyring_to_cred(cred, keyring); -- cgit v1.2.3-70-g09d2 From cca23d0b5350c9ca0473625c3f5879422ba534a6 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sat, 18 Jun 2011 02:51:52 -0700 Subject: Input: sh_keysc - 8x8 MODE_6 fix According to the data sheet for G4, AP4 and AG5 KEYSC MODE_6 is 8x8 keys. Bump up MAXKEYS to 64 too. Signed-off-by: Magnus Damm Reviewed-by: Simon Horman Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/sh_keysc.c | 2 +- include/linux/input/sh_keysc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 834cf98e7ef..6876700a446 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -32,7 +32,7 @@ static const struct { [SH_KEYSC_MODE_3] = { 2, 4, 7 }, [SH_KEYSC_MODE_4] = { 3, 6, 6 }, [SH_KEYSC_MODE_5] = { 4, 6, 7 }, - [SH_KEYSC_MODE_6] = { 5, 7, 7 }, + [SH_KEYSC_MODE_6] = { 5, 8, 8 }, }; struct sh_keysc_priv { diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h index 649dc7f1292..5d253cd9369 100644 --- a/include/linux/input/sh_keysc.h +++ b/include/linux/input/sh_keysc.h @@ -1,7 +1,7 @@ #ifndef __SH_KEYSC_H__ #define __SH_KEYSC_H__ -#define SH_KEYSC_MAXKEYS 49 +#define SH_KEYSC_MAXKEYS 64 struct sh_keysc_info { enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3, -- cgit v1.2.3-70-g09d2 From be98ca652faa6468916a9b7608befff215a8ca70 Mon Sep 17 00:00:00 2001 From: Manoj Iyer Date: Thu, 26 May 2011 11:19:05 -0500 Subject: mmc: Add PCI fixup quirks for Ricoh 1180:e823 reader Signed-off-by: Manoj Iyer Cc: Signed-off-by: Chris Ball --- drivers/pci/quirks.c | 2 ++ include/linux/pci_ids.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e8a140669f9..02145e9697a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2761,6 +2761,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); +DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); #endif /*CONFIG_MMC_RICOH_MMC*/ #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a311008af5e..f8910e15556 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1537,6 +1537,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.3-70-g09d2 From 155d109b5f52ffd749219b27702462dcd9cf4f8d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 20 Jun 2011 13:23:14 +0200 Subject: block: add REQ_SECURE to REQ_COMMON_MASK Add REQ_SECURE flag to REQ_COMMON_MASK so that init_request_from_bio() can pass it to @req->cmd_flags. Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Cc: stable@kernel.org # 2.6.36 and newer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2a7cea53ca0..6395692b2e7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA) + REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3-70-g09d2 From 482e0cd3dbaa70f2a2bead4b5f2c0d203ef654ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 13:01:04 -0400 Subject: devcgroup_inode_permission: take "is it a device node" checks to inlined wrapper inode_permission() calls devcgroup_inode_permission() and almost all such calls are _not_ for device nodes; let's at least keep the common path straight... Signed-off-by: Al Viro --- include/linux/device_cgroup.h | 10 +++++++++- security/device_cgroup.c | 8 +------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 0b0d9c39ed6..7aad1f44086 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -2,8 +2,16 @@ #include #ifdef CONFIG_CGROUP_DEVICE -extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int __devcgroup_inode_permission(struct inode *inode, int mask); extern int devcgroup_inode_mknod(int mode, dev_t dev); +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ + if (likely(!inode->i_rdev)) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + return __devcgroup_inode_permission(inode, mask); +} #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd1f779fa51..1be68269e1c 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -474,17 +474,11 @@ struct cgroup_subsys devices_subsys = { .subsys_id = devices_subsys_id, }; -int devcgroup_inode_permission(struct inode *inode, int mask) +int __devcgroup_inode_permission(struct inode *inode, int mask) { struct dev_cgroup *dev_cgroup; struct dev_whitelist_item *wh; - dev_t device = inode->i_rdev; - if (!device) - return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) - return 0; - rcu_read_lock(); dev_cgroup = task_devcgroup(current); -- cgit v1.2.3-70-g09d2 From 19345cb299e8234006c5125151ab723e851a1d24 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 19 Jun 2011 18:33:46 -0400 Subject: NFSv4.1: file layout must consider pg_bsize for coalescing Otherwise we end up overflowing the rpc buffer size on the receive end. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 6 ++++-- fs/nfs/pagelist.c | 3 ++- include/linux/nfs_page.h | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5d6f369b15d..0bafcc91c27 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -30,6 +30,7 @@ */ #include +#include #include "internal.h" #include "nfs4filelayout.h" @@ -666,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, u64 p_stripe, r_stripe; u32 stripe_unit; - if (!pnfs_generic_pg_test(pgio, prev, req)) - return 0; + if (!pnfs_generic_pg_test(pgio, prev, req) || + !nfs_generic_pg_test(pgio, prev, req)) + return false; if (!pgio->pg_lseg) return 1; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7913961aff2..00985571628 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { /* * FIXME: ideally we should be able to coalesce all requests @@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p return desc->pg_count + req->wb_bytes <= desc->pg_bsize; } +EXPORT_SYMBOL_GPL(nfs_generic_pg_test); /** * nfs_pageio_init - initialise a page io descriptor diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3a34e80ae92..25311b3bedf 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -92,6 +92,9 @@ extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); +extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, + struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); -- cgit v1.2.3-70-g09d2 From 79568f5be06c91071697c065f01f3ebfbeb25a61 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Jun 2011 20:13:49 -0700 Subject: vfs: i_state needs to be 'unsigned long' for now Commit 13e12d14e2dc ("vfs: reorganize 'struct inode' layout a bit") moved things around a bit changed i_state to be unsigned int instead of unsigned long. That was to help structure layout for the 64-bit case, and shrink 'struct inode' a bit (admittedly that only happened when spinlock debugging was on and i_flags didn't pack with i_lock). However, Meelis Roos reports that this results in unaligned exceptions on sprc, and it turns out that the bit-locking primitives that we use for the I_NEW bit want to use the bitops. Which want 'unsigned long', not 'unsigned int'. We really should fix the bit locking code to not have that kind of requirement, but that's a much bigger change. So for now, revert that field back to 'unsigned long' (but keep the other re-ordering changes from the commit that caused this). Andi points out that we have played games with this in 'struct page', so it's solvable with other hacks too, but since right now the struct inode size advantage only happens with some rare config options, it's not worth fighting. It _would_ be worth fixing the bitlocking code, though. Especially since there is no type safety in the bitlocking code (this never caused any warnings, and worked fine on x86-64, because the bitlocks take a 'void *' and x86-64 doesn't care that deeply about alignment). So it's currently a very easy problem to trigger by mistake and never notice. Reported-by: Meelis Roos Cc: Andi Kleen Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c777878f1e..6e73e2e9ae3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -744,7 +744,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned int i_flags; - unsigned int i_state; + unsigned long i_state; #ifdef CONFIG_SECURITY void *i_security; #endif -- cgit v1.2.3-70-g09d2 From f76b168b6f117a49d36307053e1acbe30580ea5b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 20:22:23 +0200 Subject: PM: Rename dev_pm_info.in_suspend to is_prepared This patch (as1473) renames the "in_suspend" field in struct dev_pm_info to "is_prepared", in preparation for an upcoming change. The new name is more descriptive of what the field really means. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- drivers/base/power/main.c | 14 +++++++++----- drivers/usb/core/driver.c | 6 +++--- include/linux/device.h | 4 ++-- include/linux/pm.h | 2 +- 4 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index aa632020774..bf5a59ac195 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -57,7 +57,7 @@ static int async_error; */ void device_pm_init(struct device *dev) { - dev->power.in_suspend = false; + dev->power.is_prepared = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; @@ -91,7 +91,7 @@ void device_pm_add(struct device *dev) pr_debug("PM: Adding info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); mutex_lock(&dpm_list_mtx); - if (dev->parent && dev->parent->power.in_suspend) + if (dev->parent && dev->parent->power.is_prepared) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); @@ -511,7 +511,11 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) dpm_wait(dev->parent, async); device_lock(dev); - dev->power.in_suspend = false; + /* + * This is a fib. But we'll allow new children to be added below + * a resumed device, even if the device hasn't been completed yet. + */ + dev->power.is_prepared = false; if (dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); @@ -670,7 +674,7 @@ void dpm_complete(pm_message_t state) struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); - dev->power.in_suspend = false; + dev->power.is_prepared = false; list_move(&dev->power.entry, &list); mutex_unlock(&dpm_list_mtx); @@ -1042,7 +1046,7 @@ int dpm_prepare(pm_message_t state) put_device(dev); break; } - dev->power.in_suspend = true; + dev->power.is_prepared = true; if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); put_device(dev); diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index e35a17687c0..aa3cc465a60 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -375,7 +375,7 @@ static int usb_unbind_interface(struct device *dev) * Just re-enable it without affecting the endpoint toggles. */ usb_enable_interface(udev, intf, false); - } else if (!error && !intf->dev.power.in_suspend) { + } else if (!error && !intf->dev.power.is_prepared) { r = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (r < 0) @@ -960,7 +960,7 @@ void usb_rebind_intf(struct usb_interface *intf) } /* Try to rebind the interface */ - if (!intf->dev.power.in_suspend) { + if (!intf->dev.power.is_prepared) { intf->needs_binding = 0; rc = device_attach(&intf->dev); if (rc < 0) @@ -1107,7 +1107,7 @@ static int usb_resume_interface(struct usb_device *udev, if (intf->condition == USB_INTERFACE_UNBOUND) { /* Carry out a deferred switch to altsetting 0 */ - if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) { + if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) { usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); intf->needs_altsetting0 = 0; diff --git a/include/linux/device.h b/include/linux/device.h index c66111affca..553fd37b173 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -654,13 +654,13 @@ static inline int device_is_registered(struct device *dev) static inline void device_enable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { - if (!dev->power.in_suspend) + if (!dev->power.is_prepared) dev->power.async_suspend = false; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 3160648ccdd..cc536bd8098 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -425,7 +425,7 @@ struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; unsigned int async_suspend:1; - unsigned int in_suspend:1; /* Owned by the PM core */ + bool is_prepared:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; -- cgit v1.2.3-70-g09d2 From 6d0e0e84f66d32c33511984dd3badd32364b863c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 18 Jun 2011 22:42:09 +0200 Subject: PM: Fix async resume following suspend failure The PM core doesn't handle suspend failures correctly when it comes to asynchronously suspended devices. These devices are moved onto the dpm_suspended_list as soon as the corresponding async thread is started up, and they remain on the list even if they fail to suspend or the sleep transition is cancelled before they get suspended. As a result, when the PM core unwinds the transition, it tries to resume the devices even though they were never suspended. This patch (as1474) fixes the problem by adding a new "is_suspended" flag to dev_pm_info. Devices are resumed only if the flag is set. [rjw: * Moved the dev->power.is_suspended check into device_resume(), because we need to complete dev->power.completion and clear dev->power.is_prepared too for devices whose dev->power.is_suspended flags are unset. * Fixed __device_suspend() to avoid setting dev->power.is_suspended if async_error is different from zero.] Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- drivers/base/power/main.c | 14 ++++++++++++-- include/linux/pm.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index bf5a59ac195..06f09bf89cb 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -58,6 +58,7 @@ static int async_error; void device_pm_init(struct device *dev) { dev->power.is_prepared = false; + dev->power.is_suspended = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; @@ -517,6 +518,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) */ dev->power.is_prepared = false; + if (!dev->power.is_suspended) + goto Unlock; + if (dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); error = pm_op(dev, &dev->pwr_domain->ops, state); @@ -552,6 +556,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) } End: + dev->power.is_suspended = false; + + Unlock: device_unlock(dev); complete_all(&dev->power.completion); @@ -839,11 +846,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_lock(dev); if (async_error) - goto End; + goto Unlock; if (pm_wakeup_pending()) { async_error = -EBUSY; - goto End; + goto Unlock; } if (dev->pwr_domain) { @@ -881,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } End: + dev->power.is_suspended = !error; + + Unlock: device_unlock(dev); complete_all(&dev->power.completion); diff --git a/include/linux/pm.h b/include/linux/pm.h index cc536bd8098..411e4f4be52 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -426,6 +426,7 @@ struct dev_pm_info { unsigned int can_wakeup:1; unsigned int async_suspend:1; bool is_prepared:1; /* Owned by the PM core */ + bool is_suspended:1; /* Ditto */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; -- cgit v1.2.3-70-g09d2 From 39785eb1d3e6c58cc8bf8f6990956a58037ecc75 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 23 Jun 2011 20:20:26 +0000 Subject: fsl-diu-fb: remove check for pixel clock ranges The Freescale DIU framebuffer driver defines two constants, MIN_PIX_CLK and MAX_PIX_CLK, that are supposed to represent the lower and upper limits of the pixel clock. These values, however, are true only for one platform clock rate (533MHz) and only for the MPC8610. So the actual range for the pixel clock is chip-specific, which means the current values are almost always wrong. The chance of an out-of-range pixel clock being used are also remote. Rather than try to detect an out-of-range clock in the DIU driver, we depend on the board-specific pixel clock function (e.g. p1022ds_set_pixel_clock) to clamp the pixel clock to a supported value. Signed-off-by: Timur Tabi Signed-off-by: Paul Mundt --- drivers/video/fsl-diu-fb.c | 16 ---------------- include/linux/fsl-diu-fb.h | 6 ------ 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index bedf5be27f0..0acc7d65aea 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -555,8 +555,6 @@ static void adjust_aoi_size_position(struct fb_var_screeninfo *var, static int fsl_diu_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { - unsigned long htotal, vtotal; - pr_debug("check_var xres: %d\n", var->xres); pr_debug("check_var yres: %d\n", var->yres); @@ -635,20 +633,6 @@ static int fsl_diu_check_var(struct fb_var_screeninfo *var, break; } - /* If the pixclock is below the minimum spec'd value then set to - * refresh rate for 60Hz since this is supported by most monitors. - * Refer to Documentation/fb/ for calculations. - */ - if ((var->pixclock < MIN_PIX_CLK) || (var->pixclock > MAX_PIX_CLK)) { - htotal = var->xres + var->right_margin + var->hsync_len + - var->left_margin; - vtotal = var->yres + var->lower_margin + var->vsync_len + - var->upper_margin; - var->pixclock = (vtotal * htotal * 6UL) / 100UL; - var->pixclock = KHZ2PICOS(var->pixclock); - pr_debug("pixclock set for 60Hz refresh = %u ps\n", - var->pixclock); - } var->height = -1; var->width = -1; diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h index 781d4671415..daa9952d217 100644 --- a/include/linux/fsl-diu-fb.h +++ b/include/linux/fsl-diu-fb.h @@ -24,12 +24,6 @@ * See mpc8610fb_set_par(), map_video_memory(), and unmap_video_memory() */ #define MEM_ALLOC_THRESHOLD (1024*768*4+32) -/* Minimum value that the pixel clock can be set to in pico seconds - * This is determined by platform clock/3 where the minimum platform - * clock is 533MHz. This gives 5629 pico seconds. - */ -#define MIN_PIX_CLK 5629 -#define MAX_PIX_CLK 96096 #include -- cgit v1.2.3-70-g09d2 From a66b98db570a638afd909459e1e6bfa272344bd3 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 23 Jun 2011 00:00:24 +0300 Subject: mac80211: fix rx->key NULL dereference during mic failure Sometimes when reporting a MIC failure rx->key may be unset. This code path is hit when receiving a packet meant for a multicast address, and decryption is performed in HW. Fortunately, the failing key_idx is not used for anything up to (and including) usermode, so we allow ourselves to drop it on the way up when a key cannot be retrieved. Signed-off-by: Arik Nemtsov Cc: stable@kernel.org Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- net/mac80211/wpa.c | 8 +++++++- net/wireless/nl80211.c | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0589f554788..396e8fc8910 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2688,7 +2688,7 @@ void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, * @dev: network device * @addr: The source MAC address of the frame * @key_type: The key type that the received frame used - * @key_id: Key identifier (0..3) + * @key_id: Key identifier (0..3). Can be -1 if missing. * @tsc: The TSC value of the frame that generated the MIC failure (6 octets) * @gfp: allocation flags * diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 9dc3b5f26e8..d91c1a26630 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -154,7 +154,13 @@ update_iv: return RX_CONTINUE; mic_fail: - mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, + /* + * In some cases the key can be unset - e.g. a multicast packet, in + * a driver that supports HW encryption. Send up the key idx only if + * the key is set. + */ + mac80211_ev_michael_mic_failure(rx->sdata, + rx->key ? rx->key->conf.keyidx : -1, (void *) skb->data, NULL, GFP_ATOMIC); return RX_DROP_UNUSABLE; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 98fa8eb6cc4..f07602d7bf6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6463,7 +6463,8 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, if (addr) NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); NLA_PUT_U32(msg, NL80211_ATTR_KEY_TYPE, key_type); - NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id); + if (key_id != -1) + NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id); if (tsc) NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc); -- cgit v1.2.3-70-g09d2 From c6830c22603aaecf65405af23f6da2d55892f9cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 16 Jun 2011 17:28:07 +0900 Subject: Fix node_start/end_pfn() definition for mm/page_cgroup.c commit 21a3c96 uses node_start/end_pfn(nid) for detection start/end of nodes. But, it's not defined in linux/mmzone.h but defined in /arch/???/include/mmzone.h which is included only under CONFIG_NEED_MULTIPLE_NODES=y. Then, we see mm/page_cgroup.c: In function 'page_cgroup_init': mm/page_cgroup.c:308: error: implicit declaration of function 'node_start_pfn' mm/page_cgroup.c:309: error: implicit declaration of function 'node_end_pfn' So, fixiing page_cgroup.c is an idea... But node_start_pfn()/node_end_pfn() is a very generic macro and should be implemented in the same manner for all archs. (m32r has different implementation...) This patch removes definitions of node_start/end_pfn() in each archs and defines a unified one in linux/mmzone.h. It's not under CONFIG_NEED_MULTIPLE_NODES, now. A result of macro expansion is here (mm/page_cgroup.c) for !NUMA start_pfn = ((&contig_page_data)->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (&contig_page_data); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); for NUMA (x86-64) start_pfn = ((node_data[nid])->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (node_data[nid]); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); Changelog: - fixed to avoid using "nid" twice in node_end_pfn() macro. Reported-and-acked-by: Randy Dunlap Reported-and-tested-by: Ingo Molnar Acked-by: Mel Gorman Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mmzone.h | 1 - arch/m32r/include/asm/mmzone.h | 8 +------- arch/parisc/include/asm/mmzone.h | 7 ------- arch/powerpc/include/asm/mmzone.h | 7 ------- arch/sh/include/asm/mmzone.h | 4 ---- arch/sparc/include/asm/mmzone.h | 2 -- arch/tile/include/asm/mmzone.h | 11 ----------- arch/x86/include/asm/mmzone_32.h | 11 ----------- arch/x86/include/asm/mmzone_64.h | 3 --- include/linux/mmzone.h | 7 +++++++ 10 files changed, 8 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h index 8af56ce346a..445dc42e033 100644 --- a/arch/alpha/include/asm/mmzone.h +++ b/arch/alpha/include/asm/mmzone.h @@ -56,7 +56,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) * Given a kernel address, find the home node of the underlying memory. */ #define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) /* * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory diff --git a/arch/m32r/include/asm/mmzone.h b/arch/m32r/include/asm/mmzone.h index 9f3b5accda8..115ced33feb 100644 --- a/arch/m32r/include/asm/mmzone.h +++ b/arch/m32r/include/asm/mmzone.h @@ -14,12 +14,6 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \ -}) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) /* @@ -44,7 +38,7 @@ static __inline__ int pfn_to_nid(unsigned long pfn) int node; for (node = 0 ; node < MAX_NUMNODES ; node++) - if (pfn >= node_start_pfn(node) && pfn <= node_end_pfn(node)) + if (pfn >= node_start_pfn(node) && pfn < node_end_pfn(node)) break; return node; diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index 9608d2cf214..e67eb9c3d1b 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -14,13 +14,6 @@ extern struct node_map_data node_data[]; #define NODE_DATA(nid) (&node_data[nid].pg_data) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ -}) - /* We have these possible memory map layouts: * Astro: 0-3.75, 67.75-68, 4-64 * zx1: 0-1, 257-260, 4-256 diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index fd3fd58bad8..7b589178be4 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -38,13 +38,6 @@ u64 memory_hotplug_max(void); #define memory_hotplug_max() memblock_end_of_DRAM() #endif -/* - * Following are macros that each numa implmentation must define. - */ - -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) - #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h index 8887baff5ef..15a8496960e 100644 --- a/arch/sh/include/asm/mmzone.h +++ b/arch/sh/include/asm/mmzone.h @@ -9,10 +9,6 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_spanned_pages) - static inline int pfn_to_nid(unsigned long pfn) { int nid; diff --git a/arch/sparc/include/asm/mmzone.h b/arch/sparc/include/asm/mmzone.h index e8c648741ed..99d9b9f577b 100644 --- a/arch/sparc/include/asm/mmzone.h +++ b/arch/sparc/include/asm/mmzone.h @@ -8,8 +8,6 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) extern int numa_cpu_lookup_table[]; extern cpumask_t numa_cpumask_lookup_table[]; diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h index c6344c4f32a..9d3dbce8f95 100644 --- a/arch/tile/include/asm/mmzone.h +++ b/arch/tile/include/asm/mmzone.h @@ -40,17 +40,6 @@ static inline int pfn_to_nid(unsigned long pfn) return highbits_to_node[__pfn_to_highbits(pfn)]; } -/* - * Following are macros that each numa implmentation must define. - */ - -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ -}) - #define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) static inline int pfn_valid(int pfn) diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 5e83a416eca..224e8c5eb30 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -48,17 +48,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -/* - * Following are macros that each numa implmentation must define. - */ - -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ -}) - static inline int pfn_valid(int pfn) { int nid = pfn_to_nid(pfn); diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index b3f88d7867c..129d9aa3ceb 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -13,8 +13,5 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_spanned_pages) #endif #endif /* _ASM_X86_MMZONE_64_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c928dac6cad..9f7c3ebcbba 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -647,6 +647,13 @@ typedef struct pglist_data { #endif #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) + +#define node_end_pfn(nid) ({\ + pg_data_t *__pgdat = NODE_DATA(nid);\ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ +}) + #include extern struct mutex zonelists_mutex; -- cgit v1.2.3-70-g09d2 From 4d258b25d947521c8b913154db61ec55198243f8 Mon Sep 17 00:00:00 2001 From: Vitaliy Ivanov Date: Mon, 27 Jun 2011 19:07:08 +0300 Subject: Fix some kernel-doc warnings Fix 'make htmldocs' warnings: Warning(/include/linux/hrtimer.h:153): No description found for parameter 'clockid' Warning(/include/linux/device.h:604): Excess struct/union/enum/typedef member 'of_match' description in 'device' Warning(/include/net/sock.h:349): Excess struct/union/enum/typedef member 'sk_rmem_alloc' description in 'sock' Signed-off-by: Vitaliy Ivanov Acked-by: Grant Likely Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 - include/linux/hrtimer.h | 1 + include/net/sock.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 553fd37b173..e4f62d8896b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -530,7 +530,6 @@ struct device_dma_parameters { * @dma_mem: Internal for coherent mem override. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. - * @of_match: Matching of_device_id from driver. * @devt: For creating the sysfs "dev". * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 51932e5acf7..fd0dc30c9f1 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -135,6 +135,7 @@ struct hrtimer_sleeper { * @cpu_base: per cpu clock base * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. + * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock diff --git a/include/net/sock.h b/include/net/sock.h index f2046e404a6..c0b938cb4b1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -178,7 +178,6 @@ struct sock_common { * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy - * @sk_rmem_alloc: receive queue bytes committed * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_write_queue: Packet sending queue -- cgit v1.2.3-70-g09d2 From 072441e21ddcd1140606b7d4ef6eab579a86b0b3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:02 -0700 Subject: mm: move shmem prototypes to shmem_fs.h Before adding any more global entry points into shmem.c, gather such prototypes into shmem_fs.h. Remove mm's own declarations from swap.h, but for now leave the ones in mm.h: because shmem_file_setup() and shmem_zero_setup() are called from various places, and we should not force other subsystems to update immediately. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 17 +++++++++++++++++ include/linux/swap.h | 10 ---------- mm/memcontrol.c | 1 + mm/swapfile.c | 2 +- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 2b7fec84051..cae65dc42bc 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -5,6 +5,13 @@ #include #include +struct page; +struct file; +struct inode; +struct super_block; +struct user_struct; +struct vm_area_struct; + /* inode in-kernel data */ #define SHMEM_NR_DIRECT 16 @@ -45,7 +52,17 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) return container_of(inode, struct shmem_inode_info, vfs_inode); } +/* + * Functions in mm/shmem.c called directly from elsewhere: + */ extern int init_tmpfs(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); +extern struct file *shmem_file_setup(const char *name, + loff_t size, unsigned long flags); +extern int shmem_zero_setup(struct vm_area_struct *); +extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern int shmem_unuse(swp_entry_t entry, struct page *page); +extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index e7056464703..a273468f828 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -300,16 +300,6 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_MMU -/* linux/mm/shmem.c */ -extern int shmem_unuse(swp_entry_t entry, struct page *page); -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); -#endif - #ifdef CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cf7d027a884..ddffc74cdeb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index d537d29e9b7..ff8dc1a18cb 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 94c1e62df4494b79782cb9c7279f827212d1de70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:03 -0700 Subject: tmpfs: take control of its truncate_range 2.6.35's new truncate convention gave tmpfs the opportunity to control its file truncation, no longer enforced from outside by vmtruncate(). We shall want to build upon that, to handle pagecache and swap together. Slightly redefine the ->truncate_range interface: let it now be called between the unmap_mapping_range()s, with the filesystem responsible for doing the truncate_inode_pages_range() from it - just as the filesystem is nowadays responsible for doing that from its ->setattr. Let's rename shmem_notify_change() to shmem_setattr(). Instead of calling the generic truncate_setsize(), bring that code in so we can call shmem_truncate_range() - which will later be updated to perform its own variant of truncate_inode_pages_range(). Remove the punch_hole unmap_mapping_range() from shmem_truncate_range(): now that the COW's unmap_mapping_range() comes after ->truncate_range, there is no need to call it a third time. Export shmem_truncate_range() and add it to the list in shmem_fs.h, so that i915_gem_object_truncate() can call it explicitly in future; get this patch in first, then update drm/i915 once this is available (until then, i915 will just be doing the truncate_inode_pages() twice). Though introduced five years ago, no other filesystem is implementing ->truncate_range, and its only other user is madvise(,,MADV_REMOVE): we expect to convert it to fallocate(,FALLOC_FL_PUNCH_HOLE,,) shortly, whereupon ->truncate_range can be removed from inode_operations - shmem_truncate_range() will help i915 across that transition too. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + mm/shmem.c | 51 +++++++++++++++++++++++++++--------------------- mm/truncate.c | 4 ++-- 3 files changed, 32 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index cae65dc42bc..22a20af4d78 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -61,6 +61,7 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); diff --git a/mm/shmem.c b/mm/shmem.c index d221a1cfd7b..f1714758ea9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -539,7 +539,7 @@ static void shmem_free_pages(struct list_head *next) } while (next); } -static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { struct shmem_inode_info *info = SHMEM_I(inode); unsigned long idx; @@ -562,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) spinlock_t *punch_lock; unsigned long upper_limit; + truncate_inode_pages_range(inode->i_mapping, start, end); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (idx >= info->next_index) @@ -738,16 +740,8 @@ done2: * lowered next_index. Also, though shmem_getpage checks * i_size before adding to cache, no recheck after: so fix the * narrow window there too. - * - * Recalling truncate_inode_pages_range and unmap_mapping_range - * every time for punch_hole (which never got a chance to clear - * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, - * yet hardly ever necessary: try to optimize them out later. */ truncate_inode_pages_range(inode->i_mapping, start, end); - if (punch_hole) - unmap_mapping_range(inode->i_mapping, start, - end - start, 1); } spin_lock(&info->lock); @@ -766,22 +760,23 @@ done2: shmem_free_pages(pages_to_free.next); } } +EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) +static int shmem_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; - loff_t newsize = attr->ia_size; int error; error = inode_change_ok(inode, attr); if (error) return error; - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE) - && newsize != inode->i_size) { + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { + loff_t oldsize = inode->i_size; + loff_t newsize = attr->ia_size; struct page *page = NULL; - if (newsize < inode->i_size) { + if (newsize < oldsize) { /* * If truncating down to a partial page, then * if that page is already allocated, hold it @@ -810,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) spin_unlock(&info->lock); } } - - /* XXX(truncate): truncate_setsize should be called last */ - truncate_setsize(inode, newsize); + if (newsize != oldsize) { + i_size_write(inode, newsize); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + } + if (newsize < oldsize) { + loff_t holebegin = round_up(newsize, PAGE_SIZE); + unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); + shmem_truncate_range(inode, newsize, (loff_t)-1); + /* unmap again to remove racily COWed private pages */ + unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); + } if (page) page_cache_release(page); - shmem_truncate_range(inode, newsize, (loff_t)-1); } setattr_copy(inode, attr); @@ -832,7 +834,6 @@ static void shmem_evict_inode(struct inode *inode) struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { - truncate_inode_pages(inode->i_mapping, 0); shmem_unacct_size(info->flags, inode->i_size); inode->i_size = 0; shmem_truncate_range(inode, 0, (loff_t)-1); @@ -2706,7 +2707,7 @@ static const struct file_operations shmem_file_operations = { }; static const struct inode_operations shmem_inode_operations = { - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .truncate_range = shmem_truncate_range, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, @@ -2739,7 +2740,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .check_acl = generic_check_acl, #endif }; @@ -2752,7 +2753,7 @@ static const struct inode_operations shmem_special_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_notify_change, + .setattr = shmem_setattr, .check_acl = generic_check_acl, #endif }; @@ -2908,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +{ + truncate_inode_pages_range(inode->i_mapping, start, end); +} +EXPORT_SYMBOL_GPL(shmem_truncate_range); + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /** * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file diff --git a/mm/truncate.c b/mm/truncate.c index 5b4c3a4847e..29a9b8a5a31 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -619,9 +619,9 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) mutex_lock(&inode->i_mutex); down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); - truncate_inode_pages_range(mapping, offset, end); - unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); + /* unmap again to remove racily COWed private pages */ + unmap_mapping_range(mapping, offset, (end - offset), 1); up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); -- cgit v1.2.3-70-g09d2 From d9d90e5eb70e09903dadff42099b6c948f814050 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 27 Jun 2011 16:18:04 -0700 Subject: tmpfs: add shmem_read_mapping_page_gfp Although it is used (by i915) on nothing but tmpfs, read_cache_page_gfp() is unsuited to tmpfs, because it inserts a page into pagecache before calling the filesystem's ->readpage: tmpfs may have pages in swapcache which only it knows how to locate and switch to filecache. At present tmpfs provides a ->readpage method, and copes with this by copying pages; but soon we can simplify it by removing its ->readpage. Provide shmem_read_mapping_page_gfp() now, ready for that transition, Export shmem_read_mapping_page_gfp() and add it to list in shmem_fs.h, with shmem_read_mapping_page() inline for the common mapping_gfp case. (shmem_read_mapping_page_gfp or shmem_read_cache_page_gfp? Generally the read_mapping_page functions use the mapping's ->readpage, and the read_cache_page functions use the supplied filler, so I think read_cache_page_gfp was slightly misnamed.) Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 17 ++++++++++------- mm/shmem.c | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 22a20af4d78..aa08fa8fd79 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -3,15 +3,9 @@ #include #include +#include #include -struct page; -struct file; -struct inode; -struct super_block; -struct user_struct; -struct vm_area_struct; - /* inode in-kernel data */ #define SHMEM_NR_DIRECT 16 @@ -61,9 +55,18 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); +static inline struct page *shmem_read_mapping_page( + struct address_space *mapping, pgoff_t index) +{ + return shmem_read_mapping_page_gfp(mapping, index, + mapping_gfp_mask(mapping)); +} + #endif diff --git a/mm/shmem.c b/mm/shmem.c index f1714758ea9..fcedf5464eb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3035,3 +3035,26 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } + +/** + * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags. + * @mapping: the page's address_space + * @index: the page index + * @gfp: the page allocator flags to use if allocating + * + * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", + * with any new page allocations done using the specified allocation flags. + * But read_cache_page_gfp() uses the ->readpage() method: which does not + * suit tmpfs, since it may have pages in swapcache, and needs to find those + * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. + * + * Provide a stub for those callers to start using now, then later + * flesh it out to call shmem_getpage() with additional gfp mask, when + * shmem_file_splice_read() is added and shmem_readpage() is removed. + */ +struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp) +{ + return read_cache_page_gfp(mapping, index, gfp); +} +EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); -- cgit v1.2.3-70-g09d2 From 507c5f1224014f9956e604ee8703b3bbea7da4a4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 27 Jun 2011 16:18:07 -0700 Subject: include/linux/compat.h: declare compat_sys_sendmmsg() This is required for tilegx to be able to use the compat unistd.h header where compat_sys_sendmmsg() is now mentioned. Signed-off-by: Chris Metcalf Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index ddcb7db38e6..846bb179257 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -467,6 +467,8 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen); asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags); +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, -- cgit v1.2.3-70-g09d2 From 08142579b6ca35883c1ed066a2681de6f6917062 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Jun 2011 16:18:10 -0700 Subject: mm: fix assertion mapping->nrpages == 0 in end_writeback() Under heavy memory and filesystem load, users observe the assertion mapping->nrpages == 0 in end_writeback() trigger. This can be caused by page reclaim reclaiming the last page from a mapping in the following race: CPU0 CPU1 ... shrink_page_list() __remove_mapping() __delete_from_page_cache() radix_tree_delete() evict_inode() truncate_inode_pages() truncate_inode_pages_range() pagevec_lookup() - finds nothing end_writeback() mapping->nrpages != 0 -> BUG page->mapping = NULL mapping->nrpages-- Fix the problem by doing a reliable check of mapping->nrpages under mapping->tree_lock in end_writeback(). Analyzed by Jay , lost in LKML, and dug out by Miklos Szeredi . Cc: Jay Cc: Miklos Szeredi Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 7 +++++++ include/linux/fs.h | 1 + mm/truncate.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803..43566d17d1b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e73e2e9ae3..b5b97924786 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,6 +639,7 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ + /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ diff --git a/mm/truncate.c b/mm/truncate.c index 29a9b8a5a31..e13f22efaad 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range); * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_mutex. + * + * Note: When this function returns, there can be a page in the process of + * deletion (inside __delete_from_page_cache()) in the specified range. Thus + * mapping->nrpages can be non-zero when this function returns even after + * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { -- cgit v1.2.3-70-g09d2 From 15b493d11fcce3c5547e3d7fb6d90e11ffe12777 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 28 Jun 2011 09:48:48 +0200 Subject: drbd: fix limit define, we support 1 PiByte now Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 246f576c981..447c3675238 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -117,10 +117,10 @@ /* drbdsetup XY resize -d Z * you are free to reduce the device size to nothing, if you want to. * the upper limit with 64bit kernel, enough ram and flexible meta data - * is 16 TB, currently. */ + * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ #define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ #define DRBD_ON_IO_ERROR_DEF EP_PASS_ON -- cgit v1.2.3-70-g09d2 From 4f3c7a18d9e8a287d31f828a259d713fe4859471 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Jun 2011 15:08:04 +0200 Subject: ALSA: sb16 - Fix build errors on MIPS and others with 13bit ioctl size One of ioctl definition in sound/sb16_csp.h contains the data size over 8kB, and this causes build errors on architectures like MIPS, which define _IOC_SIZEBITS=13. For avoiding this build errors but keeping the compatibility, manually expand with _IOC() instead of using _IOW() for the problematic ioctl. Signed-off-by: Takashi Iwai --- include/sound/sb16_csp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sb16_csp.h b/include/sound/sb16_csp.h index 736eac71d05..af1b49e982d 100644 --- a/include/sound/sb16_csp.h +++ b/include/sound/sb16_csp.h @@ -99,7 +99,14 @@ struct snd_sb_csp_info { /* get CSP information */ #define SNDRV_SB_CSP_IOCTL_INFO _IOR('H', 0x10, struct snd_sb_csp_info) /* load microcode to CSP */ -#define SNDRV_SB_CSP_IOCTL_LOAD_CODE _IOW('H', 0x11, struct snd_sb_csp_microcode) +/* NOTE: struct snd_sb_csp_microcode overflows the max size (13 bits) + * defined for some architectures like MIPS, and it leads to build errors. + * (x86 and co have 14-bit size, thus it's valid, though.) + * As a workaround for skipping the size-limit check, here we don't use the + * normal _IOW() macro but _IOC() with the manual argument. + */ +#define SNDRV_SB_CSP_IOCTL_LOAD_CODE \ + _IOC(_IOC_WRITE, 'H', 0x11, sizeof(struct snd_sb_csp_microcode)) /* unload microcode from CSP */ #define SNDRV_SB_CSP_IOCTL_UNLOAD_CODE _IO('H', 0x12) /* start CSP */ -- cgit v1.2.3-70-g09d2 From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2011 15:25:00 -0700 Subject: ipv6: Don't put artificial limit on routing table size. IPV6, unlike IPV4, doesn't have a routing cache. Routing table entries, as well as clones made in response to route lookup requests, all live in the same table. And all of these things are together collected in the destination cache table for ipv6. This means that routing table entries count against the garbage collection limits, even though such entries cannot ever be reclaimed and are added explicitly by the administrator (rather than being created in response to lookups). Therefore it makes no sense to count ipv6 routing table entries against the GC limits. Add a DST_NOCOUNT destination cache entry flag, and skip the counting if it is set. Use this flag bit in ipv6 when adding routing table entries. Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/core/dst.c | 6 ++++-- net/ipv6/route.c | 13 +++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 7d15d238b6e..e12ddfb9eb1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #define DST_NOPOLICY 0x0004 #define DST_NOHASH 0x0008 #define DST_NOCACHE 0x0010 +#define DST_NOCOUNT 0x0020 union { struct dst_entry *next; struct rtable __rcu *rt_next; diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca038444..6135f367169 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -243,7 +244,8 @@ again: neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2af4da074b..0ef1f086feb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, - struct net_device *dev) + struct net_device *dev, + int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); @@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); if (rt == NULL) { err = -ENOMEM; @@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev); + ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev); + net->loopback_dev, 0); struct neighbour *neigh; if (rt == NULL) { -- cgit v1.2.3-70-g09d2 From e4c2fb0d5776b58049d2556b456144a4db3fe5a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jul 2011 10:56:32 +0200 Subject: sched: Disable (revert) SCHED_LOAD_SCALE increase Alex reported that commit c8b281161df ("sched: Increase SCHED_LOAD_SCALE resolution") caused a power usage regression under light load as it increases the number of load-balance operations and keeps idle cpus from staying idle. Time has run out to find the root cause for this release so disable the feature for v3.0 until we can figure out what causes the problem. Reported-by: "Alex, Shi" Signed-off-by: Peter Zijlstra Cc: Nikhil Rao Cc: Ming Lei Cc: Mike Galbraith Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-m4onxn0sxnyn5iz9o88eskc3@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index a837b20ba19..496770a9648 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -808,7 +808,7 @@ enum cpu_idle_type { * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the * increased costs. */ -#if BITS_PER_LONG > 32 +#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ # define SCHED_LOAD_RESOLUTION 10 # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) -- cgit v1.2.3-70-g09d2 From c902ce1bfb40d8b049bd2319b388b4b68b04bc27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Jul 2011 12:19:48 +0100 Subject: FS-Cache: Add a helper to bulk uncache pages on an inode Add an FS-Cache helper to bulk uncache pages on an inode. This will only work for the circumstance where the pages in the cache correspond 1:1 with the pages attached to an inode's page cache. This is required for CIFS and NFS: When disabling inode cookie, we were returning the cookie and setting cifsi->fscache to NULL but failed to invalidate any previously mapped pages. This resulted in "Bad page state" errors and manifested in other kind of errors when running fsstress. Fix it by uncaching mapped pages when we disable the inode cookie. This patch should fix the following oops and "Bad page state" errors seen during fsstress testing. ------------[ cut here ]------------ kernel BUG at fs/cachefiles/namei.c:201! invalid opcode: 0000 [#1] SMP Pid: 5, comm: kworker/u:0 Not tainted 2.6.38.7-30.fc15.x86_64 #1 Bochs Bochs RIP: 0010: cachefiles_walk_to_object+0x436/0x745 [cachefiles] RSP: 0018:ffff88002ce6dd00 EFLAGS: 00010282 RAX: ffff88002ef165f0 RBX: ffff88001811f500 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000100 RDI: 0000000000000282 RBP: ffff88002ce6dda0 R08: 0000000000000100 R09: ffffffff81b3a300 R10: 0000ffff00066c0a R11: 0000000000000003 R12: ffff88002ae54840 R13: ffff88002ae54840 R14: ffff880029c29c00 R15: ffff88001811f4b0 FS: 00007f394dd32720(0000) GS:ffff88002ef00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fffcb62ddf8 CR3: 000000001825f000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:0 (pid: 5, threadinfo ffff88002ce6c000, task ffff88002ce55cc0) Stack: 0000000000000246 ffff88002ce55cc0 ffff88002ce6dd58 ffff88001815dc00 ffff8800185246c0 ffff88001811f618 ffff880029c29d18 ffff88001811f380 ffff88002ce6dd50 ffffffff814757e4 ffff88002ce6dda0 ffffffff8106ac56 Call Trace: cachefiles_lookup_object+0x78/0xd4 [cachefiles] fscache_lookup_object+0x131/0x16d [fscache] fscache_object_work_func+0x1bc/0x669 [fscache] process_one_work+0x186/0x298 worker_thread+0xda/0x15d kthread+0x84/0x8c kernel_thread_helper+0x4/0x10 RIP cachefiles_walk_to_object+0x436/0x745 [cachefiles] ---[ end trace 1d481c9af1804caa ]--- I tested the uncaching by the following means: (1) Create a big file on my NFS server (104857600 bytes). (2) Read the file into the cache with md5sum on the NFS client. Look in /proc/fs/fscache/stats: Pages : mrk=25601 unc=0 (3) Open the file for read/write ("bash 5<>/warthog/bigfile"). Look in proc again: Pages : mrk=25601 unc=25601 Reported-by: Jeff Layton Signed-off-by: David Howells Reviewed-and-Tested-by: Suresh Jayaraman cc: stable@kernel.org Signed-off-by: Linus Torvalds --- Documentation/filesystems/caching/netfs-api.txt | 16 +++++++++ fs/cifs/fscache.c | 1 + fs/fscache/page.c | 44 +++++++++++++++++++++++++ fs/nfs/fscache.c | 8 ++--- include/linux/fscache.h | 21 ++++++++++++ 5 files changed, 85 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index a167ab876c3..7cc6bf2871e 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request - in which case the page will not be stored in the cache this time. +BULK INODE PAGE UNCACHE +----------------------- + +A convenience routine is provided to perform an uncache on all the pages +attached to an inode. This assumes that the pages on the inode correspond on a +1:1 basis with the pages in the cache. + + void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode); + +This takes the netfs cookie that the pages were cached with and the inode that +the pages are attached to. This function will wait for pages to finish being +written to the cache and for the cache to finish with the page generally. No +error is returned. + + ========================== INDEX AND DATA FILE UPDATE ========================== diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 816696621ec..42e5363b410 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -92,6 +92,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) if (cifsi->fscache) { cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + fscache_uncache_all_inode_pages(cifsi->fscache, inode); fscache_relinquish_cookie(cifsi->fscache, 1); cifsi->fscache = NULL; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index a2a5d19ece6..2f343b4d7a7 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op, pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); + +/* + * Uncache all the pages in an inode that are marked PG_fscache, assuming them + * to be associated with the given cookie. + */ +void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + pgoff_t next; + int i; + + _enter("%p,%p", cookie, inode); + + if (!mapping || mapping->nrpages == 0) { + _leave(" [no pages]"); + return; + } + + pagevec_init(&pvec, 0); + next = 0; + while (next <= (loff_t)-1 && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) + ) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + + ASSERTCMP(page_index, >=, next); + next = page_index + 1; + + if (PageFsCache(page)) { + __fscache_wait_on_page_write(cookie, page); + __fscache_uncache_page(cookie, page); + } + } + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_all_inode_pages); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ce153a6b3ae..419119c371b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode) dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - /* Need to invalidate any mapped pages that were read in before - * turning off the cache. + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. */ - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2(inode->i_mapping); - + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); nfs_fscache_zap_inode_cookie(inode); } } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 7c4d72f5581..9ec20dec335 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -204,6 +204,8 @@ extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, gfp_t); +extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, + struct inode *); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -643,4 +645,23 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, return false; } +/** + * fscache_uncache_all_inode_pages - Uncache all an inode's pages + * @cookie: The cookie representing the inode's cache object. + * @inode: The inode to uncache pages from. + * + * Uncache all the pages in an inode that are marked PG_fscache, assuming them + * to be associated with the given cookie. + * + * This function may sleep. It will wait for pages that are being written out + * and will wait whilst the PG_fscache mark is removed by the cache. + */ +static inline +void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode) +{ + if (fscache_cookie_valid(cookie)) + __fscache_uncache_all_inode_pages(cookie, inode); +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3-70-g09d2 From f607e7fc5fb94d92030c4527287e9c149ddf9e65 Mon Sep 17 00:00:00 2001 From: Jean-François Dagenais Date: Fri, 8 Jul 2011 15:39:44 -0700 Subject: w1: ds1wm: add a reset recovery parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a regression in 3.0 reported by Paul Parsons regarding the removal of the msleep(1) in the ds1wm_reset() function: : The linux-3.0-rc4 DS1WM 1-wire driver is logging "bus error, retrying" : error messages on an HP iPAQ hx4700 PDA (XScale-PXA270): : : : Driver for 1-wire Dallas network protocol. : DS1WM w1 busmaster driver - (c) 2004 Szabolcs Gyurko : 1-Wire driver for the DS2760 battery monitor chip - (c) 2004-2005, Szabolcs Gyurko : ds1wm ds1wm: pass: 1 bus error, retrying : ds1wm ds1wm: pass: 2 bus error, retrying : ds1wm ds1wm: pass: 3 bus error, retrying : ds1wm ds1wm: pass: 4 bus error, retrying : ds1wm ds1wm: pass: 5 bus error, retrying : ... : : The visible result is that the battery charging LED is erratic; sometimes : it works, mostly it doesn't. : : The linux-2.6.39 DS1WM 1-wire driver worked OK. I haven't tried 3.0-rc1, : 3.0-rc2, or 3.0-rc3. This sleep should not be required on normal circuitry provided the pull-ups on the bus are correctly adapted to the slaves. Unfortunately, this is not always the case. The sleep is restored but as a parameter to the probe function in the pdata. [akpm@linux-foundation.org: coding-style fixes] Reported-by: Paul Parsons Tested-by: Paul Parsons Signed-off-by: Jean-François Dagenais Cc: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/asic3.c | 1 + drivers/mfd/htc-pasic3.c | 1 + drivers/w1/masters/ds1wm.c | 5 +++++ include/linux/mfd/ds1wm.h | 7 +++++++ 4 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index c27fd1fc3b8..c71ae09430c 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -619,6 +619,7 @@ static void asic3_clk_disable(struct asic3 *asic, struct asic3_clk *clk) /* MFD cells (SPI, PWM, LED, DS1WM, MMC) */ static struct ds1wm_driver_data ds1wm_pdata = { .active_high = 1, + .reset_recover_delay = 1, }; static struct resource ds1wm_resources[] = { diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c index 2808bd125d1..04c7093d649 100644 --- a/drivers/mfd/htc-pasic3.c +++ b/drivers/mfd/htc-pasic3.c @@ -99,6 +99,7 @@ static int ds1wm_disable(struct platform_device *pdev) static struct ds1wm_driver_data ds1wm_pdata = { .active_high = 0, + .reset_recover_delay = 1, }; static struct resource ds1wm_resources[] __initdata = { diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c index ad57593d224..a0c8965c1a7 100644 --- a/drivers/w1/masters/ds1wm.c +++ b/drivers/w1/masters/ds1wm.c @@ -109,6 +109,7 @@ struct ds1wm_data { /* byte to write that makes all intr disabled, */ /* considering active_state (IAS) (optimization) */ u8 int_en_reg_none; + unsigned int reset_recover_delay; /* see ds1wm.h */ }; static inline void ds1wm_write_register(struct ds1wm_data *ds1wm_data, u32 reg, @@ -187,6 +188,9 @@ static int ds1wm_reset(struct ds1wm_data *ds1wm_data) return 1; } + if (ds1wm_data->reset_recover_delay) + msleep(ds1wm_data->reset_recover_delay); + return 0; } @@ -490,6 +494,7 @@ static int ds1wm_probe(struct platform_device *pdev) } ds1wm_data->irq = res->start; ds1wm_data->int_en_reg_none = (plat->active_high ? DS1WM_INTEN_IAS : 0); + ds1wm_data->reset_recover_delay = plat->reset_recover_delay; if (res->flags & IORESOURCE_IRQ_HIGHEDGE) irq_set_irq_type(ds1wm_data->irq, IRQ_TYPE_EDGE_RISING); diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h index be469a357cb..38a372a0e28 100644 --- a/include/linux/mfd/ds1wm.h +++ b/include/linux/mfd/ds1wm.h @@ -3,4 +3,11 @@ struct ds1wm_driver_data { int active_high; int clock_rate; + /* in milliseconds, the amount of time to */ + /* sleep following a reset pulse. Zero */ + /* should work if your bus devices recover*/ + /* time respects the 1-wire spec since the*/ + /* ds1wm implements the precise timings of*/ + /* a reset pulse/presence detect sequence.*/ + unsigned int reset_recover_delay; }; -- cgit v1.2.3-70-g09d2