From d7eecb483cc29e929bbc5515b8def830d7fc6ad2 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 28 Jan 2010 16:13:01 +0800 Subject: jfs_dmap.[ch]: trivial typo fix: s/heigth/height/g MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Daniel Mack Signed-off-by: Dave Kleikamp Cc: Jiri Kosina Cc: André Goddard Rosa Cc: jfs-discussion@lists.sourceforge.net --- fs/jfs/jfs_dmap.c | 16 ++++++++-------- fs/jfs/jfs_dmap.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index d9b031cf69f..7e19d2fe009 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -195,7 +195,7 @@ int dbMount(struct inode *ipbmap) bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); - bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); @@ -287,7 +287,7 @@ int dbSync(struct inode *ipbmap) dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); - dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); @@ -1440,7 +1440,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * tree index of this allocation group within the control page. */ agperlev = - (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); /* dmap control page trees fan-out by 4 and a single allocation @@ -1459,7 +1459,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * the subtree to find the leftmost leaf that describes this * free space. */ - for (k = bmp->db_agheigth; k > 0; k--) { + for (k = bmp->db_agheight; k > 0; k--) { for (n = 0, m = (ti << 2) + 1; n < 4; n++) { if (l2nb <= dcp->stree[m + n]) { ti = m + n; @@ -3606,7 +3606,7 @@ void dbFinalizeBmap(struct inode *ipbmap) } /* - * compute db_aglevel, db_agheigth, db_width, db_agstart: + * compute db_aglevel, db_agheight, db_width, db_agstart: * an ag is covered in aglevel dmapctl summary tree, * at agheight level height (from leaf) with agwidth number of nodes * each, which starts at agstart index node of the smmary tree node @@ -3615,9 +3615,9 @@ void dbFinalizeBmap(struct inode *ipbmap) bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); l2nl = bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); - bmp->db_agheigth = l2nl >> 1; - bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); - for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; + bmp->db_agheight = l2nl >> 1; + bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); + for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; i--) { bmp->db_agstart += n; n <<= 2; diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 1a6eb41569b..6dcb906c55d 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -210,7 +210,7 @@ struct dbmap_disk { __le32 dn_maxag; /* 4: max active alloc group number */ __le32 dn_agpref; /* 4: preferred alloc group (hint) */ __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ + __le32 dn_agheight; /* 4: height in dmapctl of the AG */ __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ __le32 dn_agstart; /* 4: start tree index at AG height */ __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ @@ -229,7 +229,7 @@ struct dbmap { int dn_maxag; /* max active alloc group number */ int dn_agpref; /* preferred alloc group (hint) */ int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheigth; /* height in dmapctl of the AG */ + int dn_agheight; /* height in dmapctl of the AG */ int dn_agwidth; /* width in dmapctl of the AG */ int dn_agstart; /* start tree index at AG height */ int dn_agl2size; /* l2 num of blks per alloc group */ @@ -255,7 +255,7 @@ struct bmap { #define db_agsize db_bmap.dn_agsize #define db_agl2size db_bmap.dn_agl2size #define db_agwidth db_bmap.dn_agwidth -#define db_agheigth db_bmap.dn_agheigth +#define db_agheight db_bmap.dn_agheight #define db_agstart db_bmap.dn_agstart #define db_numag db_bmap.dn_numag #define db_maxlevel db_bmap.dn_maxlevel -- cgit v1.2.3-18-g5258 From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 15:23:44 -0800 Subject: rbtree: Add support for augmented rbtrees Add support for augmented rbtrees in core rbtree code. This will be used in subsequent patches, in x86 PAT code, which needs interval trees to efficiently keep track of PAT ranges. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- Documentation/rbtree.txt | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rbtree.h | 5 ++++- lib/rbtree.c | 48 +++++++++++++++++++++++++++++++++++---- 3 files changed, 106 insertions(+), 5 deletions(-) diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt index aae8355d316..221f38be98f 100644 --- a/Documentation/rbtree.txt +++ b/Documentation/rbtree.txt @@ -190,3 +190,61 @@ Example: for (node = rb_first(&mytree); node; node = rb_next(node)) printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); +Support for Augmented rbtrees +----------------------------- + +Augmented rbtree is an rbtree with "some" additional data stored in each node. +This data can be used to augment some new functionality to rbtree. +Augmented rbtree is an optional feature built on top of basic rbtree +infrastructure. rbtree user who wants this feature will have an augment +callback function in rb_root initialized. + +This callback function will be called from rbtree core routines whenever +a node has a change in one or both of its children. It is the responsibility +of the callback function to recalculate the additional data that is in the +rb node using new children information. Note that if this new additional +data affects the parent node's additional data, then callback function has +to handle it and do the recursive updates. + + +Interval tree is an example of augmented rb tree. Reference - +"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein. +More details about interval trees: + +Classical rbtree has a single key and it cannot be directly used to store +interval ranges like [lo:hi] and do a quick lookup for any overlap with a new +lo:hi or to find whether there is an exact match for a new lo:hi. + +However, rbtree can be augmented to store such interval ranges in a structured +way making it possible to do efficient lookup and exact match. + +This "extra information" stored in each node is the maximum hi +(max_hi) value among all the nodes that are its descendents. This +information can be maintained at each node just be looking at the node +and its immediate children. And this will be used in O(log n) lookup +for lowest match (lowest start address among all possible matches) +with something like: + +find_lowest_match(lo, hi, node) +{ + lowest_match = NULL; + while (node) { + if (max_hi(node->left) > lo) { + // Lowest overlap if any must be on left side + node = node->left; + } else if (overlap(lo, hi, node)) { + lowest_match = node; + break; + } else if (lo > node->lo) { + // Lowest overlap if any must be on right side + node = node->right; + } else { + break; + } + } + return lowest_match; +} + +Finding exact match will be to first find lowest match and then to follow +successor nodes looking for exact match, until the start of a node is beyond +the hi value we are looking for. diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 9c295411d01..8e33a256ea0 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -110,6 +110,7 @@ struct rb_node struct rb_root { struct rb_node *rb_node; + void (*augment_cb)(struct rb_node *node); }; @@ -129,7 +130,9 @@ static inline void rb_set_color(struct rb_node *rb, int color) rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; } -#define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT (struct rb_root) { NULL, NULL, } +#define RB_AUGMENT_ROOT(x) (struct rb_root) { NULL, x} + #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) diff --git a/lib/rbtree.c b/lib/rbtree.c index e2aa3be2985..15e10b1afdd 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,6 +44,11 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(right); + } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -67,12 +72,20 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(left); + } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; + if (root->augment_cb) + root->augment_cb(node); + while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -227,12 +240,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; + int old_parent_cb = 0; + int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { + old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -247,8 +263,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { + successor_parent_cb = 1; if (child) rb_set_parent(child, parent); + parent->rb_left = child; node->rb_right = old->rb_right; @@ -259,6 +277,24 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); + if (root->augment_cb) { + /* + * Here, three different nodes can have new children. + * The parent of the successor node that was selected + * to replace the node to be erased. + * The node that is getting erased and is now replaced + * by its successor. + * The parent of the node getting erased-replaced. + */ + if (successor_parent_cb) + root->augment_cb(parent); + + root->augment_cb(node); + + if (old_parent_cb) + root->augment_cb(rb_parent(old)); + } + goto color; } @@ -267,15 +303,19 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - if (parent) - { + + if (parent) { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - } - else + + if (root->augment_cb) + root->augment_cb(parent); + + } else { root->rb_node = child; + } color: if (color == RB_BLACK) -- cgit v1.2.3-18-g5258 From be5a0c126ad1dea2128dc5aef12c87083518d1ab Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 10 Feb 2010 11:57:06 -0800 Subject: x86, pat: Preparatory changes in pat.c for bigger rbtree change Minor changes in pat.c to cleanup code and make it smoother to introduce bigger rbtree only change in the following patch. The changes are cleaup only and should not have any functional impact. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210195909.792781000@intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 170 +++++++++++++++++++++++---------------------- arch/x86/mm/pat_internal.h | 28 ++++++++ 2 files changed, 116 insertions(+), 82 deletions(-) create mode 100644 arch/x86/mm/pat_internal.h diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ae9648eb1c7..628e507b793 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -30,6 +30,8 @@ #include #include +#include "pat_internal.h" + #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; @@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason) #endif -static int debug_enable; +int pat_debug_enable; static int __init pat_debug_setup(char *str) { - debug_enable = 1; + pat_debug_enable = 1; return 0; } __setup("debugpat", pat_debug_setup); -#define dprintk(fmt, arg...) \ - do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) - - static u64 __read_mostly boot_pat_state; enum { @@ -132,17 +130,6 @@ void pat_init(void) #undef PAT -static char *cattr_name(unsigned long flags) -{ - switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - default: return "broken"; - } -} - /* * The global memtype list keeps track of memory type for specific * physical memory areas. Conflicting memory types in different @@ -159,14 +146,6 @@ static char *cattr_name(unsigned long flags) * memtype_lock protects both the linear list and rbtree. */ -struct memtype { - u64 start; - u64 end; - unsigned long type; - struct list_head nd; - struct rb_node rb; -}; - static struct rb_root memtype_rbroot = RB_ROOT; static LIST_HEAD(memtype_list); static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ @@ -349,6 +328,64 @@ static int free_ram_pages_type(u64 start, u64 end) return 0; } +static int memtype_check_insert(struct memtype *new, unsigned long *new_type) +{ + struct memtype *entry; + u64 start, end; + unsigned long actual_type; + struct list_head *where; + int err = 0; + + start = new->start; + end = new->end; + actual_type = new->type; + + /* Search for existing mapping that overlaps the current range */ + where = NULL; + list_for_each_entry(entry, &memtype_list, nd) { + if (end <= entry->start) { + where = entry->nd.prev; + break; + } else if (start <= entry->start) { /* end > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + where = entry->nd.prev; + } + break; + } else if (start < entry->end) { /* start > entry->start */ + err = chk_conflict(new, entry, new_type); + if (!err) { + dprintk("Overlap at 0x%Lx-0x%Lx\n", + entry->start, entry->end); + + /* + * Move to right position in the linked + * list to add this new entry + */ + list_for_each_entry_continue(entry, + &memtype_list, nd) { + if (start <= entry->start) { + where = entry->nd.prev; + break; + } + } + } + break; + } + } + if (!err) { + if (where) + list_add(&new->nd, where); + else + list_add_tail(&new->nd, &memtype_list); + + memtype_rb_insert(&memtype_rbroot, new); + } + return err; +} + /* * req_type typically has one of the: * - _PAGE_CACHE_WB @@ -364,9 +401,8 @@ static int free_ram_pages_type(u64 start, u64 end) int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) { - struct memtype *new, *entry; + struct memtype *new; unsigned long actual_type; - struct list_head *where; int is_range_ram; int err = 0; @@ -423,42 +459,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); - /* Search for existing mapping that overlaps the current range */ - where = NULL; - list_for_each_entry(entry, &memtype_list, nd) { - if (end <= entry->start) { - where = entry->nd.prev; - break; - } else if (start <= entry->start) { /* end > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - where = entry->nd.prev; - } - break; - } else if (start < entry->end) { /* start > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - - /* - * Move to right position in the linked - * list to add this new entry - */ - list_for_each_entry_continue(entry, - &memtype_list, nd) { - if (start <= entry->start) { - where = entry->nd.prev; - break; - } - } - } - break; - } - } - + err = memtype_check_insert(new, new_type); if (err) { printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " "track %s, req %s\n", @@ -469,13 +470,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } - if (where) - list_add(&new->nd, where); - else - list_add_tail(&new->nd, &memtype_list); - - memtype_rb_insert(&memtype_rbroot, new); - spin_unlock(&memtype_lock); dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", @@ -937,28 +931,40 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) /* get Nth element of the linked list */ -static struct memtype *memtype_get_idx(loff_t pos) +static int copy_memtype_nth_element(struct memtype *out, loff_t pos) { - struct memtype *list_node, *print_entry; + struct memtype *list_node; int i = 1; - print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); - if (!print_entry) - return NULL; - - spin_lock(&memtype_lock); list_for_each_entry(list_node, &memtype_list, nd) { if (pos == i) { - *print_entry = *list_node; - spin_unlock(&memtype_lock); - return print_entry; + *out = *list_node; + return 0; } ++i; } + return 1; +} + +static struct memtype *memtype_get_idx(loff_t pos) +{ + struct memtype *print_entry; + int ret; + + print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!print_entry) + return NULL; + + spin_lock(&memtype_lock); + ret = copy_memtype_nth_element(print_entry, pos); spin_unlock(&memtype_lock); - kfree(print_entry); - return NULL; + if (!ret) { + return print_entry; + } else { + kfree(print_entry); + return NULL; + } } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h new file mode 100644 index 00000000000..6c98780eb73 --- /dev/null +++ b/arch/x86/mm/pat_internal.h @@ -0,0 +1,28 @@ +#ifndef __PAT_INTERNAL_H_ +#define __PAT_INTERNAL_H_ + +extern int pat_debug_enable; + +#define dprintk(fmt, arg...) \ + do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + +struct memtype { + u64 start; + u64 end; + unsigned long type; + struct list_head nd; + struct rb_node rb; +}; + +static inline char *cattr_name(unsigned long flags) +{ + switch (flags & _PAGE_CACHE_MASK) { + case _PAGE_CACHE_UC: return "uncached"; + case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; + case _PAGE_CACHE_WB: return "write-back"; + case _PAGE_CACHE_WC: return "write-combining"; + default: return "broken"; + } +} + +#endif /* __PAT_INTERNAL_H_ */ -- cgit v1.2.3-18-g5258 From 9e41a49aab88a5a6c8f4875bf10a5543bc321f2d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 15:26:07 -0800 Subject: x86, pat: Migrate to rbtree only backend for pat memtype management Move pat backend to fully rbtree based implementation from the existing rbtree and linked list hybrid. New rbtree based solution uses interval trees (augmented rbtrees) in order to store the PAT ranges. The new code seprates out the pat backend to pat_rbtree.c file, making is cleaner. The change also makes the PAT lookup, reserve and free operations more optimal, as we don't have to traverse linear linked list of few tens of entries in normal case. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210232607.GB11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/mm/Makefile | 1 + arch/x86/mm/pat.c | 209 +--------------------------------- arch/x86/mm/pat_internal.h | 20 +++- arch/x86/mm/pat_rbtree.c | 271 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 296 insertions(+), 205 deletions(-) create mode 100644 arch/x86/mm/pat_rbtree.c diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 06630d26e56..a4c768397ba 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_physaddr.o := $(nostackp) CFLAGS_setup_nx.o := $(nostackp) +obj-$(CONFIG_X86_PAT) += pat_rbtree.o obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 628e507b793..951011166ef 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -130,65 +130,7 @@ void pat_init(void) #undef PAT -/* - * The global memtype list keeps track of memory type for specific - * physical memory areas. Conflicting memory types in different - * mappings can cause CPU cache corruption. To avoid this we keep track. - * - * The list is sorted based on starting address and can contain multiple - * entries for each address (this allows reference counting for overlapping - * areas). All the aliases have the same cache attributes of course. - * Zero attributes are represented as holes. - * - * The data structure is a list that is also organized as an rbtree - * sorted on the start address of memtype range. - * - * memtype_lock protects both the linear list and rbtree. - */ - -static struct rb_root memtype_rbroot = RB_ROOT; -static LIST_HEAD(memtype_list); -static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ - -static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while (node) { - struct memtype *data = container_of(node, struct memtype, rb); - - if (data->start < start) { - last_lower = data; - node = node->rb_right; - } else if (data->start > start) { - node = node->rb_left; - } else - return data; - } - - /* Will return NULL if there is no entry with its start <= start */ - return last_lower; -} - -static void memtype_rb_insert(struct rb_root *root, struct memtype *data) -{ - struct rb_node **new = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct memtype *this = container_of(*new, struct memtype, rb); - - parent = *new; - if (data->start <= this->start) - new = &((*new)->rb_left); - else if (data->start > this->start) - new = &((*new)->rb_right); - } - - rb_link_node(&data->rb, parent, new); - rb_insert_color(&data->rb, root); -} +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ /* * Does intersection of PAT memory type and MTRR memory type and returns @@ -216,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) return req_type; } -static int -chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) -{ - if (new->type != entry->type) { - if (type) { - new->type = entry->type; - *type = entry->type; - } else - goto conflict; - } - - /* check overlaps with more than one entry in the list */ - list_for_each_entry_continue(entry, &memtype_list, nd) { - if (new->end <= entry->start) - break; - else if (new->type != entry->type) - goto conflict; - } - return 0; - - conflict: - printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, - new->end, cattr_name(new->type), cattr_name(entry->type)); - return -EBUSY; -} - static int pat_pagerange_is_ram(unsigned long start, unsigned long end) { int ram_page = 0, not_rampage = 0; @@ -328,64 +243,6 @@ static int free_ram_pages_type(u64 start, u64 end) return 0; } -static int memtype_check_insert(struct memtype *new, unsigned long *new_type) -{ - struct memtype *entry; - u64 start, end; - unsigned long actual_type; - struct list_head *where; - int err = 0; - - start = new->start; - end = new->end; - actual_type = new->type; - - /* Search for existing mapping that overlaps the current range */ - where = NULL; - list_for_each_entry(entry, &memtype_list, nd) { - if (end <= entry->start) { - where = entry->nd.prev; - break; - } else if (start <= entry->start) { /* end > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - where = entry->nd.prev; - } - break; - } else if (start < entry->end) { /* start > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - - /* - * Move to right position in the linked - * list to add this new entry - */ - list_for_each_entry_continue(entry, - &memtype_list, nd) { - if (start <= entry->start) { - where = entry->nd.prev; - break; - } - } - } - break; - } - } - if (!err) { - if (where) - list_add(&new->nd, where); - else - list_add_tail(&new->nd, &memtype_list); - - memtype_rb_insert(&memtype_rbroot, new); - } - return err; -} - /* * req_type typically has one of the: * - _PAGE_CACHE_WB @@ -459,7 +316,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); - err = memtype_check_insert(new, new_type); + err = rbt_memtype_check_insert(new, new_type); if (err) { printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " "track %s, req %s\n", @@ -481,7 +338,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, int free_memtype(u64 start, u64 end) { - struct memtype *entry, *saved_entry; int err = -EINVAL; int is_range_ram; @@ -505,46 +361,7 @@ int free_memtype(u64 start, u64 end) } spin_lock(&memtype_lock); - - entry = memtype_rb_search(&memtype_rbroot, start); - if (unlikely(entry == NULL)) - goto unlock_ret; - - /* - * Saved entry points to an entry with start same or less than what - * we searched for. Now go through the list in both directions to look - * for the entry that matches with both start and end, with list stored - * in sorted start address - */ - saved_entry = entry; - list_for_each_entry_from(entry, &memtype_list, nd) { - if (entry->start == start && entry->end == end) { - rb_erase(&entry->rb, &memtype_rbroot); - list_del(&entry->nd); - kfree(entry); - err = 0; - break; - } else if (entry->start > start) { - break; - } - } - - if (!err) - goto unlock_ret; - - entry = saved_entry; - list_for_each_entry_reverse(entry, &memtype_list, nd) { - if (entry->start == start && entry->end == end) { - rb_erase(&entry->rb, &memtype_rbroot); - list_del(&entry->nd); - kfree(entry); - err = 0; - break; - } else if (entry->start < start) { - break; - } - } -unlock_ret: + err = rbt_memtype_erase(start, end); spin_unlock(&memtype_lock); if (err) { @@ -593,7 +410,7 @@ static unsigned long lookup_memtype(u64 paddr) spin_lock(&memtype_lock); - entry = memtype_rb_search(&memtype_rbroot, paddr); + entry = rbt_memtype_lookup(paddr); if (entry != NULL) rettype = entry->type; else @@ -930,22 +747,6 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) -/* get Nth element of the linked list */ -static int copy_memtype_nth_element(struct memtype *out, loff_t pos) -{ - struct memtype *list_node; - int i = 1; - - list_for_each_entry(list_node, &memtype_list, nd) { - if (pos == i) { - *out = *list_node; - return 0; - } - ++i; - } - return 1; -} - static struct memtype *memtype_get_idx(loff_t pos) { struct memtype *print_entry; @@ -956,7 +757,7 @@ static struct memtype *memtype_get_idx(loff_t pos) return NULL; spin_lock(&memtype_lock); - ret = copy_memtype_nth_element(print_entry, pos); + ret = rbt_memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); if (!ret) { diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index 6c98780eb73..4f39eefa3e6 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -9,8 +9,8 @@ extern int pat_debug_enable; struct memtype { u64 start; u64 end; + u64 subtree_max_end; unsigned long type; - struct list_head nd; struct rb_node rb; }; @@ -25,4 +25,22 @@ static inline char *cattr_name(unsigned long flags) } } +#ifdef CONFIG_X86_PAT +extern int rbt_memtype_check_insert(struct memtype *new, + unsigned long *new_type); +extern int rbt_memtype_erase(u64 start, u64 end); +extern struct memtype *rbt_memtype_lookup(u64 addr); +extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); +#else +static inline int rbt_memtype_check_insert(struct memtype *new, + unsigned long *new_type) +{ return 0; } +static inline int rbt_memtype_erase(u64 start, u64 end) +{ return 0; } +static inline struct memtype *rbt_memtype_lookup(u64 addr) +{ return NULL; } +static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ return 0; } +#endif + #endif /* __PAT_INTERNAL_H_ */ diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c new file mode 100644 index 00000000000..9063f40b638 --- /dev/null +++ b/arch/x86/mm/pat_rbtree.c @@ -0,0 +1,271 @@ +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi + * Suresh B Siddha + * + * Interval tree (augmented rbtree) used to store the PAT memory type + * reservations. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "pat_internal.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) with tree ordered + * on starting address. Tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course. + * + * memtype_lock protects the rbtree. + */ + +static void memtype_rb_augment_cb(struct rb_node *node); +static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); + +static int is_node_overlap(struct memtype *node, u64 start, u64 end) +{ + if (node->start >= end || node->end <= start) + return 0; + + return 1; +} + +static u64 get_subtree_max_end(struct rb_node *node) +{ + u64 ret = 0; + if (node) { + struct memtype *data = container_of(node, struct memtype, rb); + ret = data->subtree_max_end; + } + return ret; +} + +/* Update 'subtree_max_end' for a node, based on node and its children */ +static void update_node_max_end(struct rb_node *node) +{ + struct memtype *data; + u64 max_end, child_max_end; + + if (!node) + return; + + data = container_of(node, struct memtype, rb); + max_end = data->end; + + child_max_end = get_subtree_max_end(node->rb_right); + if (child_max_end > max_end) + max_end = child_max_end; + + child_max_end = get_subtree_max_end(node->rb_left); + if (child_max_end > max_end) + max_end = child_max_end; + + data->subtree_max_end = max_end; +} + +/* Update 'subtree_max_end' for a node and all its ancestors */ +static void update_path_max_end(struct rb_node *node) +{ + u64 old_max_end, new_max_end; + + while (node) { + struct memtype *data = container_of(node, struct memtype, rb); + + old_max_end = data->subtree_max_end; + update_node_max_end(node); + new_max_end = data->subtree_max_end; + + if (new_max_end == old_max_end) + break; + + node = rb_parent(node); + } +} + +/* Find the first (lowest start addr) overlapping range from rb tree */ +static struct memtype *memtype_rb_lowest_match(struct rb_root *root, + u64 start, u64 end) +{ + struct rb_node *node = root->rb_node; + struct memtype *last_lower = NULL; + + while (node) { + struct memtype *data = container_of(node, struct memtype, rb); + + if (get_subtree_max_end(node->rb_left) > start) { + /* Lowest overlap if any must be on left side */ + node = node->rb_left; + } else if (is_node_overlap(data, start, end)) { + last_lower = data; + break; + } else if (start >= data->start) { + /* Lowest overlap if any must be on right side */ + node = node->rb_right; + } else { + break; + } + } + return last_lower; /* Returns NULL if there is no overlap */ +} + +static struct memtype *memtype_rb_exact_match(struct rb_root *root, + u64 start, u64 end) +{ + struct memtype *match; + + match = memtype_rb_lowest_match(root, start, end); + while (match != NULL && match->start < end) { + struct rb_node *node; + + if (match->start == start && match->end == end) + return match; + + node = rb_next(&match->rb); + if (node) + match = container_of(node, struct memtype, rb); + else + match = NULL; + } + + return NULL; /* Returns NULL if there is no exact match */ +} + +static int memtype_rb_check_conflict(struct rb_root *root, + u64 start, u64 end, + unsigned long reqtype, unsigned long *newtype) +{ + struct rb_node *node; + struct memtype *match; + int found_type = reqtype; + + match = memtype_rb_lowest_match(&memtype_rbroot, start, end); + if (match == NULL) + goto success; + + if (match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); + found_type = match->type; + + node = rb_next(&match->rb); + while (node) { + match = container_of(node, struct memtype, rb); + + if (match->start >= end) /* Checked all possible matches */ + goto success; + + if (is_node_overlap(match, start, end) && + match->type != found_type) { + goto failure; + } + + node = rb_next(&match->rb); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + printk(KERN_INFO "%s:%d conflicting memory types " + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, + end, cattr_name(found_type), cattr_name(match->type)); + return -EBUSY; +} + +static void memtype_rb_augment_cb(struct rb_node *node) +{ + if (node) + update_path_max_end(node); +} + +static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + + while (*node) { + struct memtype *data = container_of(*node, struct memtype, rb); + + parent = *node; + if (newdata->start <= data->start) + node = &((*node)->rb_left); + else if (newdata->start > data->start) + node = &((*node)->rb_right); + } + + rb_link_node(&newdata->rb, parent, node); + rb_insert_color(&newdata->rb, root); +} + +int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) +{ + int err = 0; + + err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, + new->type, ret_type); + + if (!err) { + new->type = *ret_type; + memtype_rb_insert(&memtype_rbroot, new); + } + return err; +} + +int rbt_memtype_erase(u64 start, u64 end) +{ + struct memtype *data; + + data = memtype_rb_exact_match(&memtype_rbroot, start, end); + if (!data) + return -EINVAL; + + rb_erase(&data->rb, &memtype_rbroot); + return 0; +} + +struct memtype *rbt_memtype_lookup(u64 addr) +{ + struct memtype *data; + data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); + return data; +} + +#if defined(CONFIG_DEBUG_FS) +int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ + struct rb_node *node; + int i = 1; + + node = rb_first(&memtype_rbroot); + while (node && pos != i) { + node = rb_next(node); + i++; + } + + if (node) { /* pos == i */ + struct memtype *this = container_of(node, struct memtype, rb); + *out = *this; + return 0; + } else { + return 1; + } +} +#endif -- cgit v1.2.3-18-g5258 From b3ac891b67bd4b1fc728d1c784cad1212dea433d Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:22 +0100 Subject: x86: Add support for lock prefix in alternatives The current lock prefix UP/SMP alternative code doesn't allow LOCK_PREFIX to be used in alternatives code. This patch solves the problem by adding a new LOCK_PREFIX_ALTERNATIVE_PATCH macro that only records the lock prefix location but does not emit the prefix. The user of this macro can then start any alternative sequence with "lock" and have it UP/SMP patched. To make this work, the UP/SMP alternative code is changed to do the lock/DS prefix switching only if the byte actually contains a lock or DS prefix. Thus, if an alternative without the "lock" is selected, it will now do nothing instead of clobbering the code. Changes in v2: - Naming change - Change label to not conflict with alternatives Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 8 +++++--- arch/x86/kernel/alternative.c | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 3b5b828767b..55fee12cea6 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -28,12 +28,14 @@ */ #ifdef CONFIG_SMP -#define LOCK_PREFIX \ +#define LOCK_PREFIX_HERE \ ".section .smp_locks,\"a\"\n" \ _ASM_ALIGN "\n" \ - _ASM_PTR "661f\n" /* address */ \ + _ASM_PTR "671f\n" /* address */ \ ".previous\n" \ - "661:\n\tlock; " + "671:" + +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " #else /* ! CONFIG_SMP */ #define LOCK_PREFIX "" diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2589ea4c60c..80b222ea4cf 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -244,7 +244,8 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) if (*ptr > text_end) continue; /* turn DS segment override prefix into lock prefix */ - text_poke(*ptr, ((unsigned char []){0xf0}), 1); + if (**ptr == 0x3e) + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; mutex_unlock(&text_mutex); } @@ -263,7 +264,8 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end if (*ptr > text_end) continue; /* turn lock prefix into DS segment override prefix */ - text_poke(*ptr, ((unsigned char []){0x3E}), 1); + if (**ptr == 0xf0) + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; mutex_unlock(&text_mutex); } -- cgit v1.2.3-18-g5258 From 9c76b38476b18c45f97098a10b0176b321eba3ea Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:23 +0100 Subject: x86-32: Allow UP/SMP lock replacement in cmpxchg64 Use the functionality just introduced in the previous patch: mark the lock prefixes in cmpxchg64 alternatives for UP removal. Changes in v2: - Naming change Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ffb9bb6b6c3..8859e12dd3c 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ __typeof__(*(ptr)) __new = (n); \ - alternative_io("call cmpxchg8b_emu", \ + alternative_io(LOCK_PREFIX_HERE \ + "call cmpxchg8b_emu", \ "lock; cmpxchg8b (%%esi)" , \ X86_FEATURE_CX8, \ "=A" (__ret), \ -- cgit v1.2.3-18-g5258 From 86a8938078a8bb518c5376de493e348c7490d506 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:24 +0100 Subject: lib: Add self-test for atomic64_t This patch adds self-test on boot code for atomic64_t. This has been used to test the later changes in this patchset. Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/Kconfig.debug | 7 +++ lib/Makefile | 2 + lib/atomic64_test.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 lib/atomic64_test.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 25c3ed594c5..3676c517a07 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1054,6 +1054,13 @@ config DMA_API_DEBUG This option causes a performance degredation. Use only if you want to debug device drivers. If unsure, say N. +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 347ad8db29d..4af4786fe28 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -99,6 +99,8 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 00000000000..4ff649e46ba --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,158 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include +#include + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(!atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_CMPXCHG64 + "586", +#else + "386", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); -- cgit v1.2.3-18-g5258 From a7e926abc3adfbd2e5e20d2b46177adb4e313915 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:25 +0100 Subject: x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellanous changes and improvements Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/atomic64_32.h | 278 ++++++++++++++++++++++++++++--------- arch/x86/lib/Makefile | 3 +- arch/x86/lib/atomic64_32.c | 273 +++++++----------------------------- arch/x86/lib/atomic64_386_32.S | 175 +++++++++++++++++++++++ arch/x86/lib/atomic64_cx8_32.S | 225 ++++++++++++++++++++++++++++++ 5 files changed, 664 insertions(+), 290 deletions(-) create mode 100644 arch/x86/lib/atomic64_386_32.S create mode 100644 arch/x86/lib/atomic64_cx8_32.S diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 03027bf28de..2a934aa19a4 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -14,109 +14,193 @@ typedef struct { #define ATOMIC64_INIT(val) { (val) } -extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); +#ifdef CONFIG_X86_CMPXCHG64 +#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" +#else +#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) +#endif + +#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) + +/** + * atomic64_cmpxchg - cmpxchg atomic64 variable + * @p: pointer to type atomic64_t + * @o: expected value + * @n: new value + * + * Atomically sets @v to @n if it was equal to @o and returns + * the old value. + */ + +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +{ + return cmpxchg64(&v->counter, o, n); +} /** * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign + * @v: pointer to type atomic64_t + * @n: value to assign * - * Atomically xchgs the value of @ptr to @new_val and returns + * Atomically xchgs the value of @v to @n and returns * the old value. */ -extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); +static inline long long atomic64_xchg(atomic64_t *v, long long n) +{ + long long o; + unsigned high = (unsigned)(n >> 32); + unsigned low = (unsigned)n; + asm volatile(ATOMIC64_ALTERNATIVE(xchg) + : "=A" (o), "+b" (low), "+c" (high) + : "S" (v) + : "memory" + ); + return o; +} /** * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign + * @v: pointer to type atomic64_t + * @n: value to assign * - * Atomically sets the value of @ptr to @new_val. + * Atomically sets the value of @v to @n. */ -extern void atomic64_set(atomic64_t *ptr, u64 new_val); +static inline void atomic64_set(atomic64_t *v, long long i) +{ + unsigned high = (unsigned)(i >> 32); + unsigned low = (unsigned)i; + asm volatile(ATOMIC64_ALTERNATIVE(set) + : "+b" (low), "+c" (high) + : "S" (v) + : "eax", "edx", "memory" + ); +} /** * atomic64_read - read atomic64 variable - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically reads the value of @ptr and returns it. + * Atomically reads the value of @v and returns it. */ -static inline u64 atomic64_read(atomic64_t *ptr) +static inline long long atomic64_read(atomic64_t *v) { - u64 res; - - /* - * Note, we inline this atomic64_t primitive because - * it only clobbers EAX/EDX and leaves the others - * untouched. We also (somewhat subtly) rely on the - * fact that cmpxchg8b returns the current 64-bit value - * of the memory location we are touching: - */ - asm volatile( - "mov %%ebx, %%eax\n\t" - "mov %%ecx, %%edx\n\t" - LOCK_PREFIX "cmpxchg8b %1\n" - : "=&A" (res) - : "m" (*ptr) - ); - - return res; -} - -extern u64 atomic64_read(atomic64_t *ptr); + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(read) + : "=A" (r), "+c" (v) + : : "memory" + ); + return r; + } /** * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr and returns @delta + *@ptr + * Atomically adds @i to @v and returns @i + *@v */ -extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /* * Other variants with different arithmetic operators: */ -extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); -extern u64 atomic64_inc_return(atomic64_t *ptr); -extern u64 atomic64_dec_return(atomic64_t *ptr); +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +static inline long long atomic64_inc_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(inc_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} + +static inline long long atomic64_dec_return(atomic64_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(dec_return) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} /** * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr. + * Atomically adds @i to @v. */ -extern void atomic64_add(u64 delta, atomic64_t *ptr); +static inline long long atomic64_add(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /** * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t + * @i: integer value to subtract + * @v: pointer to type atomic64_t * - * Atomically subtracts @delta from @ptr. + * Atomically subtracts @i from @v. */ -extern void atomic64_sub(u64 delta, atomic64_t *ptr); +static inline long long atomic64_sub(long long i, atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} /** * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all * other cases. */ -extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); +static inline int atomic64_sub_and_test(long long i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} /** * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically increments @ptr by 1. + * Atomically increments @v by 1. */ -extern void atomic64_inc(atomic64_t *ptr); +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} /** * atomic64_dec - decrement atomic64 variable @@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); * * Atomically decrements @ptr by 1. */ -extern void atomic64_dec(atomic64_t *ptr); +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) + : : "S" (v) + : "memory", "eax", "ecx", "edx" + ); +} /** * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically decrements @ptr by 1 and + * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -extern int atomic64_dec_and_test(atomic64_t *ptr); +static inline int atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} /** * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t + * @v: pointer to type atomic64_t * - * Atomically increments @ptr by 1 + * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ -extern int atomic64_inc_and_test(atomic64_t *ptr); +static inline int atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} /** * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t + * @i: integer value to add + * @v: pointer to type atomic64_t * - * Atomically adds @delta to @ptr and returns true + * Atomically adds @i to @v and returns true * if the result is negative, or false when * result is greater than or equal to zero. */ -extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); +static inline int atomic64_add_negative(long long i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + unsigned low = (unsigned)u; + unsigned high = (unsigned)(u >> 32); + asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" + : "+A" (a), "+c" (v), "+S" (low), "+D" (high) + : : "memory"); + return (int)a; +} + + +static inline int atomic64_inc_not_zero(atomic64_t *v) +{ + int r; + asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) + : "=a" (r) + : "S" (v) + : "ecx", "edx", "memory" + ); + return r; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) + : "=A" (r) + : "S" (v) + : "ecx", "memory" + ); + return r; +} + +#undef ATOMIC64_ALTERNATIVE +#undef ATOMIC64_ALTERNATIVE_ #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f303..05d686bbbe9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,11 +26,12 @@ obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o + lib-y += atomic64_cx8_32.o lib-y += checksum_32.o lib-y += strstr_32.o lib-y += semaphore_32.o string_32.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o + lib-y += cmpxchg8b_emu.o atomic64_386_32.o endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a..540179e8e9f 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c @@ -6,225 +6,54 @@ #include #include -static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) -{ - u32 low = new; - u32 high = new >> 32; - - asm volatile( - LOCK_PREFIX "cmpxchg8b %1\n" - : "+A" (old), "+m" (*ptr) - : "b" (low), "c" (high) - ); - return old; -} - -u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) -{ - return cmpxchg8b(&ptr->counter, old_val, new_val); -} -EXPORT_SYMBOL(atomic64_cmpxchg); - -/** - * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically xchgs the value of @ptr to @new_val and returns - * the old value. - */ -u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) -{ - /* - * Try first with a (possibly incorrect) assumption about - * what we have there. We'll do two loops most likely, - * but we'll get an ownership MESI transaction straight away - * instead of a read transaction followed by a - * flush-for-ownership transaction: - */ - u64 old_val, real_val = 0; - - do { - old_val = real_val; - - real_val = atomic64_cmpxchg(ptr, old_val, new_val); - - } while (real_val != old_val); - - return old_val; -} -EXPORT_SYMBOL(atomic64_xchg); - -/** - * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically sets the value of @ptr to @new_val. - */ -void atomic64_set(atomic64_t *ptr, u64 new_val) -{ - atomic64_xchg(ptr, new_val); -} -EXPORT_SYMBOL(atomic64_set); - -/** -EXPORT_SYMBOL(atomic64_read); - * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns @delta + *@ptr - */ -noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) -{ - /* - * Try first with a (possibly incorrect) assumption about - * what we have there. We'll do two loops most likely, - * but we'll get an ownership MESI transaction straight away - * instead of a read transaction followed by a - * flush-for-ownership transaction: - */ - u64 old_val, new_val, real_val = 0; - - do { - old_val = real_val; - new_val = old_val + delta; - - real_val = atomic64_cmpxchg(ptr, old_val, new_val); - - } while (real_val != old_val); - - return new_val; -} -EXPORT_SYMBOL(atomic64_add_return); - -u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) -{ - return atomic64_add_return(-delta, ptr); -} -EXPORT_SYMBOL(atomic64_sub_return); - -u64 atomic64_inc_return(atomic64_t *ptr) -{ - return atomic64_add_return(1, ptr); -} -EXPORT_SYMBOL(atomic64_inc_return); - -u64 atomic64_dec_return(atomic64_t *ptr) -{ - return atomic64_sub_return(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec_return); - -/** - * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr. - */ -void atomic64_add(u64 delta, atomic64_t *ptr) -{ - atomic64_add_return(delta, ptr); -} -EXPORT_SYMBOL(atomic64_add); - -/** - * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr. - */ -void atomic64_sub(u64 delta, atomic64_t *ptr) -{ - atomic64_add(-delta, ptr); -} -EXPORT_SYMBOL(atomic64_sub); - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns - * true if the result is zero, or false for all - * other cases. - */ -int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) -{ - u64 new_val = atomic64_sub_return(delta, ptr); - - return new_val == 0; -} -EXPORT_SYMBOL(atomic64_sub_and_test); - -/** - * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1. - */ -void atomic64_inc(atomic64_t *ptr) -{ - atomic64_add(1, ptr); -} -EXPORT_SYMBOL(atomic64_inc); - -/** - * atomic64_dec - decrement atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1. - */ -void atomic64_dec(atomic64_t *ptr) -{ - atomic64_sub(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec); - -/** - * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -int atomic64_dec_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec_and_test); - -/** - * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -int atomic64_inc_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(-1, ptr); -} -EXPORT_SYMBOL(atomic64_inc_and_test); - -/** - * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -int atomic64_add_negative(u64 delta, atomic64_t *ptr) -{ - s64 new_val = atomic64_add_return(delta, ptr); - - return new_val < 0; -} -EXPORT_SYMBOL(atomic64_add_negative); +long long atomic64_read_cx8(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_read_cx8); +long long atomic64_set_cx8(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_set_cx8); +l