diff options
author | Mel Gorman <mgorman@suse.de> | 2012-10-08 16:29:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-09 16:22:21 +0900 |
commit | 1fb3f8ca0e9222535a39b884cb67a34628411b9f (patch) | |
tree | 5e72498f44d4fcae18f29eb87dd05a849a7a23c9 /mm/compaction.c | |
parent | 83fde0f22872aa8c1d46f775cc7bdfa864499e65 (diff) |
mm: compaction: capture a suitable high-order page immediately when it is made available
While compaction is migrating pages to free up large contiguous blocks
for allocation it races with other allocation requests that may steal
these blocks or break them up. This patch alters direct compaction to
capture a suitable free page as soon as it becomes available to reduce
this race. It uses similar logic to split_free_page() to ensure that
watermarks are still obeyed.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/compaction.c')
-rw-r--r-- | mm/compaction.c | 90 |
1 files changed, 79 insertions, 11 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 7168edc7592..0fbc6b73a52 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock, return compact_checklock_irqsave(lock, flags, false, cc); } +static void compact_capture_page(struct compact_control *cc) +{ + unsigned long flags; + int mtype, mtype_low, mtype_high; + + if (!cc->page || *cc->page) + return; + + /* + * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP + * regardless of the migratetype of the freelist is is captured from. + * This is fine because the order for a high-order MIGRATE_MOVABLE + * allocation is typically at least a pageblock size and overall + * fragmentation is not impaired. Other allocation types must + * capture pages from their own migratelist because otherwise they + * could pollute other pageblocks like MIGRATE_MOVABLE with + * difficult to move pages and making fragmentation worse overall. + */ + if (cc->migratetype == MIGRATE_MOVABLE) { + mtype_low = 0; + mtype_high = MIGRATE_PCPTYPES; + } else { + mtype_low = cc->migratetype; + mtype_high = cc->migratetype + 1; + } + + /* Speculatively examine the free lists without zone lock */ + for (mtype = mtype_low; mtype < mtype_high; mtype++) { + int order; + for (order = cc->order; order < MAX_ORDER; order++) { + struct page *page; + struct free_area *area; + area = &(cc->zone->free_area[order]); + if (list_empty(&area->free_list[mtype])) + continue; + + /* Take the lock and attempt capture of the page */ + if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc)) + return; + if (!list_empty(&area->free_list[mtype])) { + page = list_entry(area->free_list[mtype].next, + struct page, lru); + if (capture_free_page(page, cc->order, mtype)) { + spin_unlock_irqrestore(&cc->zone->lock, + flags); + *cc->page = page; + return; + } + } + spin_unlock_irqrestore(&cc->zone->lock, flags); + } + } +} + /* * Isolate free pages onto a private freelist. Caller must hold zone->lock. * If @strict is true, will abort returning 0 on any invalid PFNs or non-free @@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, static int compact_finished(struct zone *zone, struct compact_control *cc) { - unsigned int order; unsigned long watermark; if (fatal_signal_pending(current)) @@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ - for (order = cc->order; order < MAX_ORDER; order++) { - /* Job done if page is free of the right migratetype */ - if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) - return COMPACT_PARTIAL; - - /* Job done if allocation would set block type */ - if (order >= pageblock_order && zone->free_area[order].nr_free) + if (cc->page) { + /* Was a suitable page captured? */ + if (*cc->page) return COMPACT_PARTIAL; + } else { + unsigned int order; + for (order = cc->order; order < MAX_ORDER; order++) { + struct free_area *area = &zone->free_area[cc->order]; + /* Job done if page is free of the right migratetype */ + if (!list_empty(&area->free_list[cc->migratetype])) + return COMPACT_PARTIAL; + + /* Job done if allocation would set block type */ + if (cc->order >= pageblock_order && area->nr_free) + return COMPACT_PARTIAL; + } } return COMPACT_CONTINUE; @@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } + + /* Capture a page now if it is a suitable size */ + compact_capture_page(cc); } out: @@ -829,7 +893,8 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, bool *contended) + bool sync, bool *contended, + struct page **page) { struct compact_control cc = { .nr_freepages = 0, @@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone, .zone = zone, .sync = sync, .contended = contended, + .page = page, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended) + bool sync, bool *contended, struct page **page) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, int status; status = compact_zone_order(zone, order, gfp_mask, sync, - contended); + contended, page); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order) struct compact_control cc = { .order = order, .sync = false, + .page = NULL, }; return __compact_pgdat(pgdat, &cc); @@ -946,6 +1013,7 @@ static int compact_node(int nid) struct compact_control cc = { .order = -1, .sync = true, + .page = NULL, }; return __compact_pgdat(NODE_DATA(nid), &cc); |