From: Nick Piggin Date: Thu, 19 Jan 2006 01:42:27 +0000 (-0800) Subject: [PATCH] mm: migration page refcounting fix X-Git-Tag: v2.6.16-rc2~344 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=053837fce7aa79025ed57656855df09f80175527;p=linux-2.6-omap-h63xx.git [PATCH] mm: migration page refcounting fix Migration code currently does not take a reference to target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page, anything could happen to it. Fix this by holding the pte lock until we get a chance to elevate the refcount. Other small cleanups while we're here. Signed-off-by: Nick Piggin Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 49cc68af01f..8ac854f7f19 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page) } } -/* - * Isolate one page from the LRU lists. - * - * - zone->lru_lock must be held - */ -static inline int __isolate_lru_page(struct page *page) -{ - if (unlikely(!TestClearPageLRU(page))) - return 0; - - if (get_page_testone(page)) { - /* - * It is being freed elsewhere - */ - __put_page(page); - SetPageLRU(page); - return -ENOENT; - } - - return 1; -} diff --git a/include/linux/swap.h b/include/linux/swap.h index e92054d6530..d01f7efb0f2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *)); extern void FASTCALL(activate_page(struct page *)); extern void FASTCALL(mark_page_accessed(struct page *)); extern void lru_add_drain(void); +extern int lru_add_drain_all(void); extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); diff --git a/mm/filemap.c b/mm/filemap.c index a965b6b35f2..44da3d47699 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->private_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one) * ->zone.lru_lock (follow_page->mark_page_accessed) + * ->zone.lru_lock (check_pte_range->isolate_lru_page) * ->private_lock (page_remove_rmap->set_page_dirty) * ->tree_lock (page_remove_rmap->set_page_dirty) * ->inode_lock (page_remove_rmap->set_page_dirty) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3171f884d24..551cde40520 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, page = vm_normal_page(vma, addr, *pte); if (!page) continue; + /* + * The check for PageReserved here is important to avoid + * handling zero pages and other pages that may have been + * marked special by the system. + * + * If the PageReserved would not be checked here then f.e. + * the location of the zero page could have an influence + * on MPOL_MF_STRICT, zero pages would be counted for + * the per node stats, and there would be useless attempts + * to put zero pages on the migration list. + */ if (PageReserved(page)) continue; nid = page_to_nid(page); @@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (flags & MPOL_MF_STATS) gather_stats(page, private); - else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - spin_unlock(ptl); + else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) migrate_page_add(vma, page, private, flags); - spin_lock(ptl); - } else break; } while (pte++, addr += PAGE_SIZE, addr != end); @@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, int err; struct vm_area_struct *first, *vma, *prev; + /* Clear the LRU lists so pages can be isolated */ + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + lru_add_drain_all(); + first = find_vma(mm, start); if (!first) return ERR_PTR(-EFAULT); @@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma, if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || mapping_writably_mapped(page->mapping) || single_mm_mapping(vma->vm_mm, page->mapping)) { - int rc = isolate_lru_page(page); - - if (rc == 1) + if (isolate_lru_page(page)) list_add(&page->lru, pagelist); - /* - * If the isolate attempt was not successful then we just - * encountered an unswappable page. Something must be wrong. - */ - WARN_ON(rc == 0); } } diff --git a/mm/rmap.c b/mm/rmap.c index dfbb89f99a1..d85a99d28c0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -33,7 +33,7 @@ * mapping->i_mmap_lock * anon_vma->lock * mm->page_table_lock or pte_lock - * zone->lru_lock (in mark_page_accessed) + * zone->lru_lock (in mark_page_accessed, isolate_lru_page) * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) * mapping->private_lock (in __set_page_dirty_buffers) diff --git a/mm/swap.c b/mm/swap.c index cbb48e721ab..bc2442a7b0e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -174,6 +174,32 @@ void lru_add_drain(void) put_cpu(); } +#ifdef CONFIG_NUMA +static void lru_add_drain_per_cpu(void *dummy) +{ + lru_add_drain(); +} + +/* + * Returns 0 for success + */ +int lru_add_drain_all(void) +{ + return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); +} + +#else + +/* + * Returns 0 for success + */ +int lru_add_drain_all(void) +{ + lru_add_drain(); + return 0; +} +#endif + /* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. diff --git a/mm/vmscan.c b/mm/vmscan.c index bf903b2d198..827bf674577 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page) } /* - * Add isolated pages on the list back to the LRU + * Add isolated pages on the list back to the LRU. * * returns the number of pages put back. */ @@ -760,46 +760,33 @@ next: return nr_failed + retry; } -static void lru_add_drain_per_cpu(void *dummy) -{ - lru_add_drain(); -} - /* * Isolate one page from the LRU lists and put it on the - * indicated list. Do necessary cache draining if the - * page is not on the LRU lists yet. + * indicated list with elevated refcount. * * Result: * 0 = page not on LRU list * 1 = page removed from LRU list and added to the specified list. - * -ENOENT = page is being freed elsewhere. */ int isolate_lru_page(struct page *page) { - int rc = 0; - struct zone *zone = page_zone(page); + int ret = 0; -redo: - spin_lock_irq(&zone->lru_lock); - rc = __isolate_lru_page(page); - if (rc == 1) { - if (PageActive(page)) - del_page_from_active_list(zone, page); - else - del_page_from_inactive_list(zone, page); - } - spin_unlock_irq(&zone->lru_lock); - if (rc == 0) { - /* - * Maybe this page is still waiting for a cpu to drain it - * from one of the lru lists? - */ - rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); - if (rc == 0 && PageLRU(page)) - goto redo; + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + spin_lock_irq(&zone->lru_lock); + if (TestClearPageLRU(page)) { + ret = 1; + get_page(page); + if (PageActive(page)) + del_page_from_active_list(zone, page); + else + del_page_from_inactive_list(zone, page); + } + spin_unlock_irq(&zone->lru_lock); } - return rc; + + return ret; } #endif @@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - switch (__isolate_lru_page(page)) { - case 1: - /* Succeeded to isolate page */ - list_move(&page->lru, dst); - nr_taken++; - break; - case -ENOENT: - /* Not possible to isolate */ - list_move(&page->lru, src); - break; - default: + if (!TestClearPageLRU(page)) BUG(); + list_del(&page->lru); + if (get_page_testone(page)) { + /* + * It is being freed elsewhere + */ + __put_page(page); + SetPageLRU(page); + list_add(&page->lru, src); + continue; + } else { + list_add(&page->lru, dst); + nr_taken++; } }