[PATCH] More page migration: use migration entries for file pages

author Christoph Lameter <clameter@sgi.com>

Fri, 23 Jun 2006 09:03:38 +0000 (02:03 -0700)

committer Linus Torvalds <torvalds@g5.osdl.org>

Fri, 23 Jun 2006 14:42:51 +0000 (07:42 -0700)
author Christoph Lameter <clameter@sgi.com>
Fri, 23 Jun 2006 09:03:38 +0000 (02:03 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Fri, 23 Jun 2006 14:42:51 +0000 (07:42 -0700)
diff --git a/include/linux/swap.h b/include/linux/swap.h

index 7cee73ef4f15c79bacc9a4b1bdd400d5f323be71..1cf234e8df55747a06dd64317d8e3b13d4c58c26 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -186,20 +186,6 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages);
  extern int vm_swappiness;
  extern int remove_mapping(struct address_space *mapping, struct page *page);
  
-/* possible outcome of pageout() */
-typedef enum {
-       /* failed to write page out, page is locked */
-       PAGE_KEEP,
-       /* move page to the active list, page is locked */
-       PAGE_ACTIVATE,
-       /* page has been sent to the disk successfully, page is unlocked */
-       PAGE_SUCCESS,
-       /* page is clean and locked */
-       PAGE_CLEAN,
-} pageout_t;
-
-extern pageout_t pageout(struct page *page, struct address_space *mapping);
-
  #ifdef CONFIG_NUMA
  extern int zone_reclaim_mode;
  extern int zone_reclaim_interval;
@@ -259,7 +245,6 @@ extern int remove_exclusive_swap_page(struct page *);
  struct backing_dev_info;
  
  extern spinlock_t swap_lock;
-extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page);
  
  /* linux/mm/thrash.c */
  extern struct mm_struct * swap_token_mm;
diff --git a/mm/migrate.c b/mm/migrate.c

index 96b9546e69e0f23cd99f03c6d06a67dfeadeda23..b5000d463893ffcb8af281aef6635b61904e4680 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -24,6 +24,7 @@
  #include <linux/topology.h>
  #include <linux/cpu.h>
  #include <linux/cpuset.h>
+#include <linux/writeback.h>
  
  #include "internal.h"
  
@@ -123,7 +124,7 @@ static inline int is_swap_pte(pte_t pte)
  /*
   * Restore a potential migration pte to a working pte entry
   */
-static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
+static void remove_migration_pte(struct vm_area_struct *vma,
                 struct page *old, struct page *new)
  {
         struct mm_struct *mm = vma->vm_mm;
@@ -133,6 +134,10 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
         pmd_t *pmd;
         pte_t *ptep, pte;
         spinlock_t *ptl;
+       unsigned long addr = page_address_in_vma(new, vma);
+
+       if (addr == -EFAULT)
+               return;
  
         pgd = pgd_offset(mm, addr);
         if (!pgd_present(*pgd))
@@ -169,19 +174,47 @@ static void remove_migration_pte(struct vm_area_struct *vma, unsigned long addr,
         if (is_write_migration_entry(entry))
                 pte = pte_mkwrite(pte);
         set_pte_at(mm, addr, ptep, pte);
-       page_add_anon_rmap(new, vma, addr);
+
+       if (PageAnon(new))
+               page_add_anon_rmap(new, vma, addr);
+       else
+               page_add_file_rmap(new);
+
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, addr, pte);
+       lazy_mmu_prot_update(pte);
+
  out:
         pte_unmap_unlock(ptep, ptl);
  }
  
  /*
- * Get rid of all migration entries and replace them by
- * references to the indicated page.
- *
+ * Note that remove_file_migration_ptes will only work on regular mappings.
+ * Nonlinear mappings do not use migration entries.
+ */
+static void remove_file_migration_ptes(struct page *old, struct page *new)
+{
+       struct vm_area_struct *vma;
+       struct address_space *mapping = page_mapping(new);
+       struct prio_tree_iter iter;
+       pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+       if (!mapping)
+               return;
+
+       spin_lock(&mapping->i_mmap_lock);
+
+       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
+               remove_migration_pte(vma, old, new);
+
+       spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
   * Must hold mmap_sem lock on at least one of the vmas containing
   * the page so that the anon_vma cannot vanish.
   */
-static void remove_migration_ptes(struct page *old, struct page *new)
+static void remove_anon_migration_ptes(struct page *old, struct page *new)
  {
         struct anon_vma *anon_vma;
         struct vm_area_struct *vma;
@@ -199,12 +232,23 @@ static void remove_migration_ptes(struct page *old, struct page *new)
         spin_lock(&anon_vma->lock);
  
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-               remove_migration_pte(vma, page_address_in_vma(new, vma),
-                                       old, new);
+               remove_migration_pte(vma, old, new);
  
         spin_unlock(&anon_vma->lock);
  }
  
+/*
+ * Get rid of all migration entries and replace them by
+ * references to the indicated page.
+ */
+static void remove_migration_ptes(struct page *old, struct page *new)
+{
+       if (PageAnon(new))
+               remove_anon_migration_ptes(old, new);
+       else
+               remove_file_migration_ptes(old, new);
+}
+
  /*
   * Something used the pte of a page under migration. We need to
   * get to the page and wait until migration is finished.
@@ -424,30 +468,59 @@ int buffer_migrate_page(struct address_space *mapping,
  }
  EXPORT_SYMBOL(buffer_migrate_page);
  
-static int fallback_migrate_page(struct address_space *mapping,
-       struct page *newpage, struct page *page)
+/*
+ * Writeback a page to clean the dirty state
+ */
+static int writeout(struct address_space *mapping, struct page *page)
  {
+       struct writeback_control wbc = {
+               .sync_mode = WB_SYNC_NONE,
+               .nr_to_write = 1,
+               .range_start = 0,
+               .range_end = LLONG_MAX,
+               .nonblocking = 1,
+               .for_reclaim = 1
+       };
+       int rc;
+
+       if (!mapping->a_ops->writepage)
+               /* No write method for the address space */
+               return -EINVAL;
+
+       if (!clear_page_dirty_for_io(page))
+               /* Someone else already triggered a write */
+               return -EAGAIN;
+
         /*
-        * Default handling if a filesystem does not provide
-        * a migration function. We can only migrate clean
-        * pages so try to write out any dirty pages first.
+        * A dirty page may imply that the underlying filesystem has
+        * the page on some queue. So the page must be clean for
+        * migration. Writeout may mean we lose the lock and the
+        * page state is no longer what we checked for earlier.
+        * At this point we know that the migration attempt cannot
+        * be successful.
          */
-       if (PageDirty(page)) {
-               switch (pageout(page, mapping)) {
-               case PAGE_KEEP:
-               case PAGE_ACTIVATE:
-                       return -EAGAIN;
+       remove_migration_ptes(page, page);
  
-               case PAGE_SUCCESS:
-                       /* Relock since we lost the lock */
-                       lock_page(page);
-                       /* Must retry since page state may have changed */
-                       return -EAGAIN;
+       rc = mapping->a_ops->writepage(page, &wbc);
+       if (rc < 0)
+               /* I/O Error writing */
+               return -EIO;
  
-               case PAGE_CLEAN:
-                       ; /* try to migrate the page below */
-               }
-       }
+       if (rc != AOP_WRITEPAGE_ACTIVATE)
+               /* unlocked. Relock */
+               lock_page(page);
+
+       return -EAGAIN;
+}
+
+/*
+ * Default handling if a filesystem does not provide a migration function.
+ */
+static int fallback_migrate_page(struct address_space *mapping,
+       struct page *newpage, struct page *page)
+{
+       if (PageDirty(page))
+               return writeout(mapping, page);
  
         /*
          * Buffers may be managed in a filesystem specific way.
diff --git a/mm/rmap.c b/mm/rmap.c

index 05d6d73a692dc7d42588922ef5fdad13b0b26769..882a85826bb2d2c6cf5ef61ee7d3fb982b125e9b 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -596,6 +596,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                                 spin_unlock(&mmlist_lock);
                         }
                         dec_mm_counter(mm, anon_rss);
+#ifdef CONFIG_MIGRATION
                 } else {
                         /*
                          * Store the pfn of the page in a special migration
@@ -604,12 +605,22 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                          */
                         BUG_ON(!migration);
                         entry = make_migration_entry(page, pte_write(pteval));
+#endif
                 }
                 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
                 BUG_ON(pte_file(*pte));
         } else
+#ifdef CONFIG_MIGRATION
+       if (migration) {
+               /* Establish migration entry for a file page */
+               swp_entry_t entry;
+               entry = make_migration_entry(page, pte_write(pteval));
+               set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+       } else
+#endif
                 dec_mm_counter(mm, file_rss);
  
+
         page_remove_rmap(page);
         page_cache_release(page);
  
diff --git a/mm/vmscan.c b/mm/vmscan.c

index bc5d4f43036cc100d566b0628d89b8433104224f..71a02e2950379626001ea4d7d87f46a875ee7c45 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -290,11 +290,23 @@ static void handle_write_error(struct address_space *mapping,
         unlock_page(page);
  }
  
+/* possible outcome of pageout() */
+typedef enum {
+       /* failed to write page out, page is locked */
+       PAGE_KEEP,
+       /* move page to the active list, page is locked */
+       PAGE_ACTIVATE,
+       /* page has been sent to the disk successfully, page is unlocked */
+       PAGE_SUCCESS,
+       /* page is clean and locked */
+       PAGE_CLEAN,
+} pageout_t;
+
  /*
   * pageout is called by shrink_page_list() for each dirty page.
   * Calls ->writepage().
   */
-pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping)
  {
         /*
          * If the page is dirty, only perform writeback if that write
author	Christoph Lameter <clameter@sgi.com>
	Fri, 23 Jun 2006 09:03:38 +0000 (02:03 -0700)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Fri, 23 Jun 2006 14:42:51 +0000 (07:42 -0700)
include/linux/swap.h		patch \| blob \| history
mm/migrate.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history