x86: make DEBUG_PAGEALLOC and CPA more robust
author Ingo Molnar <mingo@elte.hu>
Wed, 13 Feb 2008 13:37:52 +0000 (14:37 +0100)
committer Ingo Molnar <mingo@elte.hu>
Tue, 26 Feb 2008 11:55:50 +0000 (12:55 +0100)
Use PF_MEMALLOC to prevent recursive calls in the DEBUG_PAGEALLOC
case. This makes the code simpler and more robust against allocation
failures.

This fixes the following fallback to non-mmconfig:

   http://lkml.org/lkml/2008/2/20/551
   http://bugzilla.kernel.org/show_bug.cgi?id=10083

Also, for DEBUG_PAGEALLOC=n reduce the pool size to one page.
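
As a standalone illustration of the re-entrancy guard described above, here is a
minimal, hypothetical userspace C sketch: a per-thread flag plays the role of
PF_MEMALLOC in current->flags, and refill_pool() stands in for cpa_fill_pool().
The names in_refill and refill_pool are invented for this sketch and only mirror
the recursion-guard half of the pattern; the real flag additionally signals
allocation priority to the page allocator.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical per-thread flag, mirroring current->flags & PF_MEMALLOC. */
static __thread bool in_refill;

/* Hypothetical refill path that the allocator might re-enter. */
static void refill_pool(void)
{
        /* Already refilling on this task: bail out instead of recursing. */
        if (in_refill)
                return;
        in_refill = true;

        /*
         * ... allocate pages here; if the allocation path re-enters this
         * function (as DEBUG_PAGEALLOC can, via its page-attribute hooks),
         * the flag above stops the recursion.
         */
        printf("refilling pool\n");

        in_refill = false;
}

int main(void)
{
        refill_pool();
        return 0;
}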

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/mm/pageattr.c

index 464d8fc21ce69b67d8d8bf057f5a57ef6a84c677..14e48b5a94ba193c1cb4acc667482e05905d373e 100644
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -355,45 +361,48 @@ out_unlock:
 
 static LIST_HEAD(page_pool);
 static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
 
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
 {
-       struct page *p;
        gfp_t gfp = GFP_KERNEL;
+       unsigned long flags;
+       struct page *p;
 
-       /* Do not allocate from interrupt context */
-       if (in_irq() || irqs_disabled())
-               return;
        /*
-        * Check unlocked. I does not matter when we have one more
-        * page in the pool. The bit lock avoids recursive pool
-        * allocations:
+        * Avoid recursion (on debug-pagealloc) and also signal
+        * our priority to get to these pagetables:
         */
-       if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+       if (current->flags & PF_MEMALLOC)
                return;
+       current->flags |= PF_MEMALLOC;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
        /*
-        * We could do:
-        * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
-        * but this fails on !PREEMPT kernels
+        * Allocate atomically from atomic contexts:
         */
-       gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+       if (in_atomic() || irqs_disabled() || debug_pagealloc)
+               gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 
-       while (pool_pages < pool_size) {
+       while (pool_pages < pool_size || (ret && !*ret)) {
                p = alloc_pages(gfp, 0);
                if (!p) {
                        pool_failed++;
                        break;
                }
-               spin_lock_irq(&pgd_lock);
+               /*
+                * If the call site needs a page right now, provide it:
+                */
+               if (ret && !*ret) {
+                       *ret = p;
+                       continue;
+               }
+               spin_lock_irqsave(&pgd_lock, flags);
                list_add(&p->lru, &page_pool);
                pool_pages++;
-               spin_unlock_irq(&pgd_lock);
+               spin_unlock_irqrestore(&pgd_lock, flags);
        }
-       clear_bit_unlock(0, &pool_refill);
+
+       current->flags &= ~PF_MEMALLOC;
 }
 
 #define SHIFT_MB               (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
         * GiB. Shift MiB to Gib and multiply the result by
         * POOL_PAGES_PER_GB:
         */
-       gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-       pool_size = POOL_PAGES_PER_GB * gb;
+       if (debug_pagealloc) {
+               gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+               pool_size = POOL_PAGES_PER_GB * gb;
+       } else {
+               pool_size = 1;
+       }
        pool_low = pool_size;
 
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
        printk(KERN_DEBUG
               "CPA: page pool initialized %lu of %lu pages preallocated\n",
               pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
        spin_lock_irqsave(&pgd_lock, flags);
        if (list_empty(&page_pool)) {
                spin_unlock_irqrestore(&pgd_lock, flags);
-               return -ENOMEM;
+               base = NULL;
+               cpa_fill_pool(&base);
+               if (!base)
+                       return -ENOMEM;
+               spin_lock_irqsave(&pgd_lock, flags);
+       } else {
+               base = list_first_entry(&page_pool, struct page, lru);
+               list_del(&base->lru);
+               pool_pages--;
+
+               if (pool_pages < pool_low)
+                       pool_low = pool_pages;
        }
 
-       base = list_first_entry(&page_pool, struct page, lru);
-       list_del(&base->lru);
-       pool_pages--;
-
-       if (pool_pages < pool_low)
-               pool_low = pool_pages;
-
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
                cpa_flush_all(cache);
 
 out:
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
+
        return ret;
 }
 
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
         * Try to refill the page pool here. We can do this only after
         * the tlb flush.
         */
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
 }
 
 #ifdef CONFIG_HIBERNATION