mlock: downgrade mmap sem while populating mlocked regions

author Lee Schermerhorn <lee.schermerhorn@hp.com>

Sun, 19 Oct 2008 03:26:49 +0000 (20:26 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 20 Oct 2008 15:52:31 +0000 (08:52 -0700)
author Lee Schermerhorn <lee.schermerhorn@hp.com>
Sun, 19 Oct 2008 03:26:49 +0000 (20:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Oct 2008 15:52:31 +0000 (08:52 -0700)
diff --git a/mm/mlock.c b/mm/mlock.c

index 8746fe3f973040e4447f880d853d3b8cb49419ec..c83896a725042d45694777305107fd33ba9c9c28 100644 (file)
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -318,6 +318,7 @@ static void __munlock_vma_pages_range(struct vm_area_struct *vma,
  int mlock_vma_pages_range(struct vm_area_struct *vma,
                         unsigned long start, unsigned long end)
  {
+       struct mm_struct *mm = vma->vm_mm;
         int nr_pages = (end - start) / PAGE_SIZE;
         BUG_ON(!(vma->vm_flags & VM_LOCKED));
  
@@ -329,8 +330,19 @@ int mlock_vma_pages_range(struct vm_area_struct *vma,
  
         if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
                         is_vm_hugetlb_page(vma) ||
-                       vma == get_gate_vma(current)))
-               return __mlock_vma_pages_range(vma, start, end);
+                       vma == get_gate_vma(current))) {
+               downgrade_write(&mm->mmap_sem);
+               nr_pages = __mlock_vma_pages_range(vma, start, end);
+
+               up_read(&mm->mmap_sem);
+               /* vma can change or disappear */
+               down_write(&mm->mmap_sem);
+               vma = find_vma(mm, start);
+               /* non-NULL vma must contain @start, but need to check @end */
+               if (!vma ||  end > vma->vm_end)
+                       return -EAGAIN;
+               return nr_pages;
+       }
  
         /*
          * User mapped kernel pages or huge pages:
@@ -424,13 +436,41 @@ success:
         vma->vm_flags = newflags;
  
         if (lock) {
+               /*
+                * mmap_sem is currently held for write.  Downgrade the write
+                * lock to a read lock so that other faults, mmap scans, ...
+                * while we fault in all pages.
+                */
+               downgrade_write(&mm->mmap_sem);
+
                 ret = __mlock_vma_pages_range(vma, start, end);
                 if (ret > 0) {
                         mm->locked_vm -= ret;
                         ret = 0;
                 }
-       } else
+               /*
+                * Need to reacquire mmap sem in write mode, as our callers
+                * expect this.  We have no support for atomically upgrading
+                * a sem to write, so we need to check for ranges while sem
+                * is unlocked.
+                */
+               up_read(&mm->mmap_sem);
+               /* vma can change or disappear */
+               down_write(&mm->mmap_sem);
+               *prev = find_vma(mm, start);
+               /* non-NULL *prev must contain @start, but need to check @end */
+               if (!(*prev) || end > (*prev)->vm_end)
+                       ret = -EAGAIN;
+       } else {
+               /*
+                * TODO:  for unlocking, pages will already be resident, so
+                * we don't need to wait for allocations/reclaim/pagein, ...
+                * However, unlocking a very large region can still take a
+                * while.  Should we downgrade the semaphore for both lock
+                * AND unlock ?
+                */
                 __munlock_vma_pages_range(vma, start, end);
+       }
  
  out:
         *prev = vma;
author	Lee Schermerhorn <lee.schermerhorn@hp.com>
	Sun, 19 Oct 2008 03:26:49 +0000 (20:26 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 20 Oct 2008 15:52:31 +0000 (08:52 -0700)