From c56004901fa5dcf55f92318f192ab3c0e87db2d1 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 3 Sep 2005 15:57:36 -0700 Subject: [PATCH] [PATCH] uml: TLB operation batching This adds VM op batching to skas0. Rather than having a context switch to and from the userspace stub for each address space change, we write a number of operations to the stub data page and invoke a different stub which loops over them and executes them all in one go. The operations are stored as [ system call number, arg1, arg2, ... ] tuples. The set is terminated by a system call number of 0. Single operations, i.e. page faults, are handled in the old way, since that is slightly more efficient. For a kernel build, a minority (~1/4) of the operations are part of a set. These sets averaged ~100 in length, so for this quarter, the context switching overhead is greatly reduced. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/tlb.h | 22 +-- arch/um/kernel/skas/include/skas.h | 13 +- arch/um/kernel/skas/mem_user.c | 112 +++++++++++--- arch/um/kernel/skas/tlb.c | 23 +-- arch/um/kernel/tlb.c | 226 +++++++++++++++-------------- arch/um/kernel/tt/tlb.c | 5 +- arch/um/sys-i386/stub.S | 17 +++ arch/um/sys-x86_64/stub.S | 21 +++ 8 files changed, 273 insertions(+), 166 deletions(-) diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index c6f9628f39b..2deefb99c63 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -9,7 +9,7 @@ #include "um_mmu.h" struct host_vm_op { - enum { MMAP, MUNMAP, MPROTECT } type; + enum { NONE, MMAP, MUNMAP, MPROTECT } type; union { struct { unsigned long addr; @@ -38,24 +38,10 @@ extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void (*do_ops)(union mm_context *, - struct host_vm_op *, int)); + void *(*do_ops)(union mm_context *, + struct host_vm_op *, int, int, + void *)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); -extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); -extern int add_munmap(unsigned long addr, unsigned long len, - struct host_vm_op *ops, int index, int last_filled, - union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); -extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w, - int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)); #endif diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index d983ea84254..e91064b7e5a 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -24,11 +24,14 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset); -extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); -extern int protect(struct mm_id * mm_idp, unsigned long addr, - unsigned long len, int r, int w, int x); +extern void *map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int r, int w, int x, int phys_fd, + unsigned long long offset, int done, void *data); +extern void *unmap(struct mm_id * mm_idp, void *addr, + unsigned long len, int done, void *data); +extern void *protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x, int done, + void *data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern int start_userspace(unsigned long stub_stack); diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index b0980ff3bd9..c976320ebe8 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -25,12 +25,14 @@ #include "sysdep/stub.h" #include "skas.h" -extern unsigned long syscall_stub, __syscall_stub_start; +extern unsigned long syscall_stub, batch_syscall_stub, __syscall_stub_start; extern void wait_stub_done(int pid, int sig, char * fname); -static long run_syscall_stub(struct mm_id * mm_idp, int syscall, - unsigned long *args) +int single_count = 0; + +static long one_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { int n, pid = mm_idp->u.pid; unsigned long regs[MAX_REG_NR]; @@ -49,18 +51,80 @@ static long run_syscall_stub(struct mm_id * mm_idp, int syscall, regs[REGS_SYSCALL_ARG6] = args[5]; n = ptrace_setregs(pid, regs); if(n < 0){ - printk("run_syscall_stub : PTRACE_SETREGS failed, " + printk("one_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } + + wait_stub_done(pid, 0, "one_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); +} + +int multi_count = 0; +int multi_op_count = 0; + +static long many_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, int done, void **addr_out) +{ + unsigned long regs[MAX_REG_NR], *stack; + int n, pid = mm_idp->u.pid; + + stack = *addr_out; + if(stack == NULL) + stack = (unsigned long *) current_stub_stack(); + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack = 0; + multi_op_count++; + + if(!done && ((((unsigned long) stack) & ~PAGE_MASK) < + PAGE_SIZE - 8 * sizeof(long))){ + *addr_out = stack; + return 0; + } + + multi_count++; + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA; + + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("many_syscall_stub : PTRACE_SETREGS failed, " "errno = %d\n", n); return(n); } - wait_stub_done(pid, 0, "run_syscall_stub"); + wait_stub_done(pid, 0, "many_syscall_stub"); + stack = (unsigned long *) mm_idp->stack; - return(*((unsigned long *) mm_idp->stack)); + *addr_out = stack; + return(*stack); } -int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, - int r, int w, int x, int phys_fd, unsigned long long offset) +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, void **addr, int done) +{ + long res; + + if((*addr == NULL) && done) + res = one_syscall_stub(mm_idp, syscall, args); + else res = many_syscall_stub(mm_idp, syscall, args, done, addr); + + return res; +} + +void *map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset, + int done, void *data) { int prot, n; @@ -70,6 +134,7 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, if(proc_mm){ struct proc_mm_op map; int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, .u = { .mmap = @@ -91,21 +156,24 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, MAP_SHARED | MAP_FIXED, phys_fd, MMAP_OFFSET(offset) }; - res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, + &data, done); if((void *) res == MAP_FAILED) printk("mmap stub failed, errno = %d\n", res); } - return 0; + return data; } -int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) +void *unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, + void *data) { int n; if(proc_mm){ struct proc_mm_op unmap; int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, .u = { .munmap = @@ -113,28 +181,25 @@ int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) (unsigned long) addr, .len = len } } } ); n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } + if(n != sizeof(unmap)) + printk("unmap - proc_mm write returned %d\n", n); } else { int res; unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_munmap, args); + res = run_syscall_stub(mm_idp, __NR_munmap, args, + &data, done); if(res < 0) printk("munmap stub failed, errno = %d\n", res); } - return(0); + return data; } -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - int r, int w, int x) +void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x, int done, void *data) { struct proc_mm_op protect; int prot, n; @@ -160,12 +225,13 @@ int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, int res; unsigned long args[] = { addr, len, prot, 0, 0, 0 }; - res = run_syscall_stub(mm_idp, __NR_mprotect, args); + res = run_syscall_stub(mm_idp, __NR_mprotect, args, + &data, done); if(res < 0) panic("mprotect stub failed, errno = %d\n", res); } - return(0); + return data; } void before_mem_skas(unsigned long unused) diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 6230999c672..4b5fd204954 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -18,7 +18,8 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void *flush) { struct host_vm_op *op; int i; @@ -27,24 +28,28 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, - op->u.mmap.fd, op->u.mmap.offset); + flush = map(&mmu->skas.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, + op->u.mmap.x, op->u.mmap.fd, + op->u.mmap.offset, finished, flush); break; case MUNMAP: - unmap(&mmu->skas.id, (void *) op->u.munmap.addr, - op->u.munmap.len); + flush = unmap(&mmu->skas.id, (void *) op->u.munmap.addr, + op->u.munmap.len, finished, flush); break; case MPROTECT: - protect(&mmu->skas.id, op->u.mprotect.addr, - op->u.mprotect.len, op->u.mprotect.r, - op->u.mprotect.w, op->u.mprotect.x); + flush = protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x, + finished, flush); break; default: printk("Unknown op type %d in do_ops\n", op->type); break; } } + + return flush; } extern int proc_mm; diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 83ec8d4747f..7d914bb6b00 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,12 +15,116 @@ #include "mem_user.h" #include "os.h" +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) +{ + __u64 offset; + struct host_vm_op *last; + int fd; + + fd = phys_mapping(phys, &offset); + if(index != -1){ + last = &ops[index]; + if((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.r == r) && (last->u.mmap.w == w) && + (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)){ + last->u.mmap.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MMAP, + .u = { .mmap = { + .addr = virt, + .len = len, + .r = r, + .w = w, + .x = x, + .fd = fd, + .offset = offset } + } }); + return index; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) +{ + struct host_vm_op *last; + + if(index != -1){ + last = &ops[index]; + if((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)){ + last->u.munmap.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MUNMAP, + .u = { .munmap = { + .addr = addr, + .len = len } } }); + return index; +} + +static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, + int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, void **flush, + void *(*do_ops)(union mm_context *, + struct host_vm_op *, int, int, void *)) +{ + struct host_vm_op *last; + + if(index != -1){ + last = &ops[index]; + if((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && + (last->u.mprotect.x == x)){ + last->u.mprotect.len += len; + return index; + } + } + + if(index == last_filled){ + *flush = (*do_ops)(mmu, ops, last_filled, 0, *flush); + index = -1; + } + + ops[++index] = ((struct host_vm_op) { .type = MPROTECT, + .u = { .mprotect = { + .addr = addr, + .len = len, + .r = r, + .w = w, + .x = x } } }); + return index; +} + #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force, - void (*do_ops)(union mm_context *, struct host_vm_op *, - int)) + void *(*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void *)) { pgd_t *npgd; pud_t *npud; @@ -29,11 +133,13 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; - struct host_vm_op ops[16]; + struct host_vm_op ops[1]; + void *flush = NULL; int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; if(mm == NULL) return; + ops[0].type = NONE; for(addr = start_addr; addr < end_addr;){ npgd = pgd_offset(mm, addr); if(!pgd_present(*npgd)){ @@ -43,7 +149,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pgd_mkuptodate(*npgd); } addr = end; @@ -58,7 +164,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pud_mkuptodate(*npud); } addr = end; @@ -73,7 +179,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); pmd_mkuptodate(*npmd); } addr = end; @@ -96,20 +202,20 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, op_index, last_op, mmu, - do_ops); + &flush, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(mmu, ops, op_index); + flush = (*do_ops)(mmu, ops, op_index, 1, flush); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -226,106 +332,6 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) return(pte_offset_map(pmd, addr)); } -int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - __u64 offset; - struct host_vm_op *last; - int fd; - - fd = phys_mapping(phys, &offset); - if(index != -1){ - last = &ops[index]; - if((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.r == r) && (last->u.mmap.w == w) && - (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)){ - last->u.mmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MMAP, - .u = { .mmap = { - .addr = virt, - .len = len, - .r = r, - .w = w, - .x = x, - .fd = fd, - .offset = offset } - } }); - return(index); -} - -int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)){ - last->u.munmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MUNMAP, - .u = { .munmap = { - .addr = addr, - .len = len } } }); - return(index); -} - -int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, - union mm_context *mmu, - void (*do_ops)(union mm_context *, struct host_vm_op *, int)) -{ - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && - (last->u.mprotect.x == x)){ - last->u.mprotect.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(mmu, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MPROTECT, - .u = { .mprotect = { - .addr = addr, - .len = len, - .r = r, - .w = w, - .x = x } } }); - return(index); -} - void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { address &= PAGE_MASK; diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 2eefb43bc9c..16fc6a28882 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,8 @@ #include "os.h" #include "tlb.h" -static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) +static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void *flush) { struct host_vm_op *op; int i; @@ -45,6 +46,8 @@ static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) break; } } + + return NULL; } static void fix_range(struct mm_struct *mm, unsigned long start_addr, diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S index 2f2c70a8f04..a0f9506312d 100644 --- a/arch/um/sys-i386/stub.S +++ b/arch/um/sys-i386/stub.S @@ -6,3 +6,20 @@ syscall_stub: int $0x80 mov %eax, UML_CONFIG_STUB_DATA int3 + + .globl batch_syscall_stub +batch_syscall_stub: + mov $UML_CONFIG_STUB_DATA, %esp +again: pop %eax + cmpl $0, %eax + jz done + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + int $0x80 + mov %eax, UML_CONFIG_STUB_DATA + jmp again +done: int3 diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S index 31c14925716..957f2eff32c 100644 --- a/arch/um/sys-x86_64/stub.S +++ b/arch/um/sys-x86_64/stub.S @@ -13,3 +13,24 @@ syscall_stub: or %rcx, %rbx movq %rax, (%rbx) int3 + + .globl batch_syscall_stub +batch_syscall_stub: + movq $(UML_CONFIG_STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rbx, %rsp +again: pop %rax + cmpq $0, %rax +jz done + pop %rdi + pop %rsi + pop %rdx + pop %r10 + pop %r8 + pop %r9 + syscall + mov %rax, (%rbx) + jmp again +done: int3 -- 2.41.1