From: David Mosberger-Tang Date: Thu, 28 Apr 2005 04:15:13 +0000 (-0700) Subject: [IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp. X-Git-Tag: v2.6.13-rc2~59^2~14^2~14 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=3c79c8b1d92a9ae3edf3cbcd2c43c553ee0f1d83;p=linux-2.6-omap-h63xx.git [IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp. Decreases syscall overhead by approximately 6 cycles. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck --- diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index c0f28339d58..0c84bed1bda 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -705,15 +705,15 @@ ENTRY(ia64_leave_syscall) // start restoring the state saved on the kernel stack (struct pt_regs): ld8 r9=[r2],PT(CR_IPSR)-PT(R9) ld8 r11=[r3],PT(CR_IIP)-PT(R11) - mov f6=f0 // clear f6 + nop.i 0 ;; invala // M0|1 invalidate ALAT rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection - mov f9=f0 // clear f9 + nop.i 0 ld8 r29=[r2],16 // load cr.ipsr ld8 r28=[r3],16 // load cr.iip - mov f8=f0 // clear f8 + mov r22=r0 // clear r22 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs ld8 r25=[r3],16 // M0|1 load ar.unat @@ -721,15 +721,15 @@ ENTRY(ia64_leave_syscall) ;; ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled - mov f10=f0 // clear f10 +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc - mov f11=f0 // clear f11 + mov f6=f0 // clear f6 ;; ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) ld8 r31=[r3],PT(R1)-PT(PR) // load predicates -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + mov f7=f0 // clear f7 ;; ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr ld8.fill r1=[r3],16 // load r1 @@ -737,24 +737,29 @@ ENTRY(ia64_leave_syscall) ;; srlz.d // M0 ensure interruption collection is off ld8.fill r13=[r3],16 - mov f7=f0 // clear f7 + mov f8=f0 // clear f8 ;; ld8.fill r12=[r2] // restore r12 (sp) mov.m ar.ssd=r0 // M2 clear ar.ssd - mov r22=r0 // clear r22 + mov b6=r18 // I0 restore b6 + nop.m 0 + mov f9=f0 // clear f9 + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + ;; ld8.fill r15=[r3] // restore r15 (pUStk) st1 [r14]=r17 addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 ;; (pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 mov.m ar.csd=r0 // M2 clear ar.csd - mov b6=r18 // I0 restore b6 + mov f10=f0 // clear f10 ;; mov r14=r0 // clear r14 - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + mov f11=f0 // clear f11 (pKStk) br.cond.dpnt.many skip_rbs_switch + mov.m ar.ccv=r0 // clear ar.ccv (pNonSys) br.cond.dpnt.many dont_preserve_current_frame br.cond.sptk.many rbs_switch