From e31aa453bbc4886a7bd33e5c2afa526d6f55bd7a Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Sat, 30 Aug 2008 11:41:12 +1000
Subject: [PATCH] powerpc: Use LOAD_REG_IMMEDIATE only for constants on 64-bit

Using LOAD_REG_IMMEDIATE to get the address of kernel symbols
generates 5 instructions where LOAD_REG_ADDR can do it in one, and
will generate R_PPC64_ADDR16_* relocations in the output when we get
to building the kernel as a position-independent executable, which
we'd rather not have to handle.

This changes various bits of assembly code to use LOAD_REG_ADDR when
we need to get the address of a symbol, or to use suitable
position-independent code for cases where we can't access the TOC for
various reasons, or if we're not running at the address we were
linked at.

It also cleans up a few minor things: there's no reason to save and
restore SRR0/1 around RTAS calls, __mmu_off can get the return
address from LR more conveniently than the caller can supply it in R4
(and we already assume elsewhere that EA == RA if the MMU is on in
early boot), and enable_64b_mode was using 5 instructions where 2
would do.

Signed-off-by: Paul Mackerras
---
 arch/powerpc/include/asm/ppc_asm.h         |   2 +-
 arch/powerpc/kernel/cpu_setup_ppc970.S     |   4 +-
 arch/powerpc/kernel/entry_64.S             |  16 +-
 arch/powerpc/kernel/head_64.S              | 181 ++++++++++-----------
 arch/powerpc/kernel/misc.S                 |  10 +-
 arch/powerpc/platforms/iseries/exception.S |  23 ++-
 6 files changed, 110 insertions(+), 126 deletions(-)
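[Editor's note, not part of the original patch: for readers who do not have
ppc_asm.h in front of them, the two macros compared above expand roughly as
follows on 64-bit. The register r5 and the symbol foo are placeholders here;
the exact definitions live in arch/powerpc/include/asm/ppc_asm.h.]

        /* LOAD_REG_IMMEDIATE(r5, foo): builds the 64-bit value piecewise,
         * five instructions, each carrying an R_PPC64_ADDR16_* relocation
         * against foo. */
        lis     r5,foo@highest          /* bits 48-63 */
        ori     r5,r5,foo@higher        /* bits 32-47 */
        rldicr  r5,r5,32,31             /* shift up by 32 */
        oris    r5,r5,foo@h             /* bits 16-31 */
        ori     r5,r5,foo@l             /* bits 0-15 */

        /* LOAD_REG_ADDR(r5, foo): a single load through the TOC/GOT,
         * resolved by the linker with no ADDR16 relocations in the text. */
        ld      r5,foo@got(r2)

[This is why the patch keeps LOAD_REG_IMMEDIATE for genuine constants such as
PHYSICAL_START or MSR values, and moves symbol addresses over to LOAD_REG_ADDR
or to explicit position-independent sequences.]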
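[Editor's note, not part of the original patch: the new relative_toc helper,
and the similar open-coded sequences added in rtas_return_loc and
__start_initialization_multiplatform, all rely on the same position-independent
idiom. Below is a copy of relative_toc with comments added here purely for
explanation:]

_GLOBAL(relative_toc)
        mflr    r0              /* save the caller's LR */
        bcl     20,31,$+4       /* "branch always" and link to the very next
                                 * instruction, so LR = runtime address of 0: */
0:      mflr    r9
        ld      r2,(p_toc - 0b)(r9)     /* p_toc - 0b is a link-time constant,
                                         * so this loads (__toc_start + 0x8000)
                                         * minus the link-time address of 0: */
        add     r2,r2,r9        /* add the runtime address of 0: back in,
                                 * giving the TOC pointer for wherever the
                                 * kernel is actually running */
        mtlr    r0              /* restore the caller's LR */
        blr

p_toc:  .llong  __toc_start + 0x8000 - 0b

[The 0x8000 bias follows the ppc64 ELF convention that r2 points 0x8000 bytes
past the start of the TOC, so signed 16-bit offsets cover the whole 64kB; and
bcl 20,31,$+4 is the customary form for fetching the current address, since
processors do not treat it as a real subroutine call for link-stack prediction
purposes.]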
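[Editor's note, not part of the original patch: the two-instruction
enable_64b_mode sequence below works because, with the usual MSR bit layout in
reg.h (MSR_SF is bit 63, MSR_ISF bit 61), both bits sit in the top 16 bits of
the MSR:]

        li      r12,(MSR_SF | MSR_ISF)@highest  /* 0xa000, sign-extended by li */
        sldi    r12,r12,48                      /* back into bits 63/61 */
        or      r11,r11,r12                     /* set both bits in the MSR image */

[The sign extension done by li is harmless: the unwanted high bits are shifted
out of the register by the sldi.]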
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 0966899d974..c4a029ccb4d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -268,7 +268,7 @@ n:
  * Loads the value of the constant expression 'expr' into register 'rn'
  * using immediate instructions only. Use this when it's important not
  * to reference other data (i.e. on ppc64 when the TOC pointer is not
- * valid).
+ * valid) and when 'expr' is a constant or absolute address.
  *
  * LOAD_REG_ADDR(rn, name)
  * Loads the address of label 'name' into register 'rn'. Use this when
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index bf118c38575..27f2507279d 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -110,7 +110,7 @@ load_hids:
         isync
 
         /* Save away cpu state */
-        LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+        LOAD_REG_ADDR(r5,cpu_state_storage)
 
         /* Save HID0,1,4 and 5 */
         mfspr   r3,SPRN_HID0
@@ -134,7 +134,7 @@ _GLOBAL(__restore_cpu_ppc970)
         rldicl. r0,r0,4,63
         beqlr
 
-        LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+        LOAD_REG_ADDR(r5,cpu_state_storage)
         /* Before accessing memory, we make sure rm_ci is clear */
         li      r0,0
         mfspr   r3,SPRN_HID4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 55445f1dba8..fd8b4bae9b0 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -690,10 +690,6 @@ _GLOBAL(enter_rtas)
         std     r7,_DAR(r1)
         mfdsisr r8
         std     r8,_DSISR(r1)
-        mfsrr0  r9
-        std     r9,_SRR0(r1)
-        mfsrr1  r10
-        std     r10,_SRR1(r1)
 
         /* Temporary workaround to clear CR until RTAS can be modified to
          * ignore all bits.
@@ -754,6 +750,10 @@ _STATIC(rtas_return_loc)
         mfspr   r4,SPRN_SPRG3          /* Get PACA */
         clrldi  r4,r4,2                /* convert to realmode address */
 
+        bcl     20,31,$+4
+0:      mflr    r3
+        ld      r3,(1f-0b)(r3)         /* get &.rtas_restore_regs */
+
         mfmsr   r6
         li      r0,MSR_RI
         andc    r6,r6,r0
@@ -761,7 +761,6 @@ _STATIC(rtas_return_loc)
         mtmsrd  r6
 
         ld      r1,PACAR1(r4)          /* Restore our SP */
-        LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)
         ld      r4,PACASAVEDMSR(r4)    /* Restore our MSR */
 
         mtspr   SPRN_SRR0,r3
@@ -769,6 +768,9 @@
         rfid
         b       .       /* prevent speculative execution */
 
+        .align  3
+1:      .llong  .rtas_restore_regs
+
 _STATIC(rtas_restore_regs)
         /* relocation is on at this point */
         REST_GPR(2, r1)                /* Restore the TOC */
@@ -788,10 +790,6 @@ _STATIC(rtas_restore_regs)
         mtdar   r7
         ld      r8,_DSISR(r1)
         mtdsisr r8
-        ld      r9,_SRR0(r1)
-        mtsrr0  r9
-        ld      r10,_SRR1(r1)
-        mtsrr1  r10
 
         addi    r1,r1,RTAS_FRAME_SIZE  /* Unstack our frame */
         ld      r0,16(r1)              /* get return address */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 97bb6e6f67b..6cdfd44d8ef 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -128,11 +128,11 @@ __secondary_hold:
         /* Tell the master cpu we're here */
         /* Relocation is off & we are located at an address less */
         /* than 0x100, so only need to grab low order offset.    */
-        std     r24,__secondary_hold_acknowledge@l(0)
+        std     r24,__secondary_hold_acknowledge-_stext(0)
         sync
 
         /* All secondary cpus wait here until told to start. */
-100:    ld      r4,__secondary_hold_spinloop@l(0)
+100:    ld      r4,__secondary_hold_spinloop-_stext(0)
         cmpdi   0,r4,0
         beq     100b
 
@@ -1223,11 +1223,14 @@ _GLOBAL(generic_secondary_smp_init)
         /* turn on 64-bit mode */
         bl      .enable_64b_mode
 
+        /* get the TOC pointer (real address) */
+        bl      .relative_toc
+
         /* Set up a paca value for this processor. Since we have the
          * physical cpu id in r24, we need to search the pacas to find
          * which logical id maps to our physical one.
          */
-        LOAD_REG_IMMEDIATE(r13, paca)  /* Get base vaddr of paca array  */
+        LOAD_REG_ADDR(r13, paca)       /* Get base vaddr of paca array  */
         li      r5,0                   /* logical cpu id                */
1:       lhz     r6,PACAHWCPUID(r13)    /* Load HW procid from paca      */
         cmpw    r6,r24                 /* Compare to our id             */
@@ -1256,7 +1259,7 @@ _GLOBAL(generic_secondary_smp_init)
         sync                           /* order paca.run and cur_cpu_spec */
 
         /* See if we need to call a cpu state restore handler */
-        LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+        LOAD_REG_ADDR(r23, cur_cpu_spec)
         ld      r23,0(r23)
         ld      r23,CPU_SPEC_RESTORE(r23)
         cmpdi   0,r23,0
@@ -1272,10 +1275,15 @@ _GLOBAL(generic_secondary_smp_init)
         b       __secondary_start
 #endif
 
+/*
+ * Turn the MMU off.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
 _STATIC(__mmu_off)
         mfmsr   r3
         andi.   r0,r3,MSR_IR|MSR_DR
         beqlr
+        mflr    r4
         andc    r3,r3,r0
         mtspr   SPRN_SRR0,r4
         mtspr   SPRN_SRR1,r3
@@ -1296,6 +1304,18 @@ _STATIC(__mmu_off)
  *
  */
 _GLOBAL(__start_initialization_multiplatform)
+        /* Make sure we are running in 64 bits mode */
+        bl      .enable_64b_mode
+
+        /* Get TOC pointer (current runtime address) */
+        bl      .relative_toc
+
+        /* find out where we are now */
+        bcl     20,31,$+4
+0:      mflr    r26                    /* r26 = runtime addr here */
+        addis   r26,r26,(_stext - 0b)@ha
+        addi    r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
         /*
          * Are we booted from a PROM Of-type client-interface ?
          */
@@ -1307,9 +1327,6 @@ _GLOBAL(__start_initialization_multiplatform)
         mr      r31,r3
         mr      r30,r4
 
-        /* Make sure we are running in 64 bits mode */
-        bl      .enable_64b_mode
-
         /* Setup some critical 970 SPRs before switching MMU off */
         mfspr   r0,SPRN_PVR
         srwi    r0,r0,16
@@ -1324,9 +1341,7 @@ _GLOBAL(__start_initialization_multiplatform)
1:       bl      .__cpu_preinit_ppc970
2:
 
-        /* Switch off MMU if not already */
-        LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
-        add     r4,r4,r30
+        /* Switch off MMU if not already off */
         bl      .__mmu_off
         b       .__after_prom_start
 
@@ -1341,23 +1356,10 @@ _INIT_STATIC(__boot_from_prom)
         /*
          * Align the stack to 16-byte boundary
          * Depending on the size and layout of the ELF sections in the initial
-         * boot binary, the stack pointer will be unalignet on PowerMac
+         * boot binary, the stack pointer may be unaligned on PowerMac
          */
         rldicr  r1,r1,0,59
 
-        /* Make sure we are running in 64 bits mode */
-        bl      .enable_64b_mode
-
-        /* put a relocation offset into r3 */
-        bl      .reloc_offset
-
-        LOAD_REG_IMMEDIATE(r2,__toc_start)
-        addi    r2,r2,0x4000
-        addi    r2,r2,0x4000
-
-        /* Relocate the TOC from a virt addr to a real addr */
-        add     r2,r2,r3
-
         /* Restore parameters */
         mr      r3,r31
         mr      r4,r30
@@ -1373,53 +1375,37 @@ _STATIC(__after_prom_start)
 
 /*
- * We need to run with __start at physical address PHYSICAL_START.
+ * We need to run with _stext at physical address PHYSICAL_START.
  * This will leave some code in the first 256B of
  * real memory, which are reserved for software use.
- * The remainder of the first page is loaded with the fixed
- * interrupt vectors. The next two pages are filled with
- * unknown exception placeholders.
  *
  * Note: This process overwrites the OF exception vectors.
- *       r26 == relocation offset
- *       r27 == KERNELBASE
  */
-        bl      .reloc_offset
-        mr      r26,r3
-        LOAD_REG_IMMEDIATE(r27, KERNELBASE)
-
         LOAD_REG_IMMEDIATE(r3, PHYSICAL_START)  /* target addr */
-
-        // XXX FIXME: Use phys returned by OF (r30)
-        add     r4,r27,r26             /* source addr                   */
-                                       /* current address of _start     */
-                                       /* i.e. where we are running     */
-                                       /* the source addr               */
-
-        cmpdi   r4,0                   /* In some cases the loader may  */
-        bne     1f
-        b       .start_here_multiplatform /* have already put us at zero */
-                                       /* so we can skip the copy.      */
-1:       LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
-        sub     r5,r5,r27
-
+        cmpd    r3,r26                 /* In some cases the loader may  */
+        beq     9f                     /* have already put us at zero   */
+        mr      r4,r26                 /* source address                */
+        lis     r5,(copy_to_here - _stext)@ha
+        addi    r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
         li      r6,0x100               /* Start offset, the first 0x100 */
                                        /* bytes were copied earlier.    */
 
         bl      .copy_and_flush        /* copy the first n bytes        */
                                        /* this includes the code being  */
                                       /* executed here.                */
-
-        LOAD_REG_IMMEDIATE(r0, 4f)     /* Jump to the copy of this code */
-        mtctr   r0                     /* that we just made/relocated   */
+        addis   r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
+        addi    r8,r8,(4f - _stext)@l  /* that we just made             */
+        mtctr   r8
         bctr
 
-4:       LOAD_REG_IMMEDIATE(r5,klimit)
-        add     r5,r5,r26
-        ld      r5,0(r5)               /* get the value of klimit */
-        sub     r5,r5,r27
+4:       /* Now copy the rest of the kernel up to _end */
+        addis   r5,r26,(p_end - _stext)@ha
+        ld      r5,(p_end - _stext)@l(r5)      /* get _end */
         bl      .copy_and_flush        /* copy the rest */
-        b       .start_here_multiplatform
+
+9:       b       .start_here_multiplatform
+
+p_end:   .llong  _end - _stext
 
 /*
  * Copy routine used to copy the kernel to start at physical address 0
@@ -1484,6 +1470,9 @@ _GLOBAL(pmac_secondary_start)
         /* turn on 64-bit mode */
         bl      .enable_64b_mode
 
+        /* get TOC pointer (real address) */
+        bl      .relative_toc
+
         /* Copy some CPU settings from CPU 0 */
         bl      .__restore_cpu_ppc970
 
@@ -1493,10 +1482,10 @@ _GLOBAL(pmac_secondary_start)
         mtmsrd  r3                     /* RI on */
 
         /* Set up a paca value for this processor. */
-        LOAD_REG_IMMEDIATE(r4, paca)   /* Get base vaddr of paca array  */
-        mulli   r13,r24,PACA_SIZE       /* Calculate vaddr of right paca */
+        LOAD_REG_ADDR(r4,paca)         /* Get base vaddr of paca array  */
+        mulli   r13,r24,PACA_SIZE      /* Calculate vaddr of right paca */
         add     r13,r13,r4             /* for this processor.           */
-        mtspr   SPRN_SPRG3,r13          /* Save vaddr of paca in SPRG3  */
+        mtspr   SPRN_SPRG3,r13         /* Save vaddr of paca in SPRG3   */
 
         /* Create a temp kernel stack for use before relocation is on. */
         ld      r1,PACAEMERGSP(r13)
@@ -1524,9 +1513,6 @@ __secondary_start:
         /* Set thread priority to MEDIUM */
         HMT_MEDIUM
 
-        /* Load TOC */
-        ld      r2,PACATOC(r13)
-
         /* Do early setup for that CPU (stab, slb, hash table pointer) */
         bl      .early_setup_secondary
 
@@ -1563,9 +1549,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
 
 /*
  * Running with relocation on at this point. All we want to do is
- * zero the stack back-chain pointer before going into C code.
+ * zero the stack back-chain pointer and get the TOC virtual address
+ * before going into C code.
  */
 _GLOBAL(start_secondary_prolog)
+        ld      r2,PACATOC(r13)
         li      r3,0
         std     r3,0(r1)               /* Zero the stack frame pointer */
         bl      .start_secondary
@@ -1577,34 +1565,46 @@ _GLOBAL(start_secondary_prolog)
  */
 _GLOBAL(enable_64b_mode)
         mfmsr   r11                    /* grab the current MSR */
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        or      r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+        li      r12,(MSR_SF | MSR_ISF)@highest
+        sldi    r12,r12,48
         or      r11,r11,r12
         mtmsrd  r11
         isync
         blr
 
+/*
+ * This puts the TOC pointer into r2, offset by 0x8000 (as expected
+ * by the toolchain). It computes the correct value for wherever we
+ * are running at the moment, using position-independent code.
+ */
+_GLOBAL(relative_toc)
+        mflr    r0
+        bcl     20,31,$+4
+0:      mflr    r9
+        ld      r2,(p_toc - 0b)(r9)
+        add     r2,r2,r9
+        mtlr    r0
+        blr
+
+p_toc:   .llong  __toc_start + 0x8000 - 0b
+
 /*
  * This is where the main kernel code starts.
  */
 _INIT_STATIC(start_here_multiplatform)
-        /* get a new offset, now that the kernel has moved. */
-        bl      .reloc_offset
-        mr      r26,r3
+        /* set up the TOC (real address) */
+        bl      .relative_toc
 
         /* Clear out the BSS. It may have been done in prom_init,
          * already but that's irrelevant since prom_init will soon
          * be detached from the kernel completely. Besides, we need
          * to clear it now for kexec-style entry.
          */
-        LOAD_REG_IMMEDIATE(r11,__bss_stop)
-        LOAD_REG_IMMEDIATE(r8,__bss_start)
+        LOAD_REG_ADDR(r11,__bss_stop)
+        LOAD_REG_ADDR(r8,__bss_start)
         sub     r11,r11,r8             /* bss size                     */
         addi    r11,r11,7              /* round up to an even double word */
-        rldicl. r11,r11,61,3           /* shift right by 3             */
+        srdi.   r11,r11,3              /* shift right by 3             */
         beq     4f
         addi    r8,r8,-8
         li      r0,0
@@ -1617,35 +1617,28 @@ _INIT_STATIC(start_here_multiplatform)
         ori     r6,r6,MSR_RI
         mtmsrd  r6                     /* RI on */
 
-        /* The following gets the stack and TOC set up with the regs */
+        /* The following gets the stack set up with the regs */
         /* pointing to the real addr of the kernel stack.  This is   */
         /* all done to support the C function call below which sets  */
         /* up the htab.  This is done because we have relocated the  */
         /* kernel but are still running in real mode. */
 
-        LOAD_REG_IMMEDIATE(r3,init_thread_union)
-        add     r3,r3,r26
+        LOAD_REG_ADDR(r3,init_thread_union)
 
-        /* set up a stack pointer (physical address) */
+        /* set up a stack pointer */
         addi    r1,r3,THREAD_SIZE
         li      r0,0
         stdu    r0,-STACK_FRAME_OVERHEAD(r1)
 
-        /* set up the TOC (physical address) */
-        LOAD_REG_IMMEDIATE(r2,__toc_start)
-        addi    r2,r2,0x4000
-        addi    r2,r2,0x4000
-        add     r2,r2,r26
-
         /* Do very early kernel initializations, including initial hash table,
          * stab and slb setup before we turn on relocation.     */
 
         /* Restore parameters passed from prom_init/kexec */
         mr      r3,r31
-        bl      .early_setup
+        bl      .early_setup           /* also sets r13 and SPRG3 */
 
-        LOAD_REG_IMMEDIATE(r3, .start_here_common)
-        LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
+        LOAD_REG_ADDR(r3, .start_here_common)
+        ld      r4,PACAKMSR(r13)
         mtspr   SPRN_SRR0,r3
         mtspr   SPRN_SRR1,r4
         rfid
@@ -1654,20 +1647,10 @@
 /* This is where all platforms converge execution */
 _INIT_GLOBAL(start_here_common)
         /* relocation is on at this point */
+        std     r1,PACAKSAVE(r13)
 
-        /* The following code sets up the SP and TOC now that we are */
-        /* running with translation enabled. */
-
-        LOAD_REG_IMMEDIATE(r3,init_thread_union)
-
-        /* set up the stack */
-        addi    r1,r3,THREAD_SIZE
-        li      r0,0
-        stdu    r0,-STACK_FRAME_OVERHEAD(r1)
-
-        /* Load the TOC */
+        /* Load the TOC (virtual address) */
         ld      r2,PACATOC(r13)
-        std     r1,PACAKSAVE(r13)
 
         bl      .setup_system
 
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 85cb6f34084..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -31,11 +31,14 @@ _GLOBAL(reloc_offset)
         mflr    r0
         bl      1f
1:       mflr    r3
-        LOAD_REG_IMMEDIATE(r4,1b)
+        PPC_LL  r4,(2f-1b)(r3)
         subf    r3,r4,r3
         mtlr    r0
         blr
 
+        .align  3
+2:       PPC_LONG 1b
+
 /*
  * add_reloc_offset(x) returns x + reloc_offset().
  */
@@ -43,12 +46,15 @@ _GLOBAL(add_reloc_offset)
         mflr    r0
         bl      1f
1:       mflr    r5
-        LOAD_REG_IMMEDIATE(r4,1b)
+        PPC_LL  r4,(2f-1b)(r5)
         subf    r5,r4,r5
         add     r3,r3,r5
         mtlr    r0
         blr
 
+        .align  3
+2:       PPC_LONG 1b
+
 _GLOBAL(kernel_execve)
         li      r0,__NR_execve
         sc
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 8ff330d026c..2f581521eb9 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -38,12 +38,13 @@
 
         .globl system_reset_iSeries
 system_reset_iSeries:
+        bl      .relative_toc
         mfspr   r13,SPRN_SPRG3         /* Get alpaca address */
-        LOAD_REG_IMMEDIATE(r23, alpaca)
+        LOAD_REG_ADDR(r23, alpaca)
         li      r0,ALPACA_SIZE
         sub     r23,r13,r23
         divdu   r23,r23,r0             /* r23 has cpu number */
-        LOAD_REG_IMMEDIATE(r13, paca)
+        LOAD_REG_ADDR(r13, paca)
         mulli   r0,r23,PACA_SIZE
         add     r13,r13,r0
         mtspr   SPRN_SPRG3,r13         /* Save it away for the future */
@@ -60,14 +61,14 @@ system_reset_iSeries:
         mtspr   SPRN_CTRLT,r4
 
 /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
-/* In the UP case we'll yeild() later, and we will not access the paca anyway */
+/* In the UP case we'll yield() later, and we will not access the paca anyway */
 #ifdef CONFIG_SMP
1:
         HMT_LOW
-        LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop)
+        LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
         ld      r23,0(r23)
         sync
-        LOAD_REG_IMMEDIATE(r3,current_set)
+        LOAD_REG_ADDR(r3,current_set)
         sldi    r28,r24,3              /* get current_set[cpu#] */
         ldx     r3,r3,r28
         addi    r1,r3,THREAD_SIZE
@@ -90,7 +91,7 @@ system_reset_iSeries:
         lbz     r23,PACAPROCSTART(r13) /* Test if this processor
                                         * should start */
         sync
-        LOAD_REG_IMMEDIATE(r3,current_set)
+        LOAD_REG_ADDR(r3,current_set)
         sldi    r28,r24,3              /* get current_set[cpu#] */
         ldx     r3,r3,r28
         addi    r1,r3,THREAD_SIZE
@@ -255,8 +256,8 @@ hardware_interrupt_iSeries_masked:
 
 _INIT_STATIC(__start_initialization_iSeries)
         /* Clear out the BSS */
-        LOAD_REG_IMMEDIATE(r11,__bss_stop)
-        LOAD_REG_IMMEDIATE(r8,__bss_start)
+        LOAD_REG_ADDR(r11,__bss_stop)
+        LOAD_REG_ADDR(r8,__bss_start)
         sub     r11,r11,r8             /* bss size                     */
         addi    r11,r11,7              /* round up to an even double word */
         rldicl. r11,r11,61,3           /* shift right by 3             */
@@ -267,15 +268,11 @@ _INIT_STATIC(__start_initialization_iSeries)
3:       stdu    r0,8(r8)
         bdnz    3b
4:
-        LOAD_REG_IMMEDIATE(r1,init_thread_union)
+        LOAD_REG_ADDR(r1,init_thread_union)
         addi    r1,r1,THREAD_SIZE
         li      r0,0
         stdu    r0,-STACK_FRAME_OVERHEAD(r1)
 
-        LOAD_REG_IMMEDIATE(r2,__toc_start)
-        addi    r2,r2,0x4000
-        addi    r2,r2,0x4000
-
         bl      .iSeries_early_setup
         bl      .early_setup
 
-- 
2.41.1