From 4c18422773362093fe0d60feb8011a116138ec33 Mon Sep 17 00:00:00 2001 From: Paul Brook Date: Mon, 22 Jan 2007 10:03:36 +0000 Subject: [PATCH] ARMv7: Linux Thumb-2 support for user-space applications The attached patch implements Thumb-2 application support in Linux. There are two main changes: - Use IFAR when handling prefetch aborts - Handle undefined instruction traps from coprocessor instructions in Thumb mode Signed-off-by: Paul Brook --- arch/arm/kernel/entry-armv.S | 47 ++++++++++++++++++++++++++++------ arch/arm/kernel/entry-common.S | 5 ++++ arch/arm/mm/Kconfig | 22 ++++++++++++++++ arch/arm/mm/proc-arm1020.S | 1 + arch/arm/mm/proc-arm1020e.S | 1 + arch/arm/mm/proc-arm1022.S | 1 + arch/arm/mm/proc-arm1026.S | 1 + arch/arm/mm/proc-arm6_7.S | 2 ++ arch/arm/mm/proc-arm720.S | 1 + arch/arm/mm/proc-arm920.S | 1 + arch/arm/mm/proc-arm922.S | 1 + arch/arm/mm/proc-arm925.S | 1 + arch/arm/mm/proc-arm926.S | 1 + arch/arm/mm/proc-sa110.S | 1 + arch/arm/mm/proc-sa1100.S | 1 + arch/arm/mm/proc-v6.S | 1 + arch/arm/mm/proc-v7.S | 1 + arch/arm/mm/proc-xscale.S | 1 + arch/arm/nwfpe/entry.S | 9 ++++--- include/asm-arm/cpu-multi32.h | 4 +++ include/asm-arm/glue.h | 27 +++++++++++++++++++ 21 files changed, 119 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 58f8cffa771..fbea4de806f 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -280,7 +280,6 @@ __pabt_svc: mrs r9, cpsr tst r3, #PSR_I_BIT biceq r9, r9, #PSR_I_BIT - msr cpsr_c, r9 @ @ set args, then call main handler @@ -288,7 +287,15 @@ __pabt_svc: @ r0 - address of faulting instruction @ r1 - pointer to registers on stack @ - mov r0, r2 @ address (pc) +#ifdef MULTI_PABORT + mov r0, r2 @ pass address of aborted instruction. 
+ ldr r4, .LCprocfns + mov lr, pc + ldr pc, [r4, #] +#else + CPU_PABORT_HANDLER(r0, r2) +#endif + msr cpsr_c, r9 @ Maybe enable interrupts mov r1, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -435,8 +442,6 @@ __irq_usr: __und_usr: usr_entry - tst r3, #PSR_T_BIT @ Thumb mode? - bne __und_usr_unknown @ ignore FP sub r4, r2, #4 @ @@ -446,9 +451,24 @@ __und_usr: @ @ r0 - instruction @ -1: ldrt r0, [r4] adr r9, ret_from_exception adr lr, __und_usr_unknown + + tst r3, #PSR_T_BIT @ Thumb mode? +1: ldreqt r0, [r4] + beq call_fpe + @ Thumb instruction +#if __LINUX_ARM_ARCH__ >= 7 +2: ldrht r5, [r4], #2 + and r0, r5, #0xee + cmp r0, #0xee + bne __und_usr_unknown +3: ldrht r0, [r4] + orr r0, r0, r5, lsl #16 +#else + b __und_usr_unknown +#endif + @ @ fallthrough to call_fpe @ @@ -457,10 +477,14 @@ __und_usr: * The out of line fixup for the ldrt above. */ .section .fixup, "ax" -2: mov pc, r9 +4: mov pc, r9 .previous .section __ex_table,"a" - .long 1b, 2b + .long 1b, 4b +#if __LINUX_ARM_ARCH__ >= 7 + .long 2b, 4b + .long 3b, 4b +#endif .previous /* @@ -606,8 +630,15 @@ __und_usr_unknown: __pabt_usr: usr_entry +#ifdef MULTI_PABORT + mov r0, r2 @ pass address of aborted instruction. 
+ ldr r4, .LCprocfns + mov lr, pc + ldr pc, [r4, #] +#else + CPU_PABORT_HANDLER(r0, r2) +#endif enable_irq @ Enable interrupts - mov r0, r2 @ address (pc) mov r1, sp @ regs bl do_PrefetchAbort @ call abort handler /* fall through */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index c589dc3ecd1..9999feebe24 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -352,6 +352,11 @@ sys_mmap2: b do_mmap2 #endif +ENTRY(pabort_ifar) + mrc p15, 0, r0, cr6, cr0, 2 +ENTRY(pabort_noifar) + mov pc, lr + #ifdef CONFIG_OABI_COMPAT /* diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 15392220f6e..1937114157c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -18,6 +18,7 @@ config CPU_ARM610 select CPU_CP15_MMU select CPU_COPY_V3 if MMU select CPU_TLB_V3 if MMU + select CPU_PABRT_NOIFAR help The ARM610 is the successor to the ARM3 processor and was produced by VLSI Technology Inc. @@ -49,6 +50,7 @@ config CPU_ARM710 select CPU_CP15_MMU select CPU_COPY_V3 if MMU select CPU_TLB_V3 if MMU + select CPU_PABRT_NOIFAR help A 32-bit RISC microprocessor based on the ARM7 processor core designed by Advanced RISC Machines Ltd. 
The ARM710 is the @@ -64,6 +66,7 @@ config CPU_ARM720T default y if ARCH_CLPS711X || ARCH_L7200 || ARCH_CDB89712 || ARCH_H720X select CPU_32v4T select CPU_ABRT_LV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4 select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -113,6 +116,7 @@ config CPU_ARM920T default y if CPU_S3C2410 || CPU_S3C2440 || CPU_S3C2442 || ARCH_AT91RM9200 select CPU_32v4T select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WT select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -135,6 +139,7 @@ config CPU_ARM922T default y if ARCH_LH7A40X || ARCH_KS8695 select CPU_32v4T select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WT select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -155,6 +160,7 @@ config CPU_ARM925T default y if ARCH_OMAP15XX select CPU_32v4T select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WT select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -175,6 +181,7 @@ config CPU_ARM926T default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_NS9XXX || ARCH_DAVINCI select CPU_32v5 select CPU_ABRT_EV5TJ + select CPU_PABRT_NOIFAR select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_COPY_V4WB if MMU @@ -226,6 +233,7 @@ config CPU_ARM1020 depends on ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WT select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -244,6 +252,7 @@ config CPU_ARM1020E depends on ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WT select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -257,6 +266,7 @@ config CPU_ARM1022 depends on ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T + select CPU_PABRT_NOIFAR select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better @@ -275,6 +285,7 @@ config CPU_ARM1026 depends on ARCH_INTEGRATOR select 
CPU_32v5 select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 + select CPU_PABRT_NOIFAR select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better @@ -293,6 +304,7 @@ config CPU_SA110 select CPU_32v3 if ARCH_RPC select CPU_32v4 if !ARCH_RPC select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -314,6 +326,7 @@ config CPU_SA1100 default y select CPU_32v4 select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_CP15_MMU @@ -326,6 +339,7 @@ config CPU_XSCALE default y select CPU_32v5 select CPU_ABRT_EV5T + select CPU_PABRT_NOIFAR select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_TLB_V4WBI if MMU @@ -348,6 +362,7 @@ config CPU_V6 depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB || ARCH_OMAP2 select CPU_32v6 select CPU_ABRT_EV6 + select CPU_PABRT_NOIFAR select CPU_CACHE_V6 select CPU_CACHE_VIPT select CPU_CP15_MMU @@ -374,6 +389,7 @@ config CPU_V7 select CPU_32v6K select CPU_32v7 select CPU_ABRT_EV7 + select CPU_PABRT_IFAR select CPU_CACHE_V7 select CPU_CACHE_VIPT select CPU_CP15_MMU @@ -434,6 +450,12 @@ config CPU_ABRT_EV6 config CPU_ABRT_EV7 bool +config CPU_PABRT_IFAR + bool + +config CPU_PABRT_NOIFAR + bool + # The cache model config CPU_CACHE_V3 bool diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 700c04d6996..32fd7ea533f 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -478,6 +478,7 @@ arm1020_processor_functions: .word cpu_arm1020_dcache_clean_area .word cpu_arm1020_switch_mm .word cpu_arm1020_set_pte_ext + .word pabort_noifar .size arm1020_processor_functions, . 
- arm1020_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 1cc206ab5ea..fe2b0ae7027 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -459,6 +459,7 @@ arm1020e_processor_functions: .word cpu_arm1020e_dcache_clean_area .word cpu_arm1020e_switch_mm .word cpu_arm1020e_set_pte_ext + .word pabort_noifar .size arm1020e_processor_functions, . - arm1020e_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index aff0ea08e2f..06dde678e19 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -442,6 +442,7 @@ arm1022_processor_functions: .word cpu_arm1022_dcache_clean_area .word cpu_arm1022_switch_mm .word cpu_arm1022_set_pte_ext + .word pabort_noifar .size arm1022_processor_functions, . - arm1022_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 65e43a10908..f5506e6e681 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -437,6 +437,7 @@ arm1026_processor_functions: .word cpu_arm1026_dcache_clean_area .word cpu_arm1026_switch_mm .word cpu_arm1026_set_pte_ext + .word pabort_noifar .size arm1026_processor_functions, . - arm1026_processor_functions .section .rodata diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S index 123a7dc7a43..14b6a95c8d4 100644 --- a/arch/arm/mm/proc-arm6_7.S +++ b/arch/arm/mm/proc-arm6_7.S @@ -300,6 +300,7 @@ ENTRY(arm6_processor_functions) .word cpu_arm6_dcache_clean_area .word cpu_arm6_switch_mm .word cpu_arm6_set_pte_ext + .word pabort_noifar .size arm6_processor_functions, . - arm6_processor_functions /* @@ -316,6 +317,7 @@ ENTRY(arm7_processor_functions) .word cpu_arm7_dcache_clean_area .word cpu_arm7_switch_mm .word cpu_arm7_set_pte_ext + .word pabort_noifar .size arm7_processor_functions, . 
- arm7_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index dc763be4336..ca5e7aac2da 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -205,6 +205,7 @@ ENTRY(arm720_processor_functions) .word cpu_arm720_dcache_clean_area .word cpu_arm720_switch_mm .word cpu_arm720_set_pte_ext + .word pabort_noifar .size arm720_processor_functions, . - arm720_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 75c945ed6c4..0170d4f466e 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -424,6 +424,7 @@ arm920_processor_functions: .word cpu_arm920_dcache_clean_area .word cpu_arm920_switch_mm .word cpu_arm920_set_pte_ext + .word pabort_noifar .size arm920_processor_functions, . - arm920_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index ffb751b877f..b7952493d40 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -428,6 +428,7 @@ arm922_processor_functions: .word cpu_arm922_dcache_clean_area .word cpu_arm922_switch_mm .word cpu_arm922_set_pte_ext + .word pabort_noifar .size arm922_processor_functions, . - arm922_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 44c2c997819..e2988eba4cf 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -491,6 +491,7 @@ arm925_processor_functions: .word cpu_arm925_dcache_clean_area .word cpu_arm925_switch_mm .word cpu_arm925_set_pte_ext + .word pabort_noifar .size arm925_processor_functions, . 
- arm925_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 5b80b6bdd0c..b7961a1dac3 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -440,6 +440,7 @@ arm926_processor_functions: .word cpu_arm926_dcache_clean_area .word cpu_arm926_switch_mm .word cpu_arm926_set_pte_ext + .word pabort_noifar .size arm926_processor_functions, . - arm926_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 6e226e12989..4db3d6299a2 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -223,6 +223,7 @@ ENTRY(sa110_processor_functions) .word cpu_sa110_dcache_clean_area .word cpu_sa110_switch_mm .word cpu_sa110_set_pte_ext + .word pabort_noifar .size sa110_processor_functions, . - sa110_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 9afb11d089f..3cdef043760 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -238,6 +238,7 @@ ENTRY(sa1100_processor_functions) .word cpu_sa1100_dcache_clean_area .word cpu_sa1100_switch_mm .word cpu_sa1100_set_pte_ext + .word pabort_noifar .size sa1100_processor_functions, . - sa1100_processor_functions .section ".rodata" diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index eb42e5b9486..2162a692d99 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -240,6 +240,7 @@ ENTRY(v6_processor_functions) .word cpu_v6_dcache_clean_area .word cpu_v6_switch_mm .word cpu_v6_set_pte_ext + .word pabort_noifar .size v6_processor_functions, . 
- v6_processor_functions .type cpu_arch_name, #object diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 718f4782ee8..31453d81aa3 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -219,6 +219,7 @@ ENTRY(v7_processor_functions) .word cpu_v7_dcache_clean_area .word cpu_v7_switch_mm .word cpu_v7_set_pte_ext + .word pabort_ifar .size v7_processor_functions, . - v7_processor_functions .type cpu_arch_name, #object diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index c156ddab9a2..86faebb58b3 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -530,6 +530,7 @@ ENTRY(xscale_processor_functions) .word cpu_xscale_dcache_clean_area .word cpu_xscale_switch_mm .word cpu_xscale_set_pte_ext + .word pabort_noifar .size xscale_processor_functions, . - xscale_processor_functions .section ".rodata" diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 1dc13bc6d81..ff0094d75f1 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -63,7 +63,8 @@ the next instruction. If it is a floating point instruction, it executes the instruction, without returning to user space. In this way it repeatedly looks ahead and executes floating point instructions until it encounters a non floating point instruction, at which time it -returns via _fpreturn. +returns via _fpreturn. Decoding Thumb-2 instructions is hard so only +one instruction is emulated before returning. This is done to reduce the effect of the trap overhead on each floating point instructions. 
GCC attempts to group floating point @@ -80,7 +81,9 @@ emulate: bl EmulateAll @ emulate the instruction cmp r0, #0 @ was emulation successful moveq pc, r4 @ no, return failure - + ldr r7, [sp, #64] @ fetch the PSR + tst r7, #0x20 + movne pc, r9 @ return ok if in Thumb mode next: .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC @@ -94,7 +97,7 @@ next: str r5, [sp, #60] @ update PC copy in regs mov r0, r6 @ save a copy - ldr r1, [sp, #64] @ fetch the condition codes + mov r1, r7 @ fetch the condition codes bl checkCondition @ check the condition cmp r0, #0 @ r0 = 0 ==> condition failed diff --git a/include/asm-arm/cpu-multi32.h b/include/asm-arm/cpu-multi32.h index 715e18a4add..1146c8b2eb8 100644 --- a/include/asm-arm/cpu-multi32.h +++ b/include/asm-arm/cpu-multi32.h @@ -54,6 +54,10 @@ extern struct processor { * ignore 'ext'. */ void (*set_pte_ext)(pte_t *ptep, pte_t pte, unsigned int ext); + /* + * Retrieve prefetch fault address. + */ + unsigned long (*pabort_addr)(unsigned long lr); } processor; #define cpu_proc_init() processor._proc_init() diff --git a/include/asm-arm/glue.h b/include/asm-arm/glue.h index 22274ce8137..898b888b8f7 100644 --- a/include/asm-arm/glue.h +++ b/include/asm-arm/glue.h @@ -119,4 +119,31 @@ #error Unknown data abort handler type #endif +/* + * Prefetch abort handler. If the CPU has an IFAR use that, otherwise + * use the address of the aborted instruction + */ +#undef CPU_PABORT_HANDLER +#undef MULTI_PABORT + +#ifdef CONFIG_CPU_PABRT_IFAR +# ifdef CPU_PABORT_HANDLER +# define MULTI_PABORT 1 +# else +# define CPU_PABORT_HANDLER(reg, insn) mrc p15, 0, reg, cr6, cr0, 2 +# endif +#endif + +#ifdef CONFIG_CPU_PABRT_NOIFAR +# ifdef CPU_PABORT_HANDLER +# define MULTI_PABORT 1 +# else +# define CPU_PABORT_HANDLER(reg, insn) mov reg, insn +# endif +#endif + +#ifndef CPU_PABORT_HANDLER +#error Unknown prefetch abort handler type +#endif + #endif -- 2.41.1