On Fri, Dec 20, 2013 at 08:48:42AM -0800, Victor Kamensky wrote:
> ARM v7 KVM assembler files fixes to work in big endian mode:
I don't think 'files fixes' is proper English, could be something like:
Fix ARM v7 KVM assembler files to work...
> vgic h/w registers are little endian; when asm code reads/writes from/to

the vgic h/w registers

> them, it needs to do byteswap after/before. Byte swap code uses ARM_BE8

Byteswap

> wrapper to add swap only if BIG_ENDIAN kernel is configured

what is the config symbol, CONFIG_BIG_ENDIAN?
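(For anyone following along: the pattern being described looks roughly like the vgic save path further down. A minimal sketch, relying on ARM_BE8() emitting its argument only under CONFIG_CPU_ENDIAN_BE8:

	ldr	r3, [r2, #GICH_HCR]	@ 32-bit load from the little-endian vgic
ARM_BE8(rev	r3, r3)			@ byteswap only on a BE8 kernel
	str	r3, [r11, #VGIC_CPU_HCR]	@ store in native byte order
)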
> mcrr and mrrc instructions take couple 32 bit registers as argument, one

The mcrr and mrrc...

a couple of

as their arguments

> is supposed to be high part of 64 bit value and another is low part of
> 64 bit. Typically those values are loaded/stored with ldrd and strd

one is supposed to be?

> instructions and those will load high and low parts in opposite register
> depending on endianity. Introduce and use rr_lo_hi macro that swap

opposite register? This text is more confusing than clarifying; I think you need to explain how the rr_lo_hi macro is intended to be used, if anything.

> registers in BE mode when they are passed to mcrr and mrrc instructions.
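(Concretely, for a 64-bit value at [r0], a sketch of the layout issue this paragraph is describing:

	ldrd	r2, r3, [r0]	@ r2 = word at [r0], r3 = word at [r0+4]
	@ LE memory: low word at the lower address,  so r2 = low half
	@ BE memory: high word at the lower address, so r2 = high half

while mcrr/mrrc always take the register pair as (low, high), independent of endianness.)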
> function that returns 64 bit result __kvm_vcpu_run in couple registers
> has to be adjusted for BE case.

The __kvm_vcpu_run function returns a 64-bit result in two registers, which has...
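(For reference: the AAPCS returns a 64-bit value in r0/r1 laid out as if transferred with ldm/stm, so the low word sits in r0 on LE but in r1 on BE8. A worked example with a return value of 42:

	LE:  r0 = 0x0000002a (low word),  r1 = 0x00000000 (high word)
	BE8: r0 = 0x00000000 (high word), r1 = 0x0000002a (low word)

which is why the epilogue below clears r0 rather than r1 in the __ARMEB__ case.)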
> Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
> ---
>  arch/arm/include/asm/assembler.h |  7 +++++++
>  arch/arm/include/asm/kvm_asm.h   |  4 ++--
>  arch/arm/kvm/init.S              |  7 +++++--
>  arch/arm/kvm/interrupts.S        | 12 +++++++++---
>  arch/arm/kvm/interrupts_head.S   | 27 ++++++++++++++++++++-------
>  5 files changed, 43 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 5c22851..ad1ad31 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -60,6 +60,13 @@
>  #define ARM_BE8(code...)
>  #endif
>  
> +/* swap pair of registers position depending on current endianity */
> +#ifdef CONFIG_CPU_ENDIAN_BE8
> +#define rr_lo_hi(a1, a2) a2, a1
> +#else
> +#define rr_lo_hi(a1, a2) a1, a2
> +#endif
> +
I'm not convinced that this is needed generally in the kernel rather than locally in KVM, but if it is, then I think it needs to be documented more. I assume the idea here is that a1 is always the lower-numbered register in an ldrd instruction loading the values to write to the register?
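A usage sketch under that assumption, based on the VTTBR write further down:

	ldrd	r2, r3, [r0]			@ a1 = r2, the lower-numbered register
	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ LE: expands to "r2, r3"; BE8: to "r3, r2"

i.e. a1 holds the low half only on LE, and the macro swaps the pair so that mcrr's first operand is always the low word.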
>  /*
>   * Data preload for architectures that support it
>   */
> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
> index 661da11..12981d6 100644
> --- a/arch/arm/include/asm/kvm_asm.h
> +++ b/arch/arm/include/asm/kvm_asm.h
> @@ -26,9 +26,9 @@
>  #define c1_ACTLR	4	/* Auxilliary Control Register */
>  #define c1_CPACR	5	/* Coprocessor Access Control */
>  #define c2_TTBR0	6	/* Translation Table Base Register 0 */
> -#define c2_TTBR0_high	7	/* TTBR0 top 32 bits */
> +#define c2_TTBR0_hilo	7	/* TTBR0 top 32 bits in LE case, low 32 bits in BE case */
>  #define c2_TTBR1	8	/* Translation Table Base Register 1 */
> -#define c2_TTBR1_high	9	/* TTBR1 top 32 bits */
> +#define c2_TTBR1_hilo	9	/* TTBR1 top 32 bits in LE case, low 32 bits in BE case */
These lines far exceed 80 chars, but not sure how to improve on that...
>  #define c2_TTBCR	10	/* Translation Table Base Control R. */
>  #define c3_DACR	11	/* Domain Access Control Register */
>  #define c5_DFSR	12	/* Data Fault Status Register */
> diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
> index 1b9844d..2d10b2d 100644
> --- a/arch/arm/kvm/init.S
> +++ b/arch/arm/kvm/init.S
> @@ -22,6 +22,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/assembler.h>
> 
>  /********************************************************************
>   * Hypervisor initialization
> @@ -70,8 +71,10 @@ __do_hyp_init:
>  	cmp	r0, #0			@ We have a SP?
>  	bne	phase2			@ Yes, second stage init
> 
> +ARM_BE8(setend	be) @ Switch to Big Endian mode if needed
> +
>  	@ Set the HTTBR to point to the hypervisor PGD pointer passed
> -	mcrr	p15, 4, r2, r3, c2
> +	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
> 
>  	@ Set the HTCR and VTCR to the same shareability and cacheability
>  	@ settings as the non-secure TTBCR and with T0SZ == 0.
> @@ -137,7 +140,7 @@ phase2:
>  	mov	pc, r0
> 
> target:	@ We're now in the trampoline code, switch page tables
> -	mcrr	p15, 4, r2, r3, c2
> +	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
>  	isb
I guess you could switch r2 and r3 (without a third register or using stack space) on big endian to avoid the need for the macro in a header file and define the macro locally in the interrupts*.S files... Hmmm, undecided.
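Something like this, I suppose, as an eor-swap sketch of that alternative (not from the patch):

ARM_BE8(eor	r2, r2, r3)	@ classic three-eor swap of r2 and r3,
ARM_BE8(eor	r3, r2, r3)	@ no scratch register or stack needed
ARM_BE8(eor	r2, r2, r3)
	mcrr	p15, 4, r2, r3, c2

though that costs three instructions per transfer where the macro costs none.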
>  	@ Invalidate the old TLBs
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index df19133..0784ec3 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -25,6 +25,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/vfpmacros.h>
> +#include <asm/assembler.h>
>  #include "interrupts_head.S"
> 
>  .text
> @@ -52,14 +53,14 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
>  	dsb	ishst
>  	add	r0, r0, #KVM_VTTBR
>  	ldrd	r2, r3, [r0]
> -	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
> +	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
>  	isb
>  	mcr	p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
>  	dsb	ish
>  	isb
>  	mov	r2, #0
>  	mov	r3, #0
> -	mcrr	p15, 6, r2, r3, c2	@ Back to VMID #0
> +	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Back to VMID #0
>  	isb			@ Not necessary if followed by eret
> 
>  	ldmia	sp!, {r2, r3}
> @@ -135,7 +136,7 @@ ENTRY(__kvm_vcpu_run)
>  	ldr	r1, [vcpu, #VCPU_KVM]
>  	add	r1, r1, #KVM_VTTBR
>  	ldrd	r2, r3, [r1]
> -	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
> +	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
> 
>  	@ We're all done, just restore the GPRs and go to the guest
>  	restore_guest_regs
> @@ -199,8 +200,13 @@ after_vfp_restore:
>  	restore_host_regs
> 
>  	clrex				@ Clear exclusive monitor
> +#ifndef __ARMEB__
>  	mov	r0, r1			@ Return the return code
>  	mov	r1, #0			@ Clear upper bits in return value
> +#else
> +	@ r1 already has return code
> +	mov	r0, #0			@ Clear upper bits in return value
> +#endif /* __ARMEB__ */
>  	bx	lr			@ return to IOCTL
> 
> /********************************************************************
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index c371db7..67b4002 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -251,8 +251,8 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	mrc	p15, 0, r3, c1, c0, 2	@ CPACR
>  	mrc	p15, 0, r4, c2, c0, 2	@ TTBCR
>  	mrc	p15, 0, r5, c3, c0, 0	@ DACR
> -	mrrc	p15, 0, r6, r7, c2	@ TTBR 0
> -	mrrc	p15, 1, r8, r9, c2	@ TTBR 1
> +	mrrc	p15, 0, rr_lo_hi(r6, r7), c2	@ TTBR 0
> +	mrrc	p15, 1, rr_lo_hi(r8, r9), c2	@ TTBR 1
>  	mrc	p15, 0, r10, c10, c2, 0	@ PRRR
>  	mrc	p15, 0, r11, c10, c2, 1	@ NMRR
>  	mrc	p15, 2, r12, c0, c0, 0	@ CSSELR
> @@ -380,8 +380,8 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	mcr	p15, 0, r3, c1, c0, 2	@ CPACR
>  	mcr	p15, 0, r4, c2, c0, 2	@ TTBCR
>  	mcr	p15, 0, r5, c3, c0, 0	@ DACR
> -	mcrr	p15, 0, r6, r7, c2	@ TTBR 0
> -	mcrr	p15, 1, r8, r9, c2	@ TTBR 1
> +	mcrr	p15, 0, rr_lo_hi(r6, r7), c2	@ TTBR 0
> +	mcrr	p15, 1, rr_lo_hi(r8, r9), c2	@ TTBR 1
>  	mcr	p15, 0, r10, c10, c2, 0	@ PRRR
>  	mcr	p15, 0, r11, c10, c2, 1	@ NMRR
>  	mcr	p15, 2, r12, c0, c0, 0	@ CSSELR
> @@ -413,13 +413,21 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	ldr	r9, [r2, #GICH_ELRSR1]
>  	ldr	r10, [r2, #GICH_APR]
> 
> +ARM_BE8(rev	r3, r3	)
>  	str	r3, [r11, #VGIC_CPU_HCR]
> +ARM_BE8(rev	r4, r4	)
>  	str	r4, [r11, #VGIC_CPU_VMCR]
> +ARM_BE8(rev	r5, r5	)
>  	str	r5, [r11, #VGIC_CPU_MISR]
> +ARM_BE8(rev	r6, r6	)
>  	str	r6, [r11, #VGIC_CPU_EISR]
> +ARM_BE8(rev	r7, r7	)
>  	str	r7, [r11, #(VGIC_CPU_EISR + 4)]
> +ARM_BE8(rev	r8, r8	)
>  	str	r8, [r11, #VGIC_CPU_ELRSR]
> +ARM_BE8(rev	r9, r9	)
>  	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
> +ARM_BE8(rev	r10, r10 )
>  	str	r10, [r11, #VGIC_CPU_APR]
Wouldn't it be semantically cleaner to do the byteswap after the loads from the hardware instead?
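That is, grouping each swap with the read it corrects, a sketch of the suggestion (not what the patch does):

	ldr	r3, [r2, #GICH_HCR]
ARM_BE8(rev	r3, r3)
	ldr	r4, [r2, #GICH_VMCR]
ARM_BE8(rev	r4, r4)

so every value is in native byte order from the moment it leaves the device.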
> 
>  	/* Clear GICH_HCR */
> @@ -431,6 +439,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	add	r3, r11, #VGIC_CPU_LR
>  	ldr	r4, [r11, #VGIC_CPU_NR_LR]
> 1:	ldr	r6, [r2], #4
> +ARM_BE8(rev	r6, r6	)
>  	str	r6, [r3], #4
>  	subs	r4, r4, #1
>  	bne	1b
> @@ -459,8 +468,11 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	ldr	r4, [r11, #VGIC_CPU_VMCR]
>  	ldr	r8, [r11, #VGIC_CPU_APR]
> 
> +ARM_BE8(rev	r3, r3	)
>  	str	r3, [r2, #GICH_HCR]
> +ARM_BE8(rev	r4, r4	)
>  	str	r4, [r2, #GICH_VMCR]
> +ARM_BE8(rev	r8, r8	)
>  	str	r8, [r2, #GICH_APR]
> 
>  	/* Restore list registers */
> @@ -468,6 +480,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	add	r3, r11, #VGIC_CPU_LR
>  	ldr	r4, [r11, #VGIC_CPU_NR_LR]
> 1:	ldr	r6, [r3], #4
> +ARM_BE8(rev	r6, r6	)
>  	str	r6, [r2], #4
>  	subs	r4, r4, #1
>  	bne	1b
> @@ -498,7 +511,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
>  	isb
> 
> -	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
> +	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
>  	ldr	r4, =VCPU_TIMER_CNTV_CVAL
>  	add	r5, vcpu, r4
>  	strd	r2, r3, [r5]
> @@ -538,12 +551,12 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
>  	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
> -	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
> +	mcrr	p15, 4, rr_lo_hi(r2, r3), c14	@ CNTVOFF
> 
>  	ldr	r4, =VCPU_TIMER_CNTV_CVAL
>  	add	r5, vcpu, r4
>  	ldrd	r2, r3, [r5]
> -	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
> +	mcrr	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
>  	isb
> 
>  	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
> -- 
> 1.8.1.4
Thanks,