User space can use the MEM_OP ioctl to make storage key checked reads and writes to the guest, however, it has no way of performing atomic, key checked, accesses to the guest. Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg mode. For now, support this mode for absolute accesses only.
This mode can be use, for example, to set the device-state-change indicator and the adapter-local-summary indicator atomically.
Also contains some fixes/changes for the memop selftest independent of the cmpxchg changes.
v1 -> v2 * get rid of xrk instruction for cmpxchg byte and short implementation * pass old parameter via pointer instead of in mem_op struct * indicate failure of cmpxchg due to wrong old value by special return code * picked up R-b's (thanks Thomas)
Janis Schoetterl-Glausch (9): s390/uaccess: Add storage key checked cmpxchg access to user space KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG KVM: s390: selftest: memop: Pass mop_desc via pointer KVM: s390: selftest: memop: Replace macros by functions KVM: s390: selftest: memop: Add cmpxchg tests KVM: s390: selftest: memop: Add bad address test KVM: s390: selftest: memop: Fix typo KVM: s390: selftest: memop: Fix wrong address being used in test
Documentation/virt/kvm/api.rst | 21 +- include/uapi/linux/kvm.h | 5 + arch/s390/include/asm/uaccess.h | 189 ++++++ arch/s390/kvm/gaccess.h | 4 + arch/s390/kvm/gaccess.c | 57 ++ arch/s390/kvm/kvm-s390.c | 35 +- tools/testing/selftests/kvm/s390x/memop.c | 674 +++++++++++++++++----- 7 files changed, 833 insertions(+), 152 deletions(-)
Range-diff against v1: 1: 7b4392170faa ! 1: 58adf2b7688a s390/uaccess: Add storage key checked cmpxchg access to user space @@ arch/s390/include/asm/uaccess.h: do { \ + unsigned __int128 *old_p, + unsigned __int128 new, u8 access_key) +{ -+ u32 shift, mask, old_word, new_word, align_mask, tmp, diff; ++ u32 shift, mask, old_word, new_word, align_mask, tmp; + u64 aligned; + int ret = -EFAULT; + @@ arch/s390/include/asm/uaccess.h: do { \ + new_word = ((u8)new) << shift; + break; + } ++ tmp = old_word; /* don't modify *old_p on fault */ + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: l %[tmp],%[aligned]\n" -+ "1: nr %[tmp],%[hole_mask]\n" ++ "1: nr %[tmp],%[mask]\n" ++ " xilf %[mask],0xffffffff\n" + " or %[new_word],%[tmp]\n" -+ " or %[old_word],%[tmp]\n" -+ " lr %[tmp],%[old_word]\n" -+ "2: cs %[tmp],%[new_word],%[aligned]\n" -+ "3: jnl 4f\n" -+ " xrk %[diff],%[tmp],%[old_word]\n" -+ " nr %[diff],%[hole_mask]\n" -+ " xr %[new_word],%[diff]\n" -+ " xr %[old_word],%[diff]\n" -+ " xrk %[diff],%[tmp],%[old_word]\n" ++ " or %[tmp],%[old_word]\n" ++ "2: lr %[old_word],%[tmp]\n" ++ "3: cs %[tmp],%[new_word],%[aligned]\n" ++ "4: jnl 5f\n" ++ /* We'll restore old_word before the cs, use reg for the diff */ ++ " xr %[old_word],%[tmp]\n" ++ /* Apply diff assuming only bits outside target byte(s) changed */ ++ " xr %[new_word],%[old_word]\n" ++ /* If prior assumption false we exit loop, so not an issue */ ++ " nr %[old_word],%[mask]\n" + " jz 2b\n" -+ "4: ipm %[ret]\n" ++ "5: ipm %[ret]\n" + " srl %[ret],28\n" -+ "5: sacf 768\n" ++ "6: sacf 768\n" + " spka %[default_key]\n" -+ EX_TABLE(0b, 5b) EX_TABLE(1b, 5b) -+ EX_TABLE(2b, 5b) EX_TABLE(3b, 5b) ++ EX_TABLE(0b, 6b) EX_TABLE(1b, 6b) ++ EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) + : [old_word] "+&d" (old_word), + [new_word] "+&d" (new_word), -+ [tmp] "=&d" (tmp), ++ [tmp] "+&d" (tmp), + [aligned] "+Q" (*(u32 *)aligned), -+ [diff] "=&d" (diff), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), -+ [hole_mask] "d" (~mask), ++ [mask] "d" (~mask), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); @@ arch/s390/include/asm/uaccess.h: do { \ + * cmpxchg_user_key_size() - cmpxchg with user space target, honoring storage keys + * @size: Size of the value being cmpxchg'ed, one of 1,2,4,8,16. + * @address: User space address of value to compare to *@old_p and exchange with -+ * *@new. Must be aligned to @size. ++ * @new. Must be aligned to @size. + * @old_p: Pointer to old value. Interpreted as a @size byte integer and compared + * to the content pointed to by @address in order to determine if the + * exchange occurs. The value read from @address is written back to *@old_p. 2: 80e3fda3d2af ! 2: c6731b0063ab KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg @@ Commit message Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
## include/uapi/linux/kvm.h ## -@@ include/uapi/linux/kvm.h: struct kvm_translation { - struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ -+ /* in & out */ - __u64 flags; /* flags */ -+ /* in */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ -+ /* in & out */ -+ __u64 old[2]; /* ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_p; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op { #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3) ++/* Non program exception return codes (pgm codes are 16 bit) */ ++#define KVM_S390_MEMOP_R_NO_XCHG ((1 << 16) + 0)
/* for KVM_INTERRUPT */ struct kvm_interrupt { @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* -+ * Check if it's a ro memslot, even tho that can't occur (they're unsupported). ++ * Check if it's a read-only memslot, even though that cannot occur ++ * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); + mark_page_dirty_in_slot(kvm, slot, gfn); + /* -+ * Assume that the fault is caused by key protection, the alternative -+ * is that the user page is write protected. ++ * Assume that the fault is caused by protection, either key protection ++ * or user page write protection. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; @@ arch/s390/kvm/kvm-s390.c: int kvm_vm_ioctl_check_extension(struct kvm *kvm, long r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: ++ /* ++ * Flag bits indicating which extensions are supported. ++ * The first extension doesn't use a flag, but pretend it does, ++ * this way that can be changed in the future. ++ */ + r = 0x3; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ arch/s390/kvm/kvm-s390.c: static bool access_key_invalid(u8 access_key) - return access_key > 0xf; - } - --static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) -+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop, bool *modified) + static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; -+ unsigned __int128 old; ++ void __user *old_p = (void __user *)mop->old_p; + union { + unsigned __int128 quad; + char raw[sizeof(unsigned __int128)]; -+ } new = { .quad = 0 }; ++ } old = { .quad = 0}, new = { .quad = 0 }; ++ unsigned int off_in_quad = sizeof(unsigned __int128) - mop->size; u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx;
-+ *modified = false; supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct + if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (mop->size > sizeof(new)) + return -EINVAL; -+ if (copy_from_user(&new.raw[sizeof(new) - mop->size], uaddr, mop->size)) ++ /* off_in_quad has been validated */ ++ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) ++ return -EFAULT; ++ if (copy_from_user(&old.raw[off_in_quad], old_p, mop->size)) + return -EFAULT; -+ memcpy(&old, mop->old, sizeof(old)); + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, -+ &old, new.quad, mop->key); -+ if (!r) { -+ mop->flags &= ~KVM_S390_MEMOP_F_CMPXCHG; -+ } else if (r == 1) { -+ memcpy(mop->old, &old, sizeof(old)); -+ r = 0; ++ &old.quad, new.quad, mop->key); ++ if (r == 1) { ++ r = KVM_S390_MEMOP_R_NO_XCHG; ++ if (copy_to_user(old_p, &old.raw[off_in_quad], mop->size)) ++ r = -EFAULT; + } -+ *modified = true; } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; -@@ arch/s390/kvm/kvm-s390.c: long kvm_arch_vm_ioctl(struct file *filp, - } - case KVM_S390_MEM_OP: { - struct kvm_s390_mem_op mem_op; -+ bool modified; - -- if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) -- r = kvm_s390_vm_mem_op(kvm, &mem_op); -- else -+ r = copy_from_user(&mem_op, argp, sizeof(mem_op)); -+ if (r) { - r = -EFAULT; -+ break; -+ } -+ r = kvm_s390_vm_mem_op(kvm, &mem_op, &modified); -+ if (r) -+ break; -+ if (modified) { -+ r = copy_to_user(argp, &mem_op, sizeof(mem_op)); -+ if (r) { -+ r = -EFAULT; -+ break; -+ } -+ } - break; - } - case KVM_S390_ZPCI_OP: { 3: cf036cd58aff < -: ------------ Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG -: ------------ > 3: 6cb32b244899 Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG 4: e1d25110a983 ! 4: 5f1217ad9d31 KVM: s390: selftest: memop: Pass mop_desc via pointer @@ Commit message The struct is quite large, so this seems nicer.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com + Reviewed-by: Thomas Huth thuth@redhat.com
## tools/testing/selftests/kvm/s390x/memop.c ## @@ tools/testing/selftests/kvm/s390x/memop.c: struct mop_desc { 5: e02924290577 = 5: 86a15b53846a KVM: s390: selftest: memop: Replace macros by functions 7: de6ac5a125e2 ! 6: 49e67d7559de KVM: s390: selftest: memop: Add cmpxchg tests @@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_fr } + if (desc->old) { + ksmo.flags |= KVM_S390_MEMOP_F_CMPXCHG; -+ switch (ksmo.size) { -+ case 1: -+ ksmo.old[1] = *(uint8_t *)desc->old; -+ break; -+ case 2: -+ ksmo.old[1] = *(uint16_t *)desc->old; -+ break; -+ case 4: -+ ksmo.old[1] = *(uint32_t *)desc->old; -+ break; -+ case 8: -+ ksmo.old[1] = *(uint64_t *)desc->old; -+ break; -+ case 16: -+ memcpy(ksmo.old, desc->old, sizeof(ksmo.old)); -+ break; -+ } ++ ksmo.old_p = (uint64_t)desc->old; + } if (desc->_ar) ksmo.ar = desc->ar; else -@@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) - return ksmo; - } - -+static void cmpxchg_write_back(struct kvm_s390_mem_op *ksmo, struct mop_desc *desc) -+{ -+ if (desc->old) { -+ switch (ksmo->size) { -+ case 1: -+ *(uint8_t *)desc->old = ksmo->old[1]; -+ break; -+ case 2: -+ *(uint16_t *)desc->old = ksmo->old[1]; -+ break; -+ case 4: -+ *(uint32_t *)desc->old = ksmo->old[1]; -+ break; -+ case 8: -+ *(uint64_t *)desc->old = ksmo->old[1]; -+ break; -+ case 16: -+ memcpy(desc->old, ksmo->old, sizeof(ksmo->old)); -+ break; -+ } -+ } -+ if (desc->cmpxchg_success) -+ *desc->cmpxchg_success = !(ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG); -+} -+ - struct test_info { - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm printf("ABSOLUTE, WRITE, "); break; } - printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", - ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); -+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old[0]=%llu, old[1]=%llu", ++ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_p=%llx", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, -+ ksmo->old[0], ksmo->old[1]); ++ ksmo->old_p); if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) printf(", CHECK_ONLY"); if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vc }
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) -+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, -+ struct mop_desc *desc) ++static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, ++ struct mop_desc *desc) { struct kvm_vcpu *vcpu = info.vcpu;
-@@ tools/testing/selftests/kvm/s390x/memop.c: static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) - vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); + if (!vcpu) +- vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); ++ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); else - vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ cmpxchg_write_back(ksmo, desc); +- vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); ++ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); }
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) -+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, -+ struct mop_desc *desc) ++static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, ++ struct mop_desc *desc) { - struct kvm_vcpu *vcpu = info.vcpu; +- struct kvm_vcpu *vcpu = info.vcpu; + int r; ++ ++ r = err_memop_ioctl(info, ksmo, desc); ++ if (ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG) { ++ if (desc->cmpxchg_success) ++ *desc->cmpxchg_success = !r; ++ if (r == KVM_S390_MEMOP_R_NO_XCHG) ++ r = 0; ++ } ++ TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
- if (!vcpu) +- if (!vcpu) - return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); -+ r = __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); - else +- else - return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ r = __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ cmpxchg_write_back(ksmo, desc); -+ return r; }
#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ 6: f4ce20cd7eff = 7: faad9cf03ea6 KVM: s390: selftest: memop: Add bad address test 8: 0bad86fd6183 ! 8: 8070036aa89a KVM: s390: selftest: memop: Fix typo @@ Metadata ## Commit message ## KVM: s390: selftest: memop: Fix typo
+ "acceeded" isn't a word, should be "exceeded". + Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com + Reviewed-by: Thomas Huth thuth@redhat.com
## tools/testing/selftests/kvm/s390x/memop.c ## @@ tools/testing/selftests/kvm/s390x/memop.c: static void test_errors_key_fetch_prot_override_enabled(void) 9: 7a1e9cb79bbb = 9: 18c423e4e3ad KVM: s390: selftest: memop: Fix wrong address being used in test
base-commit: 4fe89d07dcc2804c8b562f6c7896a45643d34b2f