User space can use the MEM_OP ioctl to make storage key checked reads and writes to the guest; however, it has no way of performing atomic, key-checked accesses to the guest. Extend the MEM_OP ioctl to allow for this by adding a cmpxchg mode. For now, support this mode for absolute accesses only.
This mode can be used, for example, to set the device-state-change indicator and the adapter-local-summary indicator atomically.
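To make the intended use concrete, here is a minimal user-space sketch of setting a bit in a one-byte guest indicator with the proposed interface. It is illustrative only: it relies on the uapi added by this series (KVM_S390_MEMOP_F_CMPXCHG, the old_p field, and the KVM_S390_MEMOP_R_NO_XCHG return code), and vm_fd, gaddr, bit and access_key are placeholders supplied by the caller.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: atomically OR a bit into a guest-absolute indicator byte. */
static int set_indicator_bit(int vm_fd, uint64_t gaddr, uint8_t bit, uint8_t access_key)
{
	uint8_t old = 0, new;
	struct kvm_s390_mem_op ksmo = {
		.gaddr = gaddr,
		.size = 1,
		.op = KVM_S390_MEMOP_ABSOLUTE_WRITE,
		.flags = KVM_S390_MEMOP_F_CMPXCHG | KVM_S390_MEMOP_F_SKEY_PROTECTION,
		.key = access_key,
		.buf = (uint64_t)&new,
		.old_p = (uint64_t)&old,
	};
	int ret;

	do {
		new = old | bit;	/* value the guest byte should become */
		ret = ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo);
		/* on R_NO_XCHG, old now holds the current guest value, so retry */
	} while (ret == KVM_S390_MEMOP_R_NO_XCHG);
	return ret;	/* 0 on success; errors and program interruption codes not handled here */
}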
The series also contains some fixes/changes for the memop selftest that are independent of the cmpxchg changes.
v1 -> v2
 * get rid of xrk instruction for cmpxchg byte and short implementation
 * pass old parameter via pointer instead of in mem_op struct
 * indicate failure of cmpxchg due to wrong old value by special return code
 * picked up R-b's (thanks Thomas)
Janis Schoetterl-Glausch (9):
  s390/uaccess: Add storage key checked cmpxchg access to user space
  KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
  Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
  KVM: s390: selftest: memop: Pass mop_desc via pointer
  KVM: s390: selftest: memop: Replace macros by functions
  KVM: s390: selftest: memop: Add cmpxchg tests
  KVM: s390: selftest: memop: Add bad address test
  KVM: s390: selftest: memop: Fix typo
  KVM: s390: selftest: memop: Fix wrong address being used in test
 Documentation/virt/kvm/api.rst            |  21 +-
 include/uapi/linux/kvm.h                  |   5 +
 arch/s390/include/asm/uaccess.h           | 189 ++++
 arch/s390/kvm/gaccess.h                   |   4 +
 arch/s390/kvm/gaccess.c                   |  57 ++
 arch/s390/kvm/kvm-s390.c                  |  35 +-
 tools/testing/selftests/kvm/s390x/memop.c | 674 +++++++++++++++++-----
 7 files changed, 833 insertions(+), 152 deletions(-)
Range-diff against v1: 1: 7b4392170faa ! 1: 58adf2b7688a s390/uaccess: Add storage key checked cmpxchg access to user space @@ arch/s390/include/asm/uaccess.h: do { \ + unsigned __int128 *old_p, + unsigned __int128 new, u8 access_key) +{ -+ u32 shift, mask, old_word, new_word, align_mask, tmp, diff; ++ u32 shift, mask, old_word, new_word, align_mask, tmp; + u64 aligned; + int ret = -EFAULT; + @@ arch/s390/include/asm/uaccess.h: do { \ + new_word = ((u8)new) << shift; + break; + } ++ tmp = old_word; /* don't modify *old_p on fault */ + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: l %[tmp],%[aligned]\n" -+ "1: nr %[tmp],%[hole_mask]\n" ++ "1: nr %[tmp],%[mask]\n" ++ " xilf %[mask],0xffffffff\n" + " or %[new_word],%[tmp]\n" -+ " or %[old_word],%[tmp]\n" -+ " lr %[tmp],%[old_word]\n" -+ "2: cs %[tmp],%[new_word],%[aligned]\n" -+ "3: jnl 4f\n" -+ " xrk %[diff],%[tmp],%[old_word]\n" -+ " nr %[diff],%[hole_mask]\n" -+ " xr %[new_word],%[diff]\n" -+ " xr %[old_word],%[diff]\n" -+ " xrk %[diff],%[tmp],%[old_word]\n" ++ " or %[tmp],%[old_word]\n" ++ "2: lr %[old_word],%[tmp]\n" ++ "3: cs %[tmp],%[new_word],%[aligned]\n" ++ "4: jnl 5f\n" ++ /* We'll restore old_word before the cs, use reg for the diff */ ++ " xr %[old_word],%[tmp]\n" ++ /* Apply diff assuming only bits outside target byte(s) changed */ ++ " xr %[new_word],%[old_word]\n" ++ /* If prior assumption false we exit loop, so not an issue */ ++ " nr %[old_word],%[mask]\n" + " jz 2b\n" -+ "4: ipm %[ret]\n" ++ "5: ipm %[ret]\n" + " srl %[ret],28\n" -+ "5: sacf 768\n" ++ "6: sacf 768\n" + " spka %[default_key]\n" -+ EX_TABLE(0b, 5b) EX_TABLE(1b, 5b) -+ EX_TABLE(2b, 5b) EX_TABLE(3b, 5b) ++ EX_TABLE(0b, 6b) EX_TABLE(1b, 6b) ++ EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) + : [old_word] "+&d" (old_word), + [new_word] "+&d" (new_word), -+ [tmp] "=&d" (tmp), ++ [tmp] "+&d" (tmp), + [aligned] "+Q" (*(u32 *)aligned), -+ [diff] "=&d" (diff), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), -+ [hole_mask] "d" (~mask), ++ [mask] "d" (~mask), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); @@ arch/s390/include/asm/uaccess.h: do { \ + * cmpxchg_user_key_size() - cmpxchg with user space target, honoring storage keys + * @size: Size of the value being cmpxchg'ed, one of 1,2,4,8,16. + * @address: User space address of value to compare to *@old_p and exchange with -+ * *@new. Must be aligned to @size. ++ * @new. Must be aligned to @size. + * @old_p: Pointer to old value. Interpreted as a @size byte integer and compared + * to the content pointed to by @address in order to determine if the + * exchange occurs. The value read from @address is written back to *@old_p. 2: 80e3fda3d2af ! 2: c6731b0063ab KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg @@ Commit message Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
## include/uapi/linux/kvm.h ## -@@ include/uapi/linux/kvm.h: struct kvm_translation { - struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ -+ /* in & out */ - __u64 flags; /* flags */ -+ /* in */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ -+ /* in & out */ -+ __u64 old[2]; /* ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_p; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op { #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3) ++/* Non program exception return codes (pgm codes are 16 bit) */ ++#define KVM_S390_MEMOP_R_NO_XCHG ((1 << 16) + 0)
/* for KVM_INTERRUPT */ struct kvm_interrupt { @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* -+ * Check if it's a ro memslot, even tho that can't occur (they're unsupported). ++ * Check if it's a read-only memslot, even though that cannot occur ++ * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) @@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); + mark_page_dirty_in_slot(kvm, slot, gfn); + /* -+ * Assume that the fault is caused by key protection, the alternative -+ * is that the user page is write protected. ++ * Assume that the fault is caused by protection, either key protection ++ * or user page write protection. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; @@ arch/s390/kvm/kvm-s390.c: int kvm_vm_ioctl_check_extension(struct kvm *kvm, long r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: ++ /* ++ * Flag bits indicating which extensions are supported. ++ * The first extension doesn't use a flag, but pretend it does, ++ * this way that can be changed in the future. ++ */ + r = 0x3; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ arch/s390/kvm/kvm-s390.c: static bool access_key_invalid(u8 access_key) - return access_key > 0xf; - } - --static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) -+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop, bool *modified) + static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; -+ unsigned __int128 old; ++ void __user *old_p = (void __user *)mop->old_p; + union { + unsigned __int128 quad; + char raw[sizeof(unsigned __int128)]; -+ } new = { .quad = 0 }; ++ } old = { .quad = 0}, new = { .quad = 0 }; ++ unsigned int off_in_quad = sizeof(unsigned __int128) - mop->size; u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx;
-+ *modified = false; supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct + if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (mop->size > sizeof(new)) + return -EINVAL; -+ if (copy_from_user(&new.raw[sizeof(new) - mop->size], uaddr, mop->size)) ++ /* off_in_quad has been validated */ ++ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) ++ return -EFAULT; ++ if (copy_from_user(&old.raw[off_in_quad], old_p, mop->size)) + return -EFAULT; -+ memcpy(&old, mop->old, sizeof(old)); + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); @@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, -+ &old, new.quad, mop->key); -+ if (!r) { -+ mop->flags &= ~KVM_S390_MEMOP_F_CMPXCHG; -+ } else if (r == 1) { -+ memcpy(mop->old, &old, sizeof(old)); -+ r = 0; ++ &old.quad, new.quad, mop->key); ++ if (r == 1) { ++ r = KVM_S390_MEMOP_R_NO_XCHG; ++ if (copy_to_user(old_p, &old.raw[off_in_quad], mop->size)) ++ r = -EFAULT; + } -+ *modified = true; } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; -@@ arch/s390/kvm/kvm-s390.c: long kvm_arch_vm_ioctl(struct file *filp, - } - case KVM_S390_MEM_OP: { - struct kvm_s390_mem_op mem_op; -+ bool modified; - -- if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) -- r = kvm_s390_vm_mem_op(kvm, &mem_op); -- else -+ r = copy_from_user(&mem_op, argp, sizeof(mem_op)); -+ if (r) { - r = -EFAULT; -+ break; -+ } -+ r = kvm_s390_vm_mem_op(kvm, &mem_op, &modified); -+ if (r) -+ break; -+ if (modified) { -+ r = copy_to_user(argp, &mem_op, sizeof(mem_op)); -+ if (r) { -+ r = -EFAULT; -+ break; -+ } -+ } - break; - } - case KVM_S390_ZPCI_OP: { 3: cf036cd58aff < -: ------------ Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG -: ------------ > 3: 6cb32b244899 Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG 4: e1d25110a983 ! 4: 5f1217ad9d31 KVM: s390: selftest: memop: Pass mop_desc via pointer @@ Commit message The struct is quite large, so this seems nicer.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com + Reviewed-by: Thomas Huth thuth@redhat.com
## tools/testing/selftests/kvm/s390x/memop.c ## @@ tools/testing/selftests/kvm/s390x/memop.c: struct mop_desc { 5: e02924290577 = 5: 86a15b53846a KVM: s390: selftest: memop: Replace macros by functions 7: de6ac5a125e2 ! 6: 49e67d7559de KVM: s390: selftest: memop: Add cmpxchg tests @@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_fr } + if (desc->old) { + ksmo.flags |= KVM_S390_MEMOP_F_CMPXCHG; -+ switch (ksmo.size) { -+ case 1: -+ ksmo.old[1] = *(uint8_t *)desc->old; -+ break; -+ case 2: -+ ksmo.old[1] = *(uint16_t *)desc->old; -+ break; -+ case 4: -+ ksmo.old[1] = *(uint32_t *)desc->old; -+ break; -+ case 8: -+ ksmo.old[1] = *(uint64_t *)desc->old; -+ break; -+ case 16: -+ memcpy(ksmo.old, desc->old, sizeof(ksmo.old)); -+ break; -+ } ++ ksmo.old_p = (uint64_t)desc->old; + } if (desc->_ar) ksmo.ar = desc->ar; else -@@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) - return ksmo; - } - -+static void cmpxchg_write_back(struct kvm_s390_mem_op *ksmo, struct mop_desc *desc) -+{ -+ if (desc->old) { -+ switch (ksmo->size) { -+ case 1: -+ *(uint8_t *)desc->old = ksmo->old[1]; -+ break; -+ case 2: -+ *(uint16_t *)desc->old = ksmo->old[1]; -+ break; -+ case 4: -+ *(uint32_t *)desc->old = ksmo->old[1]; -+ break; -+ case 8: -+ *(uint64_t *)desc->old = ksmo->old[1]; -+ break; -+ case 16: -+ memcpy(desc->old, ksmo->old, sizeof(ksmo->old)); -+ break; -+ } -+ } -+ if (desc->cmpxchg_success) -+ *desc->cmpxchg_success = !(ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG); -+} -+ - struct test_info { - struct kvm_vm *vm; - struct kvm_vcpu *vcpu; @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm printf("ABSOLUTE, WRITE, "); break; } - printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", - ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); -+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old[0]=%llu, old[1]=%llu", ++ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_p=%llx", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, -+ ksmo->old[0], ksmo->old[1]); ++ ksmo->old_p); if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) printf(", CHECK_ONLY"); if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) @@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vc }
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) -+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, -+ struct mop_desc *desc) ++static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, ++ struct mop_desc *desc) { struct kvm_vcpu *vcpu = info.vcpu;
-@@ tools/testing/selftests/kvm/s390x/memop.c: static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) - vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); + if (!vcpu) +- vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); ++ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); else - vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ cmpxchg_write_back(ksmo, desc); +- vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); ++ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); }
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) -+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, -+ struct mop_desc *desc) ++static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, ++ struct mop_desc *desc) { - struct kvm_vcpu *vcpu = info.vcpu; +- struct kvm_vcpu *vcpu = info.vcpu; + int r; ++ ++ r = err_memop_ioctl(info, ksmo, desc); ++ if (ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG) { ++ if (desc->cmpxchg_success) ++ *desc->cmpxchg_success = !r; ++ if (r == KVM_S390_MEMOP_R_NO_XCHG) ++ r = 0; ++ } ++ TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
- if (!vcpu) +- if (!vcpu) - return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); -+ r = __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); - else +- else - return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ r = __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -+ cmpxchg_write_back(ksmo, desc); -+ return r; }
#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ 6: f4ce20cd7eff = 7: faad9cf03ea6 KVM: s390: selftest: memop: Add bad address test 8: 0bad86fd6183 ! 8: 8070036aa89a KVM: s390: selftest: memop: Fix typo @@ Metadata ## Commit message ## KVM: s390: selftest: memop: Fix typo
+ "acceeded" isn't a word, should be "exceeded". + Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com + Reviewed-by: Thomas Huth thuth@redhat.com
## tools/testing/selftests/kvm/s390x/memop.c ## @@ tools/testing/selftests/kvm/s390x/memop.c: static void test_errors_key_fetch_prot_override_enabled(void) 9: 7a1e9cb79bbb = 9: 18c423e4e3ad KVM: s390: selftest: memop: Fix wrong address being used in test
base-commit: 4fe89d07dcc2804c8b562f6c7896a45643d34b2f
Add cmpxchg functionality similar to that in cmpxchg.h except that the target is a user space address and that the address' storage key is matched with the access_key argument in order to honor key-controlled protection. The access is performed by changing to the secondary-spaces mode and setting the PSW key for the duration of the compare and swap.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
---
Possible variations:
 * check the assumptions made in cmpxchg_user_key_size and error out
 * call functions called by copy_to_user
 * access_ok? is a nop
 * should_fail_usercopy?
 * instrument_copy_to_user? doesn't make sense IMO
 * don't be overly strict in cmpxchg_user_key
 arch/s390/include/asm/uaccess.h | 189 ++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f7038b800cc3..f148f5a22c93 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -19,6 +19,8 @@ #include <asm/extable.h> #include <asm/facility.h> #include <asm-generic/access_ok.h> +#include <asm/page.h> +#include <linux/log2.h>
void debug_user_asce(int exit);
@@ -390,4 +392,191 @@ do { \ goto err_label; \ } while (0)
+static __always_inline int __cmpxchg_user_key_small(int size, u64 address, + unsigned __int128 *old_p, + unsigned __int128 new, u8 access_key) +{ + u32 shift, mask, old_word, new_word, align_mask, tmp; + u64 aligned; + int ret = -EFAULT; + + switch (size) { + case 2: + align_mask = 2; + aligned = (address ^ (address & align_mask)); + shift = (sizeof(u32) - (address & align_mask) - size) * 8; + mask = 0xffff << shift; + old_word = ((u16)*old_p) << shift; + new_word = ((u16)new) << shift; + break; + case 1: + align_mask = 3; + aligned = (address ^ (address & align_mask)); + shift = (sizeof(u32) - (address & align_mask) - size) * 8; + mask = 0xff << shift; + old_word = ((u8)*old_p) << shift; + new_word = ((u8)new) << shift; + break; + } + tmp = old_word; /* don't modify *old_p on fault */ + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: l %[tmp],%[aligned]\n" + "1: nr %[tmp],%[mask]\n" + " xilf %[mask],0xffffffff\n" + " or %[new_word],%[tmp]\n" + " or %[tmp],%[old_word]\n" + "2: lr %[old_word],%[tmp]\n" + "3: cs %[tmp],%[new_word],%[aligned]\n" + "4: jnl 5f\n" + /* We'll restore old_word before the cs, use reg for the diff */ + " xr %[old_word],%[tmp]\n" + /* Apply diff assuming only bits outside target byte(s) changed */ + " xr %[new_word],%[old_word]\n" + /* If prior assumption false we exit loop, so not an issue */ + " nr %[old_word],%[mask]\n" + " jz 2b\n" + "5: ipm %[ret]\n" + " srl %[ret],28\n" + "6: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE(0b, 6b) EX_TABLE(1b, 6b) + EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) + : [old_word] "+&d" (old_word), + [new_word] "+&d" (new_word), + [tmp] "+&d" (tmp), + [aligned] "+Q" (*(u32 *)aligned), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), + [mask] "d" (~mask), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); + *old_p = (tmp & mask) >> shift; + return ret; +} + +/** + * cmpxchg_user_key_size() - cmpxchg with user space target, honoring storage keys + * @size: Size of the value being cmpxchg'ed, one of 1,2,4,8,16. + * @address: User space address of value to compare to *@old_p and exchange with + * @new. Must be aligned to @size. + * @old_p: Pointer to old value. Interpreted as a @size byte integer and compared + * to the content pointed to by @address in order to determine if the + * exchange occurs. The value read from @address is written back to *@old_p. + * @new: New value to place at @address, interpreted as a @size byte integer. + * @access_key: Access key to use for checking storage key protection. + * + * Perform a cmpxchg on a user space target, honoring storage key protection. + * @access_key alone determines how key checking is performed, neither + * storage-protection-override nor fetch-protection-override apply. 
+ * + * Return: 0: successful exchange + * 1: exchange failed + * -EFAULT: @address not accessible or not naturally aligned + * -EINVAL: invalid @size + */ +static __always_inline int cmpxchg_user_key_size(int size, void __user *address, + unsigned __int128 *old_p, + unsigned __int128 new, u8 access_key) +{ + union { + u32 word; + u64 doubleword; + } old; + int ret = -EFAULT; + + /* + * The following assumes that: + * * the current psw key is the default key + * * no storage protection overrides are in effect + */ + might_fault(); + switch (size) { + case 16: + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: cdsg %[old],%[new],%[target]\n" + "1: ipm %[ret]\n" + " srl %[ret],28\n" + "2: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [old] "+d" (*old_p), + [target] "+Q" (*(unsigned __int128 __user *)address), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), + [new] "d" (new), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); + return ret; + case 8: + old.doubleword = *old_p; + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: csg %[old],%[new],%[target]\n" + "1: ipm %[ret]\n" + " srl %[ret],28\n" + "2: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [old] "+d" (old.doubleword), + [target] "+Q" (*(u64 __user *)address), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), + [new] "d" ((u64)new), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); + *old_p = old.doubleword; + return ret; + case 4: + old.word = *old_p; + asm volatile( + "spka 0(%[access_key])\n" + " sacf 256\n" + "0: cs %[old],%[new],%[target]\n" + "1: ipm %[ret]\n" + " srl %[ret],28\n" + "2: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [old] "+d" (old.word), + [target] "+Q" (*(u32 __user *)address), + [ret] "+d" (ret) + : [access_key] "a" (access_key << 4), + [new] "d" ((u32)new), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "cc" + ); + *old_p = old.word; + return ret; + case 2: + case 1: + return __cmpxchg_user_key_small(size, (u64)address, old_p, new, access_key); + default: + return -EINVAL; + } +} + +#define cmpxchg_user_key(target_p, old_p, new, access_key) \ +({ \ + __typeof__(old_p) __old_p = (old_p); \ + unsigned __int128 __old = *__old_p; \ + size_t __size = sizeof(*(target_p)); \ + int __ret; \ + \ + BUILD_BUG_ON(__size != sizeof(*__old_p)); \ + BUILD_BUG_ON(__size != sizeof(new)); \ + BUILD_BUG_ON(__size > 16 || !is_power_of_2(__size)); \ + __ret = cmpxchg_user_key_size(__size, (target_p), &__old, (new), \ + (access_key)); \ + *__old_p = __old; \ + __ret; \ +}) + #endif /* __S390_UACCESS_H */
Hi Janis,
On Wed, Oct 12, 2022 at 10:56:01PM +0200, Janis Schoetterl-Glausch wrote:
Add cmpxchg functionality similar to that in cmpxchg.h except that the target is a user space address and that the address' storage key is matched with the access_key argument in order to honor key-controlled protection. The access is performed by changing to the secondary-spaces mode and setting the PSW key for the duration of the compare and swap.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
Possible variations:
- check the assumptions made in cmpxchg_user_key_size and error out
- call functions called by copy_to_user
- access_ok? is a nop
- should_fail_usercopy?
- instrument_copy_to_user? doesn't make sense IMO
- don't be overly strict in cmpxchg_user_key
 arch/s390/include/asm/uaccess.h | 189 ++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
This is not how I would have expected something that would be symmetrical/consistent with what we already have. However, instead of spending several iterations, I'll send something for this.
This might take a bit due to my limited time. So please be patient.
Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:01) [...]
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f7038b800cc3..f148f5a22c93 100644
[...]
+static __always_inline int __cmpxchg_user_key_small(int size, u64 address,
unsigned __int128 *old_p,
unsigned __int128 new, u8 access_key)
+{
This function is quite hard to understand for me without some context. I have a few suggestions for some comments and one small question below.
u32 shift, mask, old_word, new_word, align_mask, tmp;
u64 aligned;
int ret = -EFAULT;
something like this:
/*
 * There is no instruction for 2 and 1 byte compare swap, hence emulate it with
 * a 4-byte compare swap.
 * When the 4-byte compare swap fails, it can be because the actual value user
 * space wanted to exchange mismatched. In this case, return to user space.
 * Or it can be because something outside of the value user space wanted to
 * access mismatched (the "remainder" of the word). In this case, retry in
 * kernel space.
 */
switch (size) {
case 2:
/* assume address is 2-byte-aligned - cannot cross word boundary */
align_mask = 2;
/* fancy way of saying aligned = address & ~align_mask */
aligned = (address ^ (address & align_mask));
/* generate mask to extract value to xchg from the word */
shift = (sizeof(u32) - (address & align_mask) - size) * 8;
mask = 0xffff << shift;
old_word = ((u16)*old_p) << shift;
new_word = ((u16)new) << shift;
break;
case 1:
align_mask = 3;
aligned = (address ^ (address & align_mask));
shift = (sizeof(u32) - (address & align_mask) - size) * 8;
mask = 0xff << shift;
old_word = ((u8)*old_p) << shift;
new_word = ((u8)new) << shift;
break;
}
tmp = old_word; /* don't modify *old_p on fault */
asm volatile(
"spka 0(%[access_key])\n"
/* secondary space has user asce loaded */
" sacf 256\n"
"0: l %[tmp],%[aligned]\n"
"1: nr %[tmp],%[mask]\n"
/* invert mask to generate mask for the remainder */
" xilf %[mask],0xffffffff\n"
" or %[new_word],%[tmp]\n"
" or %[tmp],%[old_word]\n"
"2: lr %[old_word],%[tmp]\n"
"3: cs %[tmp],%[new_word],%[aligned]\n"
"4: jnl 5f\n"
/* We'll restore old_word before the cs, use reg for the diff */
" xr %[old_word],%[tmp]\n"
/* Apply diff assuming only bits outside target byte(s) changed */
" xr %[new_word],%[old_word]\n"
/* If prior assumption false we exit loop, so not an issue */
" nr %[old_word],%[mask]\n"
" jz 2b\n"
So if the remainder changed but the actual value to exchange stays the same, we loop in the kernel. Does it maybe make sense to limit the number of iterations we spend retrying? I think while looping here the calling process can't be killed, can it?
On Thu, Oct 20, 2022 at 03:40:56PM +0200, Nico Boehr wrote:
Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:01)
"2: lr %[old_word],%[tmp]\n"
"3: cs %[tmp],%[new_word],%[aligned]\n"
"4: jnl 5f\n"
/* We'll restore old_word before the cs, use reg for the diff */
" xr %[old_word],%[tmp]\n"
/* Apply diff assuming only bits outside target byte(s) changed */
" xr %[new_word],%[old_word]\n"
/* If prior assumption false we exit loop, so not an issue */
" nr %[old_word],%[mask]\n"
" jz 2b\n"
So if the remainder changed but the actual value to exchange stays the same, we loop in the kernel. Does it maybe make sense to limit the number of iterations we spend retrying? I think while looping here the calling process can't be killed, can it?
Yes, the number of loops should be limited; quite similar to what arm64 implemented with commit 03110a5cb216 ("arm64: futex: Bound number of LDXR/STXR loops in FUTEX_WAKE_OP").
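For illustration, here is a self-contained user-space C sketch of the bounded-retry idea: a one-byte compare-and-swap emulated on top of a word-sized one, retrying only while bytes outside the target keep changing and giving up after a fixed number of attempts. It uses GCC atomic builtins rather than the kernel asm, and the bound of 128 is an arbitrary placeholder, not a value taken from this thread.

#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

#define CAS_MAX_LOOPS 128	/* illustrative bound only */

/*
 * Emulate a 1-byte compare-and-swap with a 4-byte one. Returns 0 if the
 * exchange happened, 1 if the target byte mismatched (*old is updated to
 * the current value), and -EAGAIN if the surrounding bytes kept changing
 * until the retry budget was exhausted.
 */
static int cas_byte_bounded(uint32_t *word, unsigned int byte_off,
			    uint8_t *old, uint8_t new)
{
	unsigned int shift = (3 - byte_off) * 8;	/* big-endian layout, as on s390 */
	uint32_t mask = 0xffu << shift;
	uint32_t expected, desired;
	int loops = CAS_MAX_LOOPS;

	expected = __atomic_load_n(word, __ATOMIC_RELAXED);
	do {
		if (((expected & mask) >> shift) != *old) {
			*old = (expected & mask) >> shift;	/* precise failure check on the target byte */
			return 1;
		}
		desired = (expected & ~mask) | ((uint32_t)new << shift);
		if (__atomic_compare_exchange_n(word, &expected, desired, false,
						__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			return 0;
		/* CAS failed: expected now holds the current word contents */
	} while (--loops > 0);
	return -EAGAIN;	/* only the remainder kept changing, give up */
}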
Hi Janis,
On Wed, Oct 12, 2022 at 10:56:01PM +0200, Janis Schoetterl-Glausch wrote:
Add cmpxchg functionality similar to that in cmpxchg.h except that the target is a user space address and that the address' storage key is matched with the access_key argument in order to honor key-controlled protection. The access is performed by changing to the secondary-spaces mode and setting the PSW key for the duration of the compare and swap.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
Possible variations:
- check the assumptions made in cmpxchg_user_key_size and error out
- call functions called by copy_to_user
- access_ok? is a nop
- should_fail_usercopy?
- instrument_copy_to_user? doesn't make sense IMO
- don't be overly strict in cmpxchg_user_key
 arch/s390/include/asm/uaccess.h | 189 ++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
So finally I send the uaccess/cmpxchg patches in reply to this mail. Sorry for the long delay!
The first three patches are not required for the functionality you need, but given that I always stress that the code should be consistent I include them anyway.
The changes are probably quite obvious:
- Keep uaccess cmpxchg code more or less identical to regular cmpxchg code. I wasn't able to come up with a readable code base which could be used for both variants.
- Users may only use the cmpxchg_user_key() macro - _not_ the inline function, which is an internal API. This will require that you add a switch statement and a couple of casts within the KVM code, but it shouldn't have much of an impact on the generated code.
- Cause link error for non-integral sizes, similar to other uaccess functions.
- cmpxchg_user_key() now has a simple return value: 0 or -EFAULT, and writes the old value to a location provided by a pointer. This is quite similar to the futex code. Users must compare the old and expected value to figure out if something was exchanged. Note that this is in most cases more efficient than extracting the condition code from the PSW with ipm, since nowadays we have instructions like compare and branch relative on condition, etc.
- Couple of other minor changes which I forgot.
Code is untested (of course :) ). Please give it a try and let me know if this is good enough for your purposes.
I also did not limit the number of retries for the one and two byte scenarios. Before doing that we need to have proof that there really is a problem. Maybe Nico or you will give this a try.
Thanks, Heiko
Make cmpxchg() inline assemblies more readable by using symbolic names for operands.
Signed-off-by: Heiko Carstens hca@linux.ibm.com
---
 arch/s390/include/asm/cmpxchg.h | 76 ++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 34 deletions(-)
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 84c3f0d576c5..56fb8aa08945 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -96,56 +96,64 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, shift = (3 ^ (address & 3)) << 3; address ^= address & 3; asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" + " l %[prev],%[address]\n" + "0: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + " cs %[prev],%[tmp],%[address]\n" + " jnl 1f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 0b\n" "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address) - : "d" ((old & 0xff) << shift), - "d" ((new & 0xff) << shift), - "d" (~(0xff << shift)) + : [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" ((old & 0xff) << shift), + [new] "d" ((new & 0xff) << shift), + [mask] "d" (~(0xff << shift)) : "memory", "cc"); return prev >> shift; case 2: shift = (2 ^ (address & 2)) << 3; address ^= address & 2; asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" + " l %[prev],%[address]\n" + "0: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + " cs %[prev],%[tmp],%[address]\n" + " jnl 1f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 0b\n" "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address) - : "d" ((old & 0xffff) << shift), - "d" ((new & 0xffff) << shift), - "d" (~(0xffff << shift)) + : [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" ((old & 0xffff) << shift), + [new] "d" ((new & 0xffff) << shift), + [mask] "d" (~(0xffff << shift)) : "memory", "cc"); return prev >> shift; case 4: asm volatile( - " cs %0,%3,%1\n" - : "=&d" (prev), "+Q" (*(int *) address) - : "0" (old), "d" (new) + " cs %[prev],%[new],%[address]\n" + : [prev] "=&d" (prev), + [address] "+Q" (*(int *)address) + : "0" (old), + [new] "d" (new) : "memory", "cc"); return prev; case 8: asm volatile( - " csg %0,%3,%1\n" - : "=&d" (prev), "+QS" (*(long *) address) - : "0" (old), "d" (new) + " csg %[prev],%[new],%[address]\n" + : [prev] "=&d" (prev), + [address] "+QS" (*(long *)address) + : "0" (old), + [new] "d" (new) : "memory", "cc"); return prev; }
Make variables local to each case label. This limits the scope of variables and allows to use proper types everywhere.
Signed-off-by: Heiko Carstens hca@linux.ibm.com
---
 arch/s390/include/asm/cmpxchg.h | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 56fb8aa08945..2ad057b94481 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -88,11 +88,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, unsigned long old, unsigned long new, int size) { - unsigned long prev, tmp; - int shift; - switch (size) { - case 1: + case 1: { + unsigned int prev, tmp, shift; + shift = (3 ^ (address & 3)) << 3; address ^= address & 3; asm volatile( @@ -115,7 +114,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, [mask] "d" (~(0xff << shift)) : "memory", "cc"); return prev >> shift; - case 2: + } + case 2: { + unsigned int prev, tmp, shift; + shift = (2 ^ (address & 2)) << 3; address ^= address & 2; asm volatile( @@ -138,7 +140,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, [mask] "d" (~(0xffff << shift)) : "memory", "cc"); return prev >> shift; - case 4: + } + case 4: { + unsigned int prev; + asm volatile( " cs %[prev],%[new],%[address]\n" : [prev] "=&d" (prev), @@ -147,7 +152,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, [new] "d" (new) : "memory", "cc"); return prev; - case 8: + } + case 8: { + unsigned long prev; + asm volatile( " csg %[prev],%[new],%[address]\n" : [prev] "=&d" (prev), @@ -157,6 +165,7 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, : "memory", "cc"); return prev; } + } __cmpxchg_called_with_bad_pointer(); return old; }
Instead of using a digit for input constraints simply initialize the corresponding output operand in C code and use a "+" constraint modifier.
Signed-off-by: Heiko Carstens hca@linux.ibm.com
---
 arch/s390/include/asm/cmpxchg.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 2ad057b94481..1c5785b851ec 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -142,26 +142,24 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, return prev >> shift; } case 4: { - unsigned int prev; + unsigned int prev = old;
asm volatile( " cs %[prev],%[new],%[address]\n" - : [prev] "=&d" (prev), + : [prev] "+&d" (prev), [address] "+Q" (*(int *)address) - : "0" (old), - [new] "d" (new) + : [new] "d" (new) : "memory", "cc"); return prev; } case 8: { - unsigned long prev; + unsigned long prev = old;
asm volatile( " csg %[prev],%[new],%[address]\n" - : [prev] "=&d" (prev), + : [prev] "+&d" (prev), [address] "+QS" (*(long *)address) - : "0" (old), - [new] "d" (new) + : [new] "d" (new) : "memory", "cc"); return prev; }
Add new exception table type which is able to handle register pairs. If an exception is recognized on such an instruction the specified register pair will be zeroed, and the specified error register will be modified so it contains -EFAULT, similar to the existing EX_TABLE_UA_LOAD_REG() macro.
Signed-off-by: Heiko Carstens hca@linux.ibm.com
---
 arch/s390/include/asm/asm-extable.h | 4 ++++
 arch/s390/mm/extable.c              | 9 +++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index b74f1070ddb2..55a02a153dfc 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -12,6 +12,7 @@ #define EX_TYPE_UA_STORE 3 #define EX_TYPE_UA_LOAD_MEM 4 #define EX_TYPE_UA_LOAD_REG 5 +#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -85,4 +86,7 @@ #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \ + __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 1e4d2187541a..fe87291df95d 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc return true; }
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, + bool pair, struct pt_regs *regs) { unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data); unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
regs->gprs[reg_err] = -EFAULT; regs->gprs[reg_zero] = 0; + if (pair) + regs->gprs[reg_zero + 1] = 0; regs->psw.addr = extable_fixup(ex); return true; } @@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs) case EX_TYPE_UA_LOAD_MEM: return ex_handler_ua_load_mem(ex, regs); case EX_TYPE_UA_LOAD_REG: - return ex_handler_ua_load_reg(ex, regs); + return ex_handler_ua_load_reg(ex, false, regs); + case EX_TYPE_UA_LOAD_REGPAIR: + return ex_handler_ua_load_reg(ex, true, regs); } panic("invalid exception table entry"); }
Add cmpxchg_user_key() which allows to execute a compare and exchange on a user space address. This allows also to specify a storage key which makes sure that key-controlled protection is considered.
This is based on a patch written by Janis Schoetterl-Glausch.
Link: https://lore.kernel.org/all/20220930210751.225873-2-scgl@linux.ibm.com
Cc: Janis Schoetterl-Glausch scgl@linux.ibm.com
Signed-off-by: Heiko Carstens hca@linux.ibm.com
---
 arch/s390/include/asm/uaccess.h | 183 ++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f7038b800cc3..9bbdecb80e06 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -390,4 +390,187 @@ do { \ goto err_label; \ } while (0)
+void __cmpxchg_user_key_called_with_bad_pointer(void); + +static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, + __uint128_t old, __uint128_t new, + unsigned long key, int size) +{ + int rc = 0; + + switch (size) { + case 1: { + unsigned int prev, tmp, shift; + + shift = (3 ^ (address & 3)) << 3; + address ^= address & 3; + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: l %[prev],%[address]\n" + "1: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + "2: cs %[prev],%[tmp],%[address]\n" + "3: jnl 4f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 1b\n" + "4: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" (((unsigned int)old & 0xff) << shift), + [new] "d" (((unsigned int)new & 0xff) << shift), + [mask] "d" (~(0xff << shift)), + [key] "a" (key), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned char *)uval = prev >> shift; + return rc; + } + case 2: { + unsigned int prev, tmp, shift; + + shift = (2 ^ (address & 2)) << 3; + address ^= address & 2; + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: l %[prev],%[address]\n" + "1: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + "2: cs %[prev],%[tmp],%[address]\n" + "3: jnl 4f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 1b\n" + "4: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" (((unsigned int)old & 0xffff) << shift), + [new] "d" (((unsigned int)new & 0xffff) << shift), + [mask] "d" (~(0xffff << shift)), + [key] "a" (key), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned short *)uval = prev >> shift; + return rc; + } + case 4: { + unsigned int prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: cs %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+Q" (*(int *)address) + : [new] "d" ((unsigned int)new), + [key] "a" (key), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned int *)uval = prev; + return rc; + } + case 8: { + unsigned long prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: csg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(long *)address) + : [new] "d" ((unsigned long)new), + [key] "a" (key), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned long *)uval = prev; + return rc; + } + case 16: { + __uint128_t prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: cdsg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], 
%[prev]) + EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(__int128_t *)address) + : [new] "d" (new), + [key] "a" (key), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(__uint128_t *)uval = prev; + return rc; + } + } + __cmpxchg_user_key_called_with_bad_pointer(); + return rc; +} + +/** + * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys + * @ptr: User space address of value to compare to @old and exchange with + * @new. Must be aligned to sizeof(*@size). + * @uval: Address where the old value of *@ptr is written to. + * @old: Old value. Compared to the content pointed to by @ptr in order to + * determine if the exchange occurs. The old value read from *@ptr is + * written to *@uval. + * @new: New value to place at *@ptr. + * @key: Access key to use for checking storage key protection. + * + * Perform a cmpxchg on a user space target, honoring storage key protection. + * @key alone determines how key checking is performed, neither + * storage-protection-override nor fetch-protection-override apply. + * The caller must compare *@uval and @old to determine if values have been + * exchanged. In case of an exception *@uval is set to zero. + * + * Return: 0: cmpxchg executed + * -EFAULT: an exception happened when trying to access *@ptr + */ +#define cmpxchg_user_key(ptr, uval, old, new, key) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(uval) __uval = (uval); \ + \ + BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \ + might_fault(); \ + __chk_user_ptr(__ptr); \ + __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \ + (old), (new), (key), sizeof(*(__ptr))); \ +}) + #endif /* __S390_UACCESS_H */
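As an illustration of the "switch statement and a couple of casts" the KVM caller would then need with this macro-only interface, here is a rough sketch of a possible wrapper. The helper name and its return convention are made up for this example, only two sizes are shown, and it assumes the later, fixed variant of the macro that takes the unshifted access key.

/*
 * Sketch of a KVM-side wrapper around cmpxchg_user_key(). Return values:
 * 0 exchange done, 1 value mismatch (*old updated), -EFAULT on fault,
 * -EINVAL for unsupported sizes.
 */
static int cmpxchg_guest_hva(void __user *hva, int len, __uint128_t *old,
			     __uint128_t new, u8 access_key)
{
	int ret;

	switch (len) {
	case 1: {
		u8 expected = *old, prev;

		ret = cmpxchg_user_key((u8 __user *)hva, &prev, expected,
				       (u8)new, access_key);
		if (!ret && prev != expected)
			ret = 1;
		*old = prev;
		break;
	}
	case 8: {
		u64 expected = *old, prev;

		ret = cmpxchg_user_key((u64 __user *)hva, &prev, expected,
				       (u64)new, access_key);
		if (!ret && prev != expected)
			ret = 1;
		*old = prev;
		break;
	}
	/* cases 2, 4 and 16 follow the same pattern */
	default:
		return -EINVAL;
	}
	return ret;
}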
On Wed, 2022-11-02 at 15:19 +0100, Heiko Carstens wrote:
Add cmpxchg_user_key() which allows to execute a compare and exchange on a user space address. This allows also to specify a storage key which makes sure that key-controlled protection is considered.
This is based on a patch written by Janis Schoetterl-Glausch.
Link: https://lore.kernel.org/all/20220930210751.225873-2-scgl@linux.ibm.com
Cc: Janis Schoetterl-Glausch scgl@linux.ibm.com
Signed-off-by: Heiko Carstens hca@linux.ibm.com
 arch/s390/include/asm/uaccess.h | 183 ++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f7038b800cc3..9bbdecb80e06 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -390,4 +390,187 @@ do { \ goto err_label; \ } while (0) +void __cmpxchg_user_key_called_with_bad_pointer(void);
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
__uint128_t old, __uint128_t new,
unsigned long key, int size)
+{
- int rc = 0;
- switch (size) {
- case 1: {
unsigned int prev, tmp, shift;
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
asm volatile(
" spka 0(%[key])\n"
" sacf 256\n"
"0: l %[prev],%[address]\n"
"1: nr %[prev],%[mask]\n"
" lr %[tmp],%[prev]\n"
" or %[prev],%[old]\n"
" or %[tmp],%[new]\n"
"2: cs %[prev],%[tmp],%[address]\n"
"3: jnl 4f\n"
" xr %[tmp],%[prev]\n"
" nr %[tmp],%[mask]\n"
Are you only entertaining cosmetic changes to cmpxchg.h? The loop condition being imprecise seems non-ideal.
" jnz 1b\n"
"4: sacf 768\n"
" spka %[default_key]\n"
EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev])
EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev])
: [rc] "+&d" (rc),
[prev] "=&d" (prev),
[tmp] "=&d" (tmp),
[address] "+Q" (*(int *)address)
: [old] "d" (((unsigned int)old & 0xff) << shift),
[new] "d" (((unsigned int)new & 0xff) << shift),
[mask] "d" (~(0xff << shift)),
[key] "a" (key),
Why did you get rid of the << 4 shift? That's inconsistent with the other uaccess functions that take an access key.
[default_key] "J" (PAGE_DEFAULT_KEY)
: "memory", "cc");
*(unsigned char *)uval = prev >> shift;
return rc;
- }
[...]
On Wed, Nov 09, 2022 at 04:46:29PM +0100, Janis Schoetterl-Glausch wrote:
On Wed, 2022-11-02 at 15:19 +0100, Heiko Carstens wrote:
- case 1: {
unsigned int prev, tmp, shift;
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
asm volatile(
" spka 0(%[key])\n"
" sacf 256\n"
"0: l %[prev],%[address]\n"
"1: nr %[prev],%[mask]\n"
" lr %[tmp],%[prev]\n"
" or %[prev],%[old]\n"
" or %[tmp],%[new]\n"
"2: cs %[prev],%[tmp],%[address]\n"
"3: jnl 4f\n"
" xr %[tmp],%[prev]\n"
" nr %[tmp],%[mask]\n"
Are you only entertaining cosmetic changes to cmpxchg.h?
I fail to parse what you are trying to say. Please elaborate.
The loop condition being imprecise seems non-ideal.
What exactly is imprecise?
[key] "a" (key),
Why did you get rid of the << 4 shift? That's inconsistent with the other uaccess functions that take an access key.
That's not only inconsistent, but also a bug. Thank you for pointing this out. Will be fixed.
On Wed, 2022-11-09 at 23:24 +0100, Heiko Carstens wrote:
On Wed, Nov 09, 2022 at 04:46:29PM +0100, Janis Schoetterl-Glausch wrote:
On Wed, 2022-11-02 at 15:19 +0100, Heiko Carstens wrote:
- case 1: {
unsigned int prev, tmp, shift;
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
asm volatile(
" spka 0(%[key])\n"
" sacf 256\n"
"0: l %[prev],%[address]\n"
"1: nr %[prev],%[mask]\n"
" lr %[tmp],%[prev]\n"
" or %[prev],%[old]\n"
" or %[tmp],%[new]\n"
"2: cs %[prev],%[tmp],%[address]\n"
"3: jnl 4f\n"
" xr %[tmp],%[prev]\n"
" nr %[tmp],%[mask]\n"
Are you only entertaining cosmetic changes to cmpxchg.h?
I fail to parse what you are trying to say. Please elaborate.
The loop condition being imprecise seems non-ideal.
What exactly is imprecise?
The loop retries the CS if bits outside the target byte changed instead of retrying until the target byte differs from the old value. So if you attempt to exchange (prev_left_0 old_byte prev_right_0) and that fails because the word at the address is (prev_left_1 x prev_right_1) where both x != old_byte and one of the prev_*_1 values differs from the respective prev_*_0 value, the CS is retried. If there were a native 1 byte compare and swap, the exchange would just fail here. Instead the loop retries the CS until the margin values are stable and it can infer from that that the CS failed because of the target value. (Assuming that doesn't change to the old_byte value.)
It's not a problem, but it struck me as non-ideal, which is why for v2 I inverted the mask after using it to punch the hole for the old/new values. Then you can use it to test if bits inside the target byte differ.
That's why I asked about cmpxchg.h. If you don't want non-cosmetic changes to the existing cmpxchg function and consistency of the new key checked function, then obviously the loop condition needs to be the same.
[key] "a" (key),
Why did you get rid of the << 4 shift? That's inconsistent with the other uaccess functions that take an access key.
That's not only inconsistent, but also a bug. Thank you for pointing this out. Will be fixed.
Well, you could pass in the shifted key as argument, but yeah.
On Thu, Nov 10, 2022 at 12:01:23PM +0100, Janis Schoetterl-Glausch wrote:
On Wed, 2022-11-09 at 23:24 +0100, Heiko Carstens wrote:
On Wed, Nov 09, 2022 at 04:46:29PM +0100, Janis Schoetterl-Glausch wrote:
Are you only entertaining cosmetic changes to cmpxchg.h?
I fail to parse what you are trying to say. Please elaborate.
The loop condition being imprecise seems non-ideal.
What exactly is imprecise?
The loop retries the CS if bits outside the target byte changed instead of retrying until the target byte differs from the old value. So if you attempt to exchange (prev_left_0 old_byte prev_right_0) and that fails because the word at the address is (prev_left_1 x prev_right_1) where both x != old_byte and one of the prev_*_1 values differs from the respective prev_*_0 value, the CS is retried. If there were a native 1 byte compare and swap, the exchange would just fail here. Instead the loop retries the CS until the margin values are stable and it can infer from that that the CS failed because of the target value. (Assuming that doesn't change to the old_byte value.)
It's not a problem, but it struck me as non-ideal, which is why for v2 I inverted the mask after using it to punch the hole for the old/new values. Then you can use it to test if bits inside the target byte differ.
That's why I asked about cmpxchg.h. If you don't want non-cosmetic changes to the existing cmpxchg function and consistency of the new key checked function, then obviously the loop condition needs to be the same.
Such a change is fine of course, even though compare-and-swap for one and two byte patterns don't really matter. I would appreciate if you could send one or two patches on-top of this series which adds the improved logic to (now) both variants.
And, since the question will come up anyway: as soon as we agreed on a complete patch series, I think we should go for a features branch on s390's kernel.org tree which would contain the first five patches sent by me plus potential addon patches provided by you. This tree can then be pulled in by the kvms390 tree where your kvm specific patches can then be applied on top.
On Thu, Nov 10, 2022 at 12:32:06PM +0100, Heiko Carstens wrote:
That's why I asked about cmpxchg.h. If you don't want non-cosmetic changes to the existing cmpxchg function and consistency of the new key checked function, then obviously the loop condition needs to be the same.
Such a change is fine of course, even though compare-and-swap for one and two byte patterns don't really matter. I would appreciate if you could send one or two patches on-top of this series which adds the improved logic to (now) both variants.
And, since the question will come up anyway: as soon as we agreed on a complete patch series, I think we should go for a features branch on s390's kernel.org tree which would contain the first five patches sent by me plus potential addon patches provided by you. This tree can then be pulled in by the kvms390 tree where your kvm specific patches can then be applied on top.
FWIW, pushed a non-stable work-in-progress branch to git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git wip/cmpxchg_user_key
This includes also an updated patch, which fixes the missing shift of the access key.
On Wed, 2022-11-02 at 15:12 +0100, Heiko Carstens wrote:
[...]
I also did not limit the number of retries for the one and two byte scenarios. Before doing that we need to have proof that there really is a problem. Maybe Nico or you will give this a try.
I wrote a memop selftest testcase where the main thread uses the one byte cmpxchg while n vcpus flip adjacent bits. The time the test case runs increases superlinearly with n. With 248 vcpus, 1000 one byte cmpxchgs take 25s. I'm not sure how meaningful the test is since the worst case would be if the threads hammering the word would run on a cpu dedicated to them.
In any case, why not err on the side of caution and limit the iterations? I'll send an rfc patch.
Thanks, Heiko
Quoting Janis Schoetterl-Glausch (2022-11-16 20:36:46)
On Wed, 2022-11-02 at 15:12 +0100, Heiko Carstens wrote:
[...]
I also did not limit the number of retries for the one and two byte scenarios. Before doing that we need to have proof that there really is a problem. Maybe Nico or you will give this a try.
I wrote a memop selftest testcase where the main thread uses the one byte cmpxchg while n vcpus flip adjacent bits. The time the test case runs increases superlinearly with n. With 248 vcpus, 1000 one byte cmpxchgs take 25s. I'm not sure how meaningful the test is since the worst case would be if the threads hammering the word would run on a cpu dedicated to them.
In any case, why not err on the side of caution and limit the iterations? I'll send an rfc patch.
I agree, limiting sounds like the safe choice.
User space can use the MEM_OP ioctl to make storage key checked reads and writes to the guest; however, it has no way of performing atomic, key-checked accesses to the guest. Extend the MEM_OP ioctl to allow for this by adding a cmpxchg mode. For now, support this mode for absolute accesses only.
This mode can be used, for example, to set the device-state-change indicator and the adapter-local-summary indicator atomically.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
---
 include/uapi/linux/kvm.h |  5 ++++
 arch/s390/kvm/gaccess.h  |  4 +++
 arch/s390/kvm/gaccess.c  | 57 ++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c | 35 ++++++++++++++++++++++--
 4 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..b28ef88eff41 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -588,6 +588,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_p; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -604,6 +606,9 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3) +/* Non program exception return codes (pgm codes are 16 bit) */ +#define KVM_S390_MEMOP_R_NO_XCHG ((1 << 16) + 0)
/* for KVM_INTERRUPT */ struct kvm_interrupt { diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 9408d6cc8e2c..a1cb66ae0995 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -206,6 +206,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode);
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old, + unsigned __int128 new, u8 access_key); + /** * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..d51cbae4f228 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1161,6 +1161,63 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, return rc; }
+/** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_p: Pointer to old value. If the location at @gpa contains this value, the + * exchange will succeed. After calling cmpxchg_guest_abs_with_key() *@old + * contains the value at @gpa before the attempt to exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. + * + * Return: * 0: successful exchange + * * 1: exchange unsuccessful + * * a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old_p, unsigned __int128 new, + u8 access_key) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a read-only memslot, even though that cannot occur + * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by protection, either key protection + * or user page write protection. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + /** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b7ef0b71014d..88f0b83229f6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -576,7 +576,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: - case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -590,6 +589,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: + /* + * Flag bits indicating which extensions are supported. + * The first extension doesn't use a flag, but pretend it does, + * this way that can be changed in the future. + */ + r = 0x3; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -2714,12 +2721,19 @@ static bool access_key_invalid(u8 access_key) static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + void __user *old_p = (void __user *)mop->old_p; + union { + unsigned __int128 quad; + char raw[sizeof(unsigned __int128)]; + } old = { .quad = 0}, new = { .quad = 0 }; + unsigned int off_in_quad = sizeof(unsigned __int128) - mop->size; u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx;
supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY + | KVM_S390_MEMOP_F_CMPXCHG; if (mop->flags & ~supported_flags || !mop->size) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) @@ -2741,6 +2755,15 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) } else { mop->key = 0; } + if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (mop->size > sizeof(new)) + return -EINVAL; + /* off_in_quad has been validated */ + if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) + return -EFAULT; + if (copy_from_user(&old.raw[off_in_quad], old_p, mop->size)) + return -EFAULT; + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -2771,6 +2794,14 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) case KVM_S390_MEMOP_ABSOLUTE_WRITE: { if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, + &old.quad, new.quad, mop->key); + if (r == 1) { + r = KVM_S390_MEMOP_R_NO_XCHG; + if (copy_to_user(old_p, &old.raw[off_in_quad], mop->size)) + r = -EFAULT; + } } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT;
Hi Janis,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on 4fe89d07dcc2804c8b562f6c7896a45643d34b2f]
url: https://github.com/intel-lab-lkp/linux/commits/Janis-Schoetterl-Glausch/KVM-... base: 4fe89d07dcc2804c8b562f6c7896a45643d34b2f config: s390-randconfig-s051-20221012 compiler: s390-linux-gcc (GCC) 12.1.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.4-39-gce1a6720-dirty # https://github.com/intel-lab-lkp/linux/commit/427c3a07629c563c58a83fa1febb07... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Janis-Schoetterl-Glausch/KVM-s390-Extend-MEM_OP-ioctl-by-storage-key-checked-cmpxchg/20221013-045733 git checkout 427c3a07629c563c58a83fa1febb07d1345e7a9d # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=s390 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot lkp@intel.com
sparse warnings: (new ones prefixed by >>) arch/s390/kvm/gaccess.c: note: in included file (through include/linux/uaccess.h, include/linux/sched/task.h, include/linux/sched/signal.h, ...):
arch/s390/include/asm/uaccess.h:560:56: sparse: sparse: cast removes address space '__user' of expression
vim +/__user +560 arch/s390/include/asm/uaccess.h
a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 459 a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 460 /** a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 461 * cmpxchg_user_key_size() - cmpxchg with user space target, honoring storage keys a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 462 * @size: Size of the value being cmpxchg'ed, one of 1,2,4,8,16. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 463 * @address: User space address of value to compare to *@old_p and exchange with a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 464 * @new. Must be aligned to @size. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 465 * @old_p: Pointer to old value. Interpreted as a @size byte integer and compared a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 466 * to the content pointed to by @address in order to determine if the a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 467 * exchange occurs. The value read from @address is written back to *@old_p. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 468 * @new: New value to place at @address, interpreted as a @size byte integer. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 469 * @access_key: Access key to use for checking storage key protection. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 470 * a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 471 * Perform a cmpxchg on a user space target, honoring storage key protection. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 472 * @access_key alone determines how key checking is performed, neither a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 473 * storage-protection-override nor fetch-protection-override apply. a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 474 * a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 475 * Return: 0: successful exchange a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 476 * 1: exchange failed a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 477 * -EFAULT: @address not accessible or not naturally aligned a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 478 * -EINVAL: invalid @size a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 479 */ a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 480 static __always_inline int cmpxchg_user_key_size(int size, void __user *address, a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 481 unsigned __int128 *old_p, a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 482 unsigned __int128 new, u8 access_key) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 483 { a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 484 union { a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 485 u32 word; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 486 u64 doubleword; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 487 } old; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 488 int ret = -EFAULT; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 489 a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 490 /* a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 491 * The following assumes that: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 492 * * the current psw key is the default key a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 493 * * no storage protection overrides are in effect a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 494 */ a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 495 might_fault(); a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 496 switch (size) { a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 497 case 16: a5a09360ca0f69 Janis 
Schoetterl-Glausch 2022-10-12 498 asm volatile( a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 499 "spka 0(%[access_key])\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 500 " sacf 256\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 501 "0: cdsg %[old],%[new],%[target]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 502 "1: ipm %[ret]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 503 " srl %[ret],28\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 504 "2: sacf 768\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 505 " spka %[default_key]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 506 EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 507 : [old] "+d" (*old_p), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 508 [target] "+Q" (*(unsigned __int128 __user *)address), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 509 [ret] "+d" (ret) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 510 : [access_key] "a" (access_key << 4), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 511 [new] "d" (new), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 512 [default_key] "J" (PAGE_DEFAULT_KEY) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 513 : "cc" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 514 ); a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 515 return ret; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 516 case 8: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 517 old.doubleword = *old_p; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 518 asm volatile( a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 519 "spka 0(%[access_key])\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 520 " sacf 256\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 521 "0: csg %[old],%[new],%[target]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 522 "1: ipm %[ret]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 523 " srl %[ret],28\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 524 "2: sacf 768\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 525 " spka %[default_key]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 526 EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 527 : [old] "+d" (old.doubleword), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 528 [target] "+Q" (*(u64 __user *)address), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 529 [ret] "+d" (ret) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 530 : [access_key] "a" (access_key << 4), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 531 [new] "d" ((u64)new), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 532 [default_key] "J" (PAGE_DEFAULT_KEY) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 533 : "cc" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 534 ); a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 535 *old_p = old.doubleword; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 536 return ret; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 537 case 4: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 538 old.word = *old_p; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 539 asm volatile( a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 540 "spka 0(%[access_key])\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 541 " sacf 256\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 542 "0: cs %[old],%[new],%[target]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 543 "1: ipm %[ret]\n" a5a09360ca0f69 Janis 
Schoetterl-Glausch 2022-10-12 544 " srl %[ret],28\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 545 "2: sacf 768\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 546 " spka %[default_key]\n" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 547 EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 548 : [old] "+d" (old.word), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 549 [target] "+Q" (*(u32 __user *)address), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 550 [ret] "+d" (ret) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 551 : [access_key] "a" (access_key << 4), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 552 [new] "d" ((u32)new), a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 553 [default_key] "J" (PAGE_DEFAULT_KEY) a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 554 : "cc" a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 555 ); a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 556 *old_p = old.word; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 557 return ret; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 558 case 2: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 559 case 1: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 @560 return __cmpxchg_user_key_small(size, (u64)address, old_p, new, access_key); a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 561 default: a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 562 return -EINVAL; a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 563 } a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 564 } a5a09360ca0f69 Janis Schoetterl-Glausch 2022-10-12 565
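For reference, line 560 in the quoted context is the (u64)address cast of the __user pointer, which is what sparse complains about. One way this could be addressed, sketched here rather than taken from the series, is to mark the address-space drop as intentional with a __force cast (alternatively, __cmpxchg_user_key_small() could be changed to take the void __user * itself):

	case 2:
	case 1:
		/* __force: dropping the __user address space here is intentional */
		return __cmpxchg_user_key_small(size, (__force u64)address,
						old_p, new, access_key);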
Describe the semantics of the new KVM_S390_MEMOP_F_CMPXCHG flag for absolute vm write memops, which allows user space to perform (storage key checked) cmpxchg operations on guest memory.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com --- Documentation/virt/kvm/api.rst | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index abd7c32126ce..f972f5d95e6c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3753,7 +3753,8 @@ The fields in each entry are defined as follows: :Parameters: struct kvm_s390_mem_op (in) :Returns: = 0 on success, < 0 on generic error (e.g. -EFAULT or -ENOMEM), - > 0 if an exception occurred while walking the page tables + 16 bit program exception code if the access causes such an exception + other code > maximum 16 bit value with special meaning
Read or write data from/to the VM's memory. The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is @@ -3771,6 +3772,8 @@ Parameters are specified via the following structure:: struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_p; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -3853,8 +3856,22 @@ Absolute accesses are permitted for non-protected guests only. Supported flags: * ``KVM_S390_MEMOP_F_CHECK_ONLY`` * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` + * ``KVM_S390_MEMOP_F_CMPXCHG`` + +The semantics of the flags common with logical accesses are as for logical +accesses. + +For write accesses, the KVM_S390_MEMOP_F_CMPXCHG flag may be supported. +In this case, instead of doing an unconditional write, the access occurs only +if the target location contains the "size" byte long value pointed to by +"old_p". This is performed as an atomic cmpxchg. "size" must be a power of two +up to and including 16. +The value at the target location is written to the location "old_p" points to. +If the exchange did not take place because the target value doesn't match the +old value, KVM_S390_MEMOP_R_NO_XCHG is returned. +The KVM_S390_MEMOP_F_CMPXCHG flag is supported if KVM_CAP_S390_MEM_OP_EXTENSION +has bit 1 (i.e. bit with value 2) set.
-The semantics of the flags are as for logical accesses.
SIDA read/write: ^^^^^^^^^^^^^^^^
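To make the documented flow concrete, here is a minimal user-space sketch (not part of the series) that retries a cmpxchg write on the vm fd until it succeeds. It assumes the KVM_S390_MEMOP_F_CMPXCHG flag, the old_p field, and the KVM_S390_MEMOP_R_NO_XCHG return code from this series are present in the installed <linux/kvm.h>; generic ioctl error handling is omitted:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Atomically OR a bit into one guest-absolute byte, without key checking. */
	static int memop_or_byte(int vm_fd, uint64_t gaddr, uint8_t bit)
	{
		uint8_t old = 0, new;
		struct kvm_s390_mem_op ksmo = {
			.op = KVM_S390_MEMOP_ABSOLUTE_WRITE,
			.flags = KVM_S390_MEMOP_F_CMPXCHG,
			.gaddr = gaddr,
			.size = 1,                 /* power of two, at most 16 */
			.buf = (uint64_t)&new,     /* new value to store */
			.old_p = (uint64_t)&old,   /* expected old value */
		};
		int r;

		do {
			new = old | bit;
			r = ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo);
		} while (r == KVM_S390_MEMOP_R_NO_XCHG);

		return r; /* 0 on success, < 0 or a program exception code otherwise */
	}

On a mismatch KVM copies the value it found back through old_p, so each retry recomputes new from fresh data; the loop ends as soon as the exchange succeeds or a real error is reported.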
The struct is quite large, so passing it via a pointer seems nicer.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com Reviewed-by: Thomas Huth thuth@redhat.com --- tools/testing/selftests/kvm/s390x/memop.c | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9113696d5178..69869c7e2ab1 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -48,53 +48,53 @@ struct mop_desc { uint8_t key; };
-static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc) +static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) { struct kvm_s390_mem_op ksmo = { - .gaddr = (uintptr_t)desc.gaddr, - .size = desc.size, - .buf = ((uintptr_t)desc.buf), + .gaddr = (uintptr_t)desc->gaddr, + .size = desc->size, + .buf = ((uintptr_t)desc->buf), .reserved = "ignored_ignored_ignored_ignored" };
- switch (desc.target) { + switch (desc->target) { case LOGICAL: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; break; case SIDA: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_SIDA_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; break; case ABSOLUTE: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; break; case INVALID: ksmo.op = -1; } - if (desc.f_check) + if (desc->f_check) ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; - if (desc.f_inject) + if (desc->f_inject) ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; - if (desc._set_flags) - ksmo.flags = desc.set_flags; - if (desc.f_key) { + if (desc->_set_flags) + ksmo.flags = desc->set_flags; + if (desc->f_key) { ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; - ksmo.key = desc.key; + ksmo.key = desc->key; } - if (desc._ar) - ksmo.ar = desc.ar; + if (desc->_ar) + ksmo.ar = desc->ar; else ksmo.ar = 0; - if (desc._sida_offset) - ksmo.sida_offset = desc.sida_offset; + if (desc->_sida_offset) + ksmo.sida_offset = desc->sida_offset;
return ksmo; } @@ -183,7 +183,7 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) else \ __desc.gaddr = __desc.gaddr_v; \ } \ - __ksmo = ksmo_from_desc(__desc); \ + __ksmo = ksmo_from_desc(&__desc); \ print_memop(__info.vcpu, &__ksmo); \ err##memop_ioctl(__info, &__ksmo); \ })
Replace the DEFAULT_* test helpers with functions, as they don't need the extra flexibility.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com --- tools/testing/selftests/kvm/s390x/memop.c | 82 +++++++++++------------ 1 file changed, 39 insertions(+), 43 deletions(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 69869c7e2ab1..286185a59238 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -48,6 +48,8 @@ struct mop_desc { uint8_t key; };
+const uint8_t NO_KEY = 0xff; + static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) { struct kvm_s390_mem_op ksmo = { @@ -85,7 +87,7 @@ static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; if (desc->_set_flags) ksmo.flags = desc->set_flags; - if (desc->f_key) { + if (desc->f_key && desc->key != NO_KEY) { ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ksmo.key = desc->key; } @@ -268,34 +270,28 @@ static void prepare_mem12(void) #define ASSERT_MEM_EQ(p1, p2, size) \ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
-#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ -({ \ - struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ - enum mop_target __target = (mop_target_p); \ - uint32_t __size = (size); \ - \ - prepare_mem12(); \ - CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ - GADDR_V(mem1), ##__VA_ARGS__); \ - HOST_SYNC(__copy_cpu, STAGE_COPIED); \ - CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, \ - GADDR_V(mem2), ##__VA_ARGS__); \ - ASSERT_MEM_EQ(mem1, mem2, __size); \ -}) +static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, + GADDR_V(mem1), KEY(key)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +}
-#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ -({ \ - struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ - enum mop_target __target = (mop_target_p); \ - uint32_t __size = (size); \ - \ - prepare_mem12(); \ - CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ - GADDR_V(mem1)); \ - HOST_SYNC(__copy_cpu, STAGE_COPIED); \ - CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, ##__VA_ARGS__);\ - ASSERT_MEM_EQ(mem1, mem2, __size); \ -}) +static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +}
static void guest_copy(void) { @@ -310,7 +306,7 @@ static void test_copy(void)
HOST_SYNC(t.vcpu, STAGE_INITED);
- DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
kvm_vm_free(t.kvm_vm); } @@ -357,26 +353,26 @@ static void test_copy_key(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
/* vm, no key */ - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
/* vm/vcpu, machting key or key 0 */ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(9)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(9)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); /* * There used to be different code paths for key handling depending on * if the region crossed a page boundary. * There currently are not, but the more tests the merrier. */ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(9)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(9)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
/* vm/vcpu, mismatching keys on read, but no fetch protection */ - DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(2)); - DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem1), KEY(2)); + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
kvm_vm_free(t.kvm_vm); } @@ -409,7 +405,7 @@ static void test_copy_key_storage_prot_override(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
/* vcpu, mismatching keys, storage protection override in effect */ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(2)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
kvm_vm_free(t.kvm_vm); } @@ -422,8 +418,8 @@ static void test_copy_key_fetch_prot(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
/* vm/vcpu, matching key, fetch protection in effect */ - DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(9)); - DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem2), KEY(9)); + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
kvm_vm_free(t.kvm_vm); }
Test successful exchange, unsuccessful exchange, storage key protection and invalid arguments.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com --- tools/testing/selftests/kvm/s390x/memop.c | 540 ++++++++++++++++++---- 1 file changed, 460 insertions(+), 80 deletions(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 286185a59238..d4f4fb4022b9 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -9,6 +9,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <pthread.h>
#include <linux/bits.h>
@@ -44,10 +45,14 @@ struct mop_desc { enum mop_access_mode mode; void *buf; uint32_t sida_offset; + void *old; + bool *cmpxchg_success; uint8_t ar; uint8_t key; };
+typedef unsigned __int128 uint128; + const uint8_t NO_KEY = 0xff;
static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) @@ -91,6 +96,10 @@ static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ksmo.key = desc->key; } + if (desc->old) { + ksmo.flags |= KVM_S390_MEMOP_F_CMPXCHG; + ksmo.old_p = (uint64_t)desc->old; + } if (desc->_ar) ksmo.ar = desc->ar; else @@ -136,35 +145,45 @@ static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm printf("ABSOLUTE, WRITE, "); break; } - printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", - ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); + printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_p=%llx", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, + ksmo->old_p); if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) printf(", CHECK_ONLY"); if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) printf(", INJECT_EXCEPTION"); if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) printf(", SKEY_PROTECTION"); + if (ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG) + printf(", CMPXCHG"); puts(")"); }
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) +static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) { struct kvm_vcpu *vcpu = info.vcpu;
if (!vcpu) - vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); + return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); else - vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); + return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); }
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) +static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) { - struct kvm_vcpu *vcpu = info.vcpu; + int r; + + r = err_memop_ioctl(info, ksmo, desc); + if (ksmo->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (desc->cmpxchg_success) + *desc->cmpxchg_success = !r; + if (r == KVM_S390_MEMOP_R_NO_XCHG) + r = 0; + } + TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
- if (!vcpu) - return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); - else - return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); }
#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ @@ -187,7 +206,7 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) } \ __ksmo = ksmo_from_desc(&__desc); \ print_memop(__info.vcpu, &__ksmo); \ - err##memop_ioctl(__info, &__ksmo); \ + err##memop_ioctl(__info, &__ksmo, &__desc); \ })
#define MOP(...) MEMOP(, __VA_ARGS__) @@ -201,6 +220,8 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) #define AR(a) ._ar = 1, .ar = (a) #define KEY(a) .f_key = 1, .key = (a) #define INJECT .f_inject = 1 +#define CMPXCHG_OLD(o) .old = (o) +#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
@@ -210,8 +231,8 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-static uint8_t mem1[65536]; -static uint8_t mem2[65536]; +static uint8_t __aligned(PAGE_SIZE) mem1[65536]; +static uint8_t __aligned(PAGE_SIZE) mem2[65536];
struct test_default { struct kvm_vm *kvm_vm; @@ -243,6 +264,8 @@ enum stage { STAGE_SKEYS_SET, /* Guest copied memory (locations up to test case) */ STAGE_COPIED, + /* End of guest code reached */ + STAGE_DONE, };
#define HOST_SYNC(info_p, stage) \ @@ -254,6 +277,11 @@ enum stage { \ vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ + if (uc.cmd == UCALL_ABORT) { \ + TEST_FAIL("line %lu: %s, hints: %lu, %lu", \ + uc.args[1], (const char *)uc.args[0], \ + uc.args[2], uc.args[3]); \ + } \ ASSERT_EQ(uc.cmd, UCALL_SYNC); \ ASSERT_EQ(uc.args[1], __stage); \ }) \ @@ -293,6 +321,44 @@ static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, ASSERT_MEM_EQ(mem1, mem2, size); }
+static void default_cmpxchg(struct test_default *test, uint8_t key) +{ + for (int size = 1; size <= 16; size *= 2) { + for (int offset = 0; offset < 16; offset += size) { + uint8_t __aligned(16) new[16] = {}; + uint8_t __aligned(16) old[16]; + bool succ; + + prepare_mem12(); + default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY); + + memcpy(&old, mem1, 16); + CHECK_N_DO(MOP, test->vm, ABSOLUTE, WRITE, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(succ, "exchange of values should succeed"); + memcpy(mem1 + offset, new + offset, size); + ASSERT_MEM_EQ(mem1, mem2, 16); + + memcpy(&old, mem1, 16); + new[offset]++; + old[offset]++; + CHECK_N_DO(MOP, test->vm, ABSOLUTE, WRITE, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(!succ, "exchange of values should not succeed"); + ASSERT_MEM_EQ(mem1, mem2, 16); + ASSERT_MEM_EQ(&old, mem1, 16); + } + } +} + static void guest_copy(void) { GUEST_SYNC(STAGE_INITED); @@ -377,6 +443,250 @@ static void test_copy_key(void) kvm_vm_free(t.kvm_vm); }
+static void test_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key); + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + default_cmpxchg(&t, NO_KEY); + default_cmpxchg(&t, 0); + default_cmpxchg(&t, 9); + + kvm_vm_free(t.kvm_vm); +} + +static uint128 cut_to_size(int size, uint128 val) +{ + switch (size) { + case 1: + return (uint8_t)val; + case 2: + return (uint16_t)val; + case 4: + return (uint32_t)val; + case 8: + return (uint64_t)val; + case 16: + return val; + } + GUEST_ASSERT_1(false, "Invalid size"); + return 0; +} + +static bool popcount_eq(uint128 a, uint128 b) +{ + unsigned int count_a, count_b; + + count_a = __builtin_popcountl((uint64_t)(a >> 64)) + + __builtin_popcountl((uint64_t)a); + count_b = __builtin_popcountl((uint64_t)(b >> 64)) + + __builtin_popcountl((uint64_t)b); + return count_a == count_b; +} + +static uint128 rotate(int size, uint128 val, int amount) +{ + unsigned int bits = size * 8; + + amount = (amount + bits) % bits; + val = cut_to_size(size, val); + return (val << (bits - amount)) | (val >> amount); +} + +const unsigned int max_block = 16; + +static void choose_block(bool guest, int i, int *size, int *offset) +{ + unsigned int rand; + + rand = i; + if (guest) { + rand = rand * 19 + 11; + *size = 1 << ((rand % 3) + 2); + rand = rand * 19 + 11; + *offset = (rand % max_block) & ~(*size - 1); + } else { + rand = rand * 17 + 5; + *size = 1 << (rand % 5); + rand = rand * 17 + 5; + *offset = (rand % max_block) & ~(*size - 1); + } +} + +static uint128 permutate_bits(bool guest, int i, int size, uint128 old) +{ + unsigned int rand; + bool swap; + + rand = i; + rand = rand * 3 + 1; + if (guest) + rand = rand * 3 + 1; + swap = rand % 2 == 0; + if (swap) { + int i, j; + uint128 new; + uint8_t byte0, byte1; + + rand = rand * 3 + 1; + i = rand % size; + rand = rand * 3 + 1; + j = rand % size; + if (i == j) + return old; + new = rotate(16, old, i * 8); + byte0 = new & 0xff; + new &= ~0xff; + new = rotate(16, new, -i * 8); + new = rotate(16, new, j * 8); + byte1 = new & 0xff; + new = (new & ~0xff) | byte0; + new = rotate(16, new, -j * 8); + new = rotate(16, new, i * 8); + new = new | byte1; + new = rotate(16, new, -i * 8); + return new; + } else { + int amount; + + rand = rand * 3 + 1; + amount = rand % (size * 8); + return rotate(size, old, amount); + } +} + +static bool _cmpxchg(int size, void *target, uint128 *old_p, uint128 new) +{ + bool ret; + + switch (size) { + case 4: { + uint32_t old = *old_p; + + asm volatile ("cs %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint32_t *)(target)) + : [new] "d" ((uint32_t)new) + : "cc" + ); + ret = old == (uint32_t)*old_p; + *old_p = old; + return ret; + } + case 8: { + uint64_t old = *old_p; + + asm volatile ("csg %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint64_t *)(target)) + : [new] "d" ((uint64_t)new) + : "cc" + ); + ret = old == (uint64_t)*old_p; + *old_p = old; + return ret; + } + case 16: { + uint128 old = *old_p; + + asm volatile ("cdsg %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint128 *)(target)) + : [new] "d" (new) + : "cc" + ); + ret = old == *old_p; + *old_p = old; + return ret; + } + } + GUEST_ASSERT_1(false, "Invalid size"); + return 0; +} + +const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000; + +static void guest_cmpxchg_key(void) +{ + int size, offset; + uint128 old, new; + + set_storage_key_range(mem1, max_block, 0x10); + set_storage_key_range(mem2, max_block, 0x10); + GUEST_SYNC(STAGE_SKEYS_SET); + 
+ for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 1; + } while (!_cmpxchg(16, mem1, &old, 0)); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(true, i + j, &size, &offset); + do { + new = permutate_bits(true, i + j, size, old); + } while (!_cmpxchg(size, mem2 + offset, &old, new)); + } + } + + GUEST_SYNC(STAGE_DONE); +} + +static void *run_guest(void *data) +{ + struct test_info *info = data; + + HOST_SYNC(*info, STAGE_DONE); + return NULL; +} + +static char *quad_to_char(uint128 *quad, int size) +{ + return ((char *)quad) + (sizeof(*quad) - size); +} + +static void test_cmpxchg_key_concurrent(void) +{ + struct test_default t = test_default_init(guest_cmpxchg_key); + int size, offset; + uint128 old, new; + bool success; + pthread_t thread; + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2)); + pthread_create(&thread, NULL, run_guest, &t.vcpu); + + for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 0; + new = 1; + MOP(t.vm, ABSOLUTE, WRITE, &new, + sizeof(new), GADDR_V(mem1), + CMPXCHG_OLD(&old), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(false, i + j, &size, &offset); + do { + new = permutate_bits(false, i + j, size, old); + MOP(t.vm, ABSOLUTE, WRITE, quad_to_char(&new, size), + size, GADDR_V(mem2 + offset), + CMPXCHG_OLD(quad_to_char(&old, size)), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + } + } + + pthread_join(thread, NULL); + + MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2)); + TEST_ASSERT(popcount_eq(*(uint128 *)mem1, *(uint128 *)mem2), + "Must retain number of set bits"); + + kvm_vm_free(t.kvm_vm); +} + static void guest_copy_key_fetch_prot(void) { /* @@ -457,6 +767,24 @@ static void test_errors_key(void) kvm_vm_free(t.kvm_vm); }
+static void test_errors_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + int i; + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + for (i = 1; i <= 16; i *= 2) { + uint128 old = 0; + + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem2, i, GADDR_V(mem2), + CMPXCHG_OLD(&old), KEY(2)); + } + + kvm_vm_free(t.kvm_vm); +} + static void test_termination(void) { struct test_default t = test_default_init(guest_error_key); @@ -690,87 +1018,139 @@ static void test_errors(void) kvm_vm_free(t.kvm_vm); }
-struct testdef { - const char *name; - void (*test)(void); - int extension; -} testlist[] = { - { - .name = "simple copy", - .test = test_copy, - }, - { - .name = "generic error checks", - .test = test_errors, - }, - { - .name = "copy with storage keys", - .test = test_copy_key, - .extension = 1, - }, - { - .name = "copy with key storage protection override", - .test = test_copy_key_storage_prot_override, - .extension = 1, - }, - { - .name = "copy with key fetch protection", - .test = test_copy_key_fetch_prot, - .extension = 1, - }, - { - .name = "copy with key fetch protection override", - .test = test_copy_key_fetch_prot_override, - .extension = 1, - }, - { - .name = "error checks with key", - .test = test_errors_key, - .extension = 1, - }, - { - .name = "termination", - .test = test_termination, - .extension = 1, - }, - { - .name = "error checks with key storage protection override", - .test = test_errors_key_storage_prot_override, - .extension = 1, - }, - { - .name = "error checks without key fetch prot override", - .test = test_errors_key_fetch_prot_override_not_enabled, - .extension = 1, - }, - { - .name = "error checks with key fetch prot override", - .test = test_errors_key_fetch_prot_override_enabled, - .extension = 1, - }, -}; +static void test_errors_cmpxchg(void) +{ + struct test_default t = test_default_init(guest_idle); + uint128 old; + int rv, i, power = 1; + + HOST_SYNC(t.vcpu, STAGE_INITED); + + for (i = 0; i < 32; i++) { + if (i == power) { + power *= 2; + continue; + } + rv = ERR_MOP(t.vm, ABSOLUTE, WRITE, mem1, i, GADDR_V(mem1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad size for cmpxchg"); + } + for (i = 1; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, WRITE, mem1, i, GADDR((void *)~0xfffUL), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg"); + } + for (i = 2; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, WRITE, mem1, i, GADDR_V(mem1 + 1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad alignment for cmpxchg"); + } + + kvm_vm_free(t.kvm_vm); +}
int main(int argc, char *argv[]) { int extension_cap, idx;
+ setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); + extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + struct testdef { + const char *name; + void (*test)(void); + bool requirements_met; + } testlist[] = { + { + .name = "simple copy", + .test = test_copy, + .requirements_met = true, + }, + { + .name = "generic error checks", + .test = test_errors, + .requirements_met = true, + }, + { + .name = "copy with storage keys", + .test = test_copy_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "cmpxchg with storage keys", + .test = test_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "concurrently cmpxchg with storage keys", + .test = test_cmpxchg_key_concurrent, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "copy with key storage protection override", + .test = test_copy_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection", + .test = test_copy_key_fetch_prot, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection override", + .test = test_copy_key_fetch_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key", + .test = test_errors_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks for cmpxchg with key", + .test = test_errors_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "error checks for cmpxchg", + .test = test_errors_cmpxchg, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "termination", + .test = test_termination, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key storage protection override", + .test = test_errors_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks without key fetch prot override", + .test = test_errors_key_fetch_prot_override_not_enabled, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key fetch prot override", + .test = test_errors_key_fetch_prot_override_enabled, + .requirements_met = extension_cap > 0, + }, + };
ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(testlist));
- extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - if (extension_cap >= testlist[idx].extension) { + if (testlist[idx].requirements_met) { testlist[idx].test(); ksft_test_result_pass("%s\n", testlist[idx].name); } else { - ksft_test_result_skip("%s - extension level %d not supported\n", - testlist[idx].name, - testlist[idx].extension); + ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n", + testlist[idx].name, extension_cap); } }
Add a test that actually performs the access, instead of only using CHECK_ONLY.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com --- tools/testing/selftests/kvm/s390x/memop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index d4f4fb4022b9..404ddb6fa855 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -969,7 +969,9 @@ static void _test_errors_common(struct test_info info, enum mop_target target, i
/* Bad guest address: */ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); - TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address"); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL)); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address");
/* Bad host address: */ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:07)
Add test that tries to access, instead of CHECK_ONLY.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
Reviewed-by: Nico Boehr nrb@linux.ibm.com
"acceeded" isn't a word, should be "exceeded".
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com Reviewed-by: Thomas Huth thuth@redhat.com --- tools/testing/selftests/kvm/s390x/memop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 404ddb6fa855..7491f1731460 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -930,7 +930,7 @@ static void test_errors_key_fetch_prot_override_enabled(void)
/* * vcpu, mismatching keys on fetch, - * fetch protection override does not apply because memory range acceeded + * fetch protection override does not apply because memory range exceeded */ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2)); CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:08)
"acceeded" isn't a word, should be "exceeded".
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com Reviewed-by: Thomas Huth thuth@redhat.com
Reviewed-by: Nico Boehr nrb@linux.ibm.com
The guest code sets the key for mem1 only. In order to provoke a protection exception, the test code needs to address mem1.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com --- tools/testing/selftests/kvm/s390x/memop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 7491f1731460..e7b3897ee60a 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -760,9 +760,9 @@ static void test_errors_key(void)
/* vm/vcpu, mismatching keys, fetch protection in effect */ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
kvm_vm_free(t.kvm_vm); }
Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:09)
The guest code sets the key for mem1 only. In order to provoke a protection exception the test codes needs to address mem1.
Signed-off-by: Janis Schoetterl-Glausch scgl@linux.ibm.com
Reviewed-by: Nico Boehr nrb@linux.ibm.com