On 1/25/23 22:26, Janis Schoetterl-Glausch wrote:
User space can use the MEM_OP ioctl to make storage key checked reads and writes to the guest; however, it has no way of performing atomic, key checked accesses to the guest. Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg op. For now, support this op for absolute accesses only.
This op can be use, for example, to set the device-state-change
s/use/used/
indicator and the adapter-local-summary indicator atomically.
Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
[...]
+/**
- cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
- @kvm: Virtual machine instance.
- @gpa: Absolute guest address of the location to be changed.
- @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
non power of two will result in failure.
- @old_addr: Pointer to old value. If the location at @gpa contains this value,
the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
*@old_addr contains the value at @gpa before the attempt to
exchange the value.
- @new: The value to place at @gpa.
- @access_key: The access key to use for the guest access.
- @success: output value indicating if an exchange occurred.
- Atomically exchange the value at @gpa by @new, if it contains *@old.
- Honors storage keys.
- Return: * 0: successful exchange
* a program interruption code indicating the reason cmpxchg could
not be attempted
Nit:
>0: a program interruption code...
* -EINVAL: address misaligned or len not power of two
* -EAGAIN: transient failure (len 1 or 2)
* -EOPNOTSUPP: read-only memslot (should never occur)
- */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
__uint128_t *old_addr, __uint128_t new,
u8 access_key, bool *success)
+{
- gfn_t gfn = gpa >> PAGE_SHIFT;
gpa_to_gfn()?
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
- bool writable;
- hva_t hva;
- int ret;
- if (!IS_ALIGNED(gpa, len))
return -EINVAL;
- hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
- if (kvm_is_error_hva(hva))
return PGM_ADDRESSING;
- /*
* Check if it's a read-only memslot, even though that cannot occur
* since those are unsupported.
* Don't try to actually handle that case.
*/
- if (!writable)
return -EOPNOTSUPP;
- hva += offset_in_page(gpa);
Hmm, if we don't use a macro to generate these, then I'd add an explanation:
cmpxchg_user_key() is a macro that is dependent on the type of "old" so there's no deduplication possible without further macros.
- switch (len) {
- case 1: {
u8 old;
ret = cmpxchg_user_key((u8 *)hva, &old, *old_addr, new, access_key);
*success = !ret && old == *old_addr;
*old_addr = old;
break;
- }
- case 2: {
u16 old;
ret = cmpxchg_user_key((u16 *)hva, &old, *old_addr, new, access_key);
*success = !ret && old == *old_addr;
*old_addr = old;
break;
- }
- case 4: {
u32 old;
ret = cmpxchg_user_key((u32 *)hva, &old, *old_addr, new, access_key);
*success = !ret && old == *old_addr;
*old_addr = old;
break;
- }
- case 8: {
u64 old;
ret = cmpxchg_user_key((u64 *)hva, &old, *old_addr, new, access_key);
*success = !ret && old == *old_addr;
*old_addr = old;
break;
- }
- case 16: {
__uint128_t old;
ret = cmpxchg_user_key((__uint128_t *)hva, &old, *old_addr, new, access_key);
*success = !ret && old == *old_addr;
*old_addr = old;
break;
- }
- default:
return -EINVAL;
- }
- mark_page_dirty_in_slot(kvm, slot, gfn);
Is that needed if we failed the store?
- /*
* Assume that the fault is caused by protection, either key protection
* or user page write protection.
*/
- if (ret == -EFAULT)
ret = PGM_PROTECTION;
- return ret;
+}
- /**
- guest_translate_address_with_key - translate guest logical into guest absolute address
- @vcpu: virtual cpu
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4b8b41be7aed..86e9734d5782 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -584,7 +584,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318:
- case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2:
@@ -598,6 +597,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break;
- case KVM_CAP_S390_MEM_OP_EXTENSION:
/*
* Flag bits indicating which extensions are supported.
* If r > 0, the base extension must also be supported/indicated,
* in order to maintain backwards compatibility.
*/
r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
break;
case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID:
@@ -2840,6 +2848,50 @@ static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) return r; } +static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{
- void __user *uaddr = (void __user *)mop->buf;
- void __user *old_addr = (void __user *)mop->old_addr;
- union {
__uint128_t quad;
char raw[sizeof(__uint128_t)];
- } old = { .quad = 0}, new = { .quad = 0 };
- unsigned int off_in_quad = sizeof(new) - mop->size;
- int r, srcu_idx;
- bool success;
- r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
- if (r)
return r;
- /*
* This validates off_in_quad. Checking that size is a power
* of two is not necessary, as cmpxchg_guest_abs_with_key
* takes care of that
*/
- if (mop->size > sizeof(new))
return -EINVAL;
- if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
return -EFAULT;
- if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
return -EFAULT;
- srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_is_error_gpa(kvm, mop->gaddr)) {
r = PGM_ADDRESSING;
goto out_unlock;
- }
- r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
new.quad, mop->key, &success);
- if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
r = -EFAULT;
+out_unlock:
- srcu_read_unlock(&kvm->srcu, srcu_idx);
- return r;
+}
- static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { /*
@@ -2858,6 +2910,8 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) case KVM_S390_MEMOP_ABSOLUTE_READ: case KVM_S390_MEMOP_ABSOLUTE_WRITE: return kvm_s390_vm_mem_op_abs(kvm, mop);
- case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
default: return -EINVAL; }