On Thu, 4 Feb 2021 18:05:15 +0100 Janosch Frank frankja@linux.ibm.com wrote:
On 2/4/21 5:34 PM, Janosch Frank wrote:
On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
Extend kvm_s390_shadow_fault to return the pointer to the valid leaf DAT table entry, or to the invalid entry.
Also return some flags in the lower bits of the address: DAT_PROT: indicates that DAT protection applies because of the protection bit in the segment (or, if EDAT, region) tables NOT_PTE: indicates that the address of the DAT table entry returned does not refer to a PTE, but to a segment or region table.
Signed-off-by: Claudio Imbrenda imbrenda@linux.ibm.com Cc: stable@vger.kernel.org
arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- arch/s390/kvm/gaccess.h | 5 ++++- arch/s390/kvm/vsie.c | 8 ++++---- 3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6d6b57059493..2d7bcbfb185e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rfte.val = ptr; goto shadow_r2t; }
*pgt = ptr + vaddr.rfx * 8;
So pgt either is a table entry if rc > 0 or a pointer to the first pte on rc == 0 after this change?
Hrm, if it is really based on RCs than I might be able to come to terms with having two things in a ptr with the name pgt. But it needs a comment change.
rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
&rfte.val); if (rc) return rc; @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rste.val = ptr; goto shadow_r3t; }
rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,*pgt = ptr + vaddr.rsx * 8;
&rste.val); if (rc) return rc; @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rtte.val = ptr; goto shadow_sgt; }
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,*pgt = ptr + vaddr.rtx * 8;
&rtte.val); if (rc) return rc; @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ste.val = ptr; goto shadow_pgt; }
rc = gmap_read_table(parent, ptr + vaddr.sx * 8,*pgt = ptr + vaddr.sx * 8;
&ste.val); if (rc) return rc; @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
- @vcpu: virtual cpu
- @sg: pointer to the shadow guest address space structure
- @saddr: faulting address in the shadow gmap
- @pteptr: will contain the address of the faulting DAT table
entry, or of
the valid leaf, plus some flags
pteptr is not the right name if it can be two things
You use it for pei only, right?
yes
- Returns: - 0 if the shadow fault was successfully resolved
- > 0 (pgm exception code) on exceptions while
faulting @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
- -ENOMEM if out of memory
*/ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
unsigned long saddr)
unsigned long saddr, unsigned long
*pteptr) { union vaddress vaddr; union page_table_entry pte;
- unsigned long pgt;
- unsigned long pgt = 0; int dat_protection, fake; int rc;
@@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; goto shadow_page; }
- if (!rc)
rc = gmap_read_table(sg->parent, pgt + vaddr.px *
8, &pte.val); +
- switch (rc) {
- case PGM_SEGMENT_TRANSLATION:
- case PGM_REGION_THIRD_TRANS:
- case PGM_REGION_SECOND_TRANS:
- case PGM_REGION_FIRST_TRANS:
pgt |= NOT_PTE;
GACC_TRANSL_ENTRY_INV ?
break;
- case 0:
pgt += vaddr.px * 8;
rc = gmap_read_table(sg->parent, pgt, &pte.val);
- }
- if (*pteptr)
if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; if (!rc && pte.z)*pteptr = pgt | dat_protection * DAT_PROT;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index f4c51756c462..66a6e2cec97a 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); +#define DAT_PROT 2
GACC_TRANSL_ENTRY_PROT
Ok after a second pass that's not what's going on here. Those basically directly correspond to the MVPG PEI indication bits, right?
yes :)
Do we also need to consider bit 63?
no, that can only happen if a specific SIE feature is used, which KVM neither uses nor supports for VSIE, so it cannot happen
+#define NOT_PTE 4
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
unsigned long saddr);
unsigned long saddr, unsigned long
*pteptr); #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c5d0a58b2c29..7db022141db3 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at offset *mso* */ prefix += scb_s->mso;
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
NULL); if (!rc && (scb_s->ecb & ECB_TE)) rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
prefix + PAGE_SIZE);
prefix + PAGE_SIZE,
NULL); /* * We don't have to mprotect, we will be called for all unshadows. * SIE will detect if protection applies and trigger a validity. @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) current->thread.gmap_addr, 1); rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
current->thread.gmap_addr);
current->thread.gmap_addr,
NULL); if (rc > 0) { rc = inject_fault(vcpu, rc, current->thread.gmap_addr, @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu, { if (vsie_page->fault_addr) kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
vsie_page->fault_addr);
vsie_page->fault_addr,
NULL);
Ok
vsie_page->fault_addr = 0; }