Hello Julien,
Julien Grall writes:
Hi Volodymyr,
On 18/12/2018 21:11, Volodymyr Babchuk wrote:
From: Volodymyr Babchuk vlad.babchuk@gmail.com
The main way to communicate with OP-TEE is to issue standard SMCCC call. "Standard" is a SMCCC term and it means that call can be interrupted and OP-TEE can return control to NW before completing the call.
In contrast with fast calls, where arguments and return values are passed in registers, standard calls use shared memory. Register pair a1,a2 holds 64-bit PA of command buffer, where all arguments are stored and which is used to return data. OP-TEE internally copies contents of this buffer into own secure memory before accessing and validating any data in command buffer. This is done to make sure that NW will not change contents of the validated parameters.
Mediator needs to do the same for number of reasons:
- To make sure that guest will not change data after validation.
- To translate IPAs to PAs in the command buffer (this is not done in this patch).
- To hide translated address from guest, so it will not be able to do IPA->PA translation by misusing mediator.
During standard call OP-TEE can issue multiple "RPC returns", asking NW to do some work for OP-TEE. NW then issues special call OPTEE_SMC_CALL_RETURN_FROM_RPC to resume handling of the original call. Thus, mediator needs to maintain context for original standard call during multiple SMCCC calls.
Standard call is considered complete, when returned value is not a RPC request.
Signed-off-by: Volodymyr Babchuk vlad.babchuk@gmail.com
Changes from v2:
- renamed struct domain_ctx to struct optee_domain
- fixed coding style
- Now I use access_guest_memory_by_ipa() instead of mappings to read command buffer
- Added tracking for in-flight calls, so a guest can't resume the same call from two CPUs simultaneously
xen/arch/arm/tee/optee.c | 319 ++++++++++++++++++++++++++++++++++- xen/include/asm-arm/domain.h | 3 + 2 files changed, 320 insertions(+), 2 deletions(-)
diff --git a/xen/arch/arm/tee/optee.c b/xen/arch/arm/tee/optee.c index 584241b03a..dc90e2ed8e 100644 --- a/xen/arch/arm/tee/optee.c +++ b/xen/arch/arm/tee/optee.c @@ -12,6 +12,8 @@ */ #include <xen/device_tree.h> +#include <xen/domain_page.h> +#include <xen/guest_access.h> #include <xen/sched.h> #include <asm/smccc.h> #include <asm/tee/tee.h> @@ -22,11 +24,38 @@ /* Client ID 0 is reserved for hypervisor itself */ #define OPTEE_CLIENT_ID(domain) (domain->domain_id + 1) +/*
- Maximal number of concurrent standard calls from one guest. This
- corresponds to OPTEE configuration option CFG_NUM_THREADS, because
- OP-TEE spawns a thread for every standard call.
Looking at OP-TEE, CFG_NUM_THREADS will vary depending on the platform. Is there any way to probe that number of threads from Xen?
Yes, this is a per-platform configuration. The easiest way is to add a fast SMC to OP-TEE that will report this parameter. Jens, what do you think about adding an additional call?
In any case, I think we should update the comment to reflect that this seems to be the maximum CFG_NUM_THREADS supported by any upstream platform.
Actually, the OP-TEE protocol has the ability to handle a limited number of threads correctly. OP-TEE can report that all threads are busy and the client will wait for a free one. Xen can do the same, although this is not implemented in this patch series. But I can add this.
Basically this means that everything can work correctly even with MAX_STD_CALLS==1; it just will not be as efficient.
- */
+#define MAX_STD_CALLS 16
- #define OPTEE_KNOWN_NSEC_CAPS OPTEE_SMC_NSEC_CAP_UNIPROCESSOR #define OPTEE_KNOWN_SEC_CAPS (OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM | \ OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM | \ OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) +/*
- Call context. OP-TEE can issue multiple RPC returns during one call.
- We need to preserve context during them.
- */
+struct optee_std_call {
- struct list_head list;
- struct optee_msg_arg *xen_arg;
- paddr_t guest_arg_ipa;
- int optee_thread_id;
- int rpc_op;
- bool in_flight;
+};
+/* Domain context */ +struct optee_domain {
- struct list_head call_list;
- atomic_t call_count;
- spinlock_t lock;
+};
- static bool optee_probe(void) { struct dt_device_node *node;
@@ -52,6 +81,11 @@ static bool optee_probe(void) static int optee_enable(struct domain *d) { struct arm_smccc_res resp;
- struct optee_domain *ctx;
- ctx = xzalloc(struct optee_domain);
- if ( !ctx )
return -ENOMEM; /* * Inform OP-TEE about a new guest.
@@ -69,9 +103,16 @@ static int optee_enable(struct domain *d) { gprintk(XENLOG_WARNING, "Unable to create OPTEE client: rc = 0x%X\n", (uint32_t)resp.a0);
xfree(ctx); return -ENODEV; }
- INIT_LIST_HEAD(&ctx->call_list);
- atomic_set(&ctx->call_count, 0);
- spin_lock_init(&ctx->lock);
- d->arch.tee = ctx;
} @@ -111,9 +152,86 @@ static void set_return(struct cpu_user_regsreturn 0;
*regs, uint32_t ret) set_user_reg(regs, 7, 0); } +static struct optee_std_call *allocate_std_call(struct optee_domain *ctx) +{
- struct optee_std_call *call;
- int count;
- /* Make sure that guest does not execute more than MAX_STD_CALLS */
- count = atomic_add_unless(&ctx->call_count, 1, MAX_STD_CALLS);
- if ( count == MAX_STD_CALLS )
return NULL;
- call = xzalloc(struct optee_std_call);
- if ( !call )
- {
atomic_dec(&ctx->call_count);
return NULL;
- }
- call->optee_thread_id = -1;
- call->in_flight = true;
- spin_lock(&ctx->lock);
- list_add_tail(&call->list, &ctx->call_list);
- spin_unlock(&ctx->lock);
- return call;
+}
+static void free_std_call(struct optee_domain *ctx,
struct optee_std_call *call)
+{
- atomic_dec(&ctx->call_count);
- spin_lock(&ctx->lock);
- list_del(&call->list);
- spin_unlock(&ctx->lock);
- ASSERT(!call->in_flight);
- xfree(call->xen_arg);
- xfree(call);
+}
+static struct optee_std_call *get_std_call(struct optee_domain *ctx,
int thread_id)
+{
- struct optee_std_call *call;
- spin_lock(&ctx->lock);
- list_for_each_entry( call, &ctx->call_list, list )
- {
if ( call->optee_thread_id == thread_id )
{
if ( call->in_flight )
{
gprintk(XENLOG_WARNING, "Guest tries to execute call which is already in flight");
goto out;
}
call->in_flight = true;
spin_unlock(&ctx->lock);
return call;
}
- }
+out:
- spin_unlock(&ctx->lock);
- return NULL;
+}
+static void put_std_call(struct optee_domain *ctx, struct optee_std_call *call) +{
- spin_lock(&ctx->lock);
- ASSERT(call->in_flight);
- call->in_flight = false;
- spin_unlock(&ctx->lock);
+}
static void optee_domain_destroy(struct domain *d) { struct arm_smccc_res resp;
struct optee_std_call *call, *call_tmp;
struct optee_domain *ctx = d->arch.tee;
/* At this time all domain VCPUs should be stopped */ @@ -124,6 +242,199 @@ static void optee_domain_destroy(struct
domain *d) */ arm_smccc_smc(OPTEE_SMC_VM_DESTROYED, OPTEE_CLIENT_ID(d), 0, 0, 0, 0, 0, 0, &resp);
This function can be called without enable and should be idempotent. So I would check d->arch.tee before and...
Nothing bad will happen if it is called without a prior call to enable.
But yes, I need to check for ctx != NULL.
- ASSERT(!spin_is_locked(&ctx->lock));
- list_for_each_entry_safe( call, call_tmp, &ctx->call_list, list )
free_std_call(ctx, call);
- ASSERT(!atomic_read(&ctx->call_count));
- xfree(d->arch.tee);
use XFREE here.
Oh, looks like I missed this macro introduction. Thank you for pointing out.
+}
+/*
- Copy command buffer into xen memory to:
- Hide translated addresses from guest
- Make sure that guest wouldn't change data in command buffer during call
- */
+static bool copy_std_request(struct cpu_user_regs *regs,
struct optee_std_call *call)
+{
- paddr_t xen_addr;
- call->guest_arg_ipa = (paddr_t)get_user_reg(regs, 1) << 32 |
get_user_reg(regs, 2);
NIT: The indentation looks weird here.
- /*
* Command buffer should start at page boundary.
* This is OP-TEE ABI requirement.
*/
- if ( call->guest_arg_ipa & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1) )
return false;
- call->xen_arg = _xmalloc(OPTEE_MSG_NONCONTIG_PAGE_SIZE,
OPTEE_MSG_NONCONTIG_PAGE_SIZE);
- if ( !call->xen_arg )
return false;
- BUILD_BUG_ON(OPTEE_MSG_NONCONTIG_PAGE_SIZE > PAGE_SIZE);
As you use _xmalloc, you should not need this. This is only necessary if you use alloc_xenheap_page.
Yes, right. This is leftover from time when I mapped guest page into XEN. I'll move it down, to place where I map that page.
I am wondering whether it is wise to allocate the memory from xenheap and not domheap. While on Arm64 (for now) xenheap and domheap are the same, on Arm32 they are different. The xenheap is at most 1GB, so pretty limited.
Honestly, I have no opinion there. What are limitations of domheap in this case?
Furthermore, using domheap would have the advantage to allow in the future accounting the allocation to the guest and add more safety (there are discussion to make domheap per domain).
- access_guest_memory_by_ipa(current->domain, call->guest_arg_ipa,
call->xen_arg, OPTEE_MSG_NONCONTIG_PAGE_SIZE,
false);
You need to check the return of access_guest_memory_by_ipa as this function can fail.
Will do.
- xen_addr = virt_to_maddr(call->xen_arg);
- set_user_reg(regs, 1, xen_addr >> 32);
- set_user_reg(regs, 2, xen_addr & 0xFFFFFFFF);
- return true;
+}
+static void copy_std_request_back(struct optee_domain *ctx,
struct cpu_user_regs *regs,
struct optee_std_call *call)
Can you add a comment on top of the function explaining what it does?
Yes, sure.
"Copy OP-TEE response back into guest's request buffer" will be sufficient?
+{
- struct optee_msg_arg *guest_arg;
- struct page_info *page;
- unsigned int i;
- uint32_t attr;
- /* copy_std_request() validated IPA for us */
Not really, the guest is free to modify the stage-2 mapping on another vCPU while this is happening. I agree that the guest will shoot himself, but we at least need to not have weird behavior happening.
In that case, I would check that the type is p2m_ram_rw as you don't want to write in read-only or foreign mapping.
How can I do this atomically? I.e. what if the guest mangles the p2m right after the check?
Also, as copy_std_request() and copy_std_request_back may not be called in the same "thread" it would be useful if you specify a bit more the interaction.
I'm not sure what you mean there.
- page = get_page_from_gfn(current->domain, paddr_to_pfn(call->guest_arg_ipa),
Please use gfn_x(gaddr_to_gfn(...)) to clarify this is a gfn. The gfn_x will be unnecessary soon with a cleanup that is currently under review.
So there is a chance that it will be removed by the time this patch series is merged? :)
NULL, P2M_ALLOC);
- if ( !page )
return;
- guest_arg = map_domain_page(page_to_mfn(page));
So here you assume that PAGE_SIZE == OPTEE_MSG_NONCONTIG_PAGE_SIZE. Can you add a BUILD_BUG_ON just above (with a comment) so we don't get some nasty surprise with 64K support.
Yes, sure.
Also, you should be able to use __map_domain_page(page) here.
Oh, thank you for pointing to this macro. Not the best descriptive name, I must say.
- guest_arg->ret = call->xen_arg->ret;
- guest_arg->ret_origin = call->xen_arg->ret_origin;
- guest_arg->session = call->xen_arg->session;
NIT: newline here please.
- for ( i = 0; i < call->xen_arg->num_params; i++ )
- {
attr = call->xen_arg->params[i].attr;
switch ( attr & OPTEE_MSG_ATTR_TYPE_MASK )
{
case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
guest_arg->params[i].u.tmem.size =
call->xen_arg->params[i].u.tmem.size;
continue;
case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
guest_arg->params[i].u.value.a =
call->xen_arg->params[i].u.value.a;
guest_arg->params[i].u.value.b =
call->xen_arg->params[i].u.value.b;
continue;
case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT:
case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT:
guest_arg->params[i].u.rmem.size =
call->xen_arg->params[i].u.rmem.size;
continue;
case OPTEE_MSG_ATTR_TYPE_NONE:
case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT:
case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
continue;
}
- }
- unmap_domain_page(guest_arg);
- put_page(page);
+}
+static void execute_std_call(struct optee_domain *ctx,
struct cpu_user_regs *regs,
struct optee_std_call *call)
+{
- register_t optee_ret;
- forward_call(regs);
- optee_ret = get_user_reg(regs, 0);
- if ( OPTEE_SMC_RETURN_IS_RPC(optee_ret) )
- {
call->optee_thread_id = get_user_reg(regs, 3);
call->rpc_op = OPTEE_SMC_RETURN_GET_RPC_FUNC(optee_ret);
put_std_call(ctx, call);
return;
- }
- copy_std_request_back(ctx, regs, call);
- put_std_call(ctx, call);
- free_std_call(ctx, call);
+}
Most of the code in this patch is self-explaining, which is quite nice :). However, I think this function would require explaining a bit the logic. For instance in which case the call will be freed.
Thank you :) Yes, this depends on if call was completed or it was interrupted due to RPC, in which case it will be resumed later. I'll add comment.
+static bool handle_std_call(struct optee_domain *ctx,
struct cpu_user_regs *regs)
+{
- struct optee_std_call *call = allocate_std_call(ctx);
- if ( !call )
return false;
- if ( !copy_std_request(regs, call) )
goto err;
- /* Now we can safely examine contents of command buffer */
- if ( OPTEE_MSG_GET_ARG_SIZE(call->xen_arg->num_params) >
OPTEE_MSG_NONCONTIG_PAGE_SIZE )
goto err;
- switch ( call->xen_arg->cmd )
- {
- case OPTEE_MSG_CMD_OPEN_SESSION:
- case OPTEE_MSG_CMD_CLOSE_SESSION:
- case OPTEE_MSG_CMD_INVOKE_COMMAND:
- case OPTEE_MSG_CMD_CANCEL:
- case OPTEE_MSG_CMD_REGISTER_SHM:
- case OPTEE_MSG_CMD_UNREGISTER_SHM:
break;
- default:
goto err;
- }
- execute_std_call(ctx, regs, call);
- return true;
This function is a bit odd to read. I think it would be more clear if you move this code before the break.
Yes, you are right.
+err:
- put_std_call(ctx, call);
- free_std_call(ctx, call);
- return false;
+}
+static bool handle_rpc(struct optee_domain *ctx, struct cpu_user_regs *regs) +{
- struct optee_std_call *call;
- int optee_thread_id = get_user_reg(regs, 3);
- call = get_std_call(ctx, optee_thread_id);
- if ( !call )
return false;
- switch ( call->rpc_op )
- {
- case OPTEE_SMC_RPC_FUNC_ALLOC:
/* TODO: Add handling */
break;
- case OPTEE_SMC_RPC_FUNC_FREE:
/* TODO: Add handling */
break;
- case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
break;
- case OPTEE_SMC_RPC_FUNC_CMD:
/* TODO: Add handling */
break;
- }
- execute_std_call(ctx, regs, call);
- return true; } static bool handle_exchange_capabilities(struct cpu_user_regs
*regs) @@ -161,6 +472,8 @@ static bool handle_exchange_capabilities(struct cpu_user_regs *regs) static bool optee_handle_call(struct cpu_user_regs *regs) {
- struct optee_domain *ctx = current->domain->arch.tee;
switch ( get_user_reg(regs, 0) ) { case OPTEE_SMC_CALLS_COUNT:
@@ -170,8 +483,6 @@ static bool optee_handle_call(struct cpu_user_regs *regs) case OPTEE_SMC_FUNCID_GET_OS_REVISION: case OPTEE_SMC_ENABLE_SHM_CACHE: case OPTEE_SMC_DISABLE_SHM_CACHE:
- case OPTEE_SMC_CALL_WITH_ARG:
- case OPTEE_SMC_CALL_RETURN_FROM_RPC: forward_call(regs); return true; case OPTEE_SMC_GET_SHM_CONFIG:
@@ -180,6 +491,10 @@ static bool optee_handle_call(struct cpu_user_regs *regs) return true; case OPTEE_SMC_EXCHANGE_CAPABILITIES: return handle_exchange_capabilities(regs);
- case OPTEE_SMC_CALL_WITH_ARG:
return handle_std_call(ctx, regs);
- case OPTEE_SMC_CALL_RETURN_FROM_RPC:
return handle_rpc(ctx, regs); default: return false; }
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h index 175de44927..88b48697bd 100644 --- a/xen/include/asm-arm/domain.h +++ b/xen/include/asm-arm/domain.h @@ -97,6 +97,9 @@ struct arch_domain struct vpl011 vpl011; #endif +#ifdef CONFIG_TEE
- void *tee;
+#endif
Did you look whether there are any hole in arch_domain that could be re-used?
I thought about that. But what are chances to find 64bit-wide, 64bit-aligned hole in a structure? If I remember C standard correctly, there are no reasons for compiler to leave such holes.
I'm talking about aarch64 there, but I assume, that this is true for aarch32/armv7.
} __cacheline_aligned; struct arch_vcpu
Cheers,
-- Best regards,Volodymyr Babchuk