On 20/11/2024 5:31 pm, Oliver Upton wrote:
> On Tue, Nov 12, 2024 at 10:37:10AM +0000, James Clark wrote:
>> +void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
>> +{
>> + if (kvm_arm_skip_trace_state())
>> + return;
>> +
>> + if (has_vhe())
>> + write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
>> + else
>> + if (host_trfcr != guest_trfcr) {
>> + *host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;
>
> Huh? That's going into host_debug_state, which is the dumping grounds
> for *host* context when entering a guest.
>
> Not sure why we'd stick a *guest* value in there...
>
Only to save a 3rd storage place for trfcr when just the register and
one place is technically enough. But yes if it's more readable to have
guest_trfcr_el1 separately then that makes sense.
>> + host_data_set_flag(HOST_STATE_SWAP_TRFCR);
>> + } else
>> + host_data_clear_flag(HOST_STATE_SWAP_TRFCR);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_set_trfcr);
>
> I have a rather strong distaste for this interface, both with the
> coresight driver and internally with the hypervisor. It'd be better if
> the driver actually told KVM what the *intent* is rather than throwing a
> pile of bits over the fence and forcing KVM to interpret what that
> configuration means.
>
That works, it would be nice to have it consistent and have it that way
for filtering, like kvm_set_guest_trace_filters(bool kernel, bool user).
But I suppose we can justify not doing it there because we're not really
interpreting the TRFCR value just writing it whole.
>> +static void __debug_swap_trace(void)
>> +{
>> + u64 trfcr = read_sysreg_el1(SYS_TRFCR);
>> +
>> + write_sysreg_el1(*host_data_ptr(host_debug_state.trfcr_el1), SYS_TRFCR);
>> + *host_data_ptr(host_debug_state.trfcr_el1) = trfcr;
>> + host_data_set_flag(HOST_STATE_RESTORE_TRFCR);
>> +}
>> +
>
> What if trace is disabled in the guest or in the host? Do we need to
> synchronize when transitioning from an enabled -> disabled state like we
> do today?
>
By synchronize do you mean the tsb_csync()? I can only see it being
necessary for the TRBE case because then writing to the buffer is fatal.
Without TRBE the trace sinks still work and the boundary of when exactly
tracing is disabled in the kernel isn't critical.
> I took a stab at this, completely untested of course && punts on
> protected mode. But this is _generally_ how I'd like to see everything
> fit together.
>
Would you expect to see the protected mode stuff ignored if I sent
another version more like yours below? Or was that just skipped to keep
the example shorter?
I think I'm a bit uncertain on that one because removing HAS_TRBE means
you can't check if TRBE is enabled or not in protected mode and it will
go wrong if it is.
But other than that I think I get the general point of what you mean:
* Add an explicit guest_trfcr variable rather than cheating and using
the host one
* kvm_enable_trbe() rather than interpreting the TRBLIMITR value
* Some code reuse by calling __trace_do_switch() with flipped
arguments on both entry and exit
And see below but I think it requires one minor change to support
filtering without TRBE
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8bc0ec151684..b4714cece5f0 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -611,7 +611,7 @@ struct cpu_sve_state {
> */
> struct kvm_host_data {
> #define KVM_HOST_DATA_FLAG_HAS_SPE 0
> -#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
> +#define KVM_HOST_DATA_FLAG_HOST_TRBE_ENABLED 1
> #define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2
> #define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3
> unsigned long flags;
> @@ -659,6 +659,9 @@ struct kvm_host_data {
> u64 mdcr_el2;
> } host_debug_state;
>
> + /* Guest trace filter value */
> + u64 guest_trfcr_el1;
> +
> /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
> unsigned int nr_event_counters;
>
> @@ -1381,6 +1384,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
> void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
> void kvm_clr_pmu_events(u64 clr);
> bool kvm_set_pmuserenr(u64 val);
> +void kvm_enable_trbe(u64 guest_trfcr);
> +void kvm_disable_trbe(void);
> #else
> static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
> static inline void kvm_clr_pmu_events(u64 clr) {}
> @@ -1388,6 +1393,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
> {
> return false;
> }
> +void kvm_enable_trbe(u64 guest_trfcr) {}
> +void kvm_disable_trbe(void) {}
> #endif
>
> void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 46dbeabd6833..6ef8d8f4b452 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -72,10 +72,6 @@ void kvm_init_host_debug_data(void)
> if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
> !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
> host_data_set_flag(HAS_SPE);
> -
> - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
> - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
> - host_data_set_flag(HAS_TRBE);
> }
>
> /*
> @@ -215,3 +211,27 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
> kvm_arch_vcpu_load(vcpu, smp_processor_id());
> preempt_enable();
> }
> +
> +void kvm_enable_trbe(u64 guest_trfcr)
> +{
> + if (WARN_ON_ONCE(preemptible()))
> + return;
> +
> + if (has_vhe()) {
> + write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
> + return;
> + }
> +
> + *host_data_ptr(guest_trfcr_el1) = guest_trfcr;
> + host_data_set_flag(HOST_TRBE_ENABLED);
FWIW TRBE and TRF are separate features, so this wouldn't do the
filtering correctly if TRBE wasn't in use, but I can split it out into
separate kvm_enable_trbe(void) and kvm_set_guest_filters(u64 guest_trfcr).
> +}
> +EXPORT_SYMBOL_GPL(kvm_enable_trbe);
> +
> +void kvm_disable_trbe(void)
> +{
> + if (has_vhe() || WARN_ON_ONCE(preemptible()))
> + return;
> +
> + host_data_clear_flag(HOST_TRBE_ENABLED);
> +}
> +EXPORT_SYMBOL_GPL(kvm_disable_trbe);
> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> index 858bb38e273f..d36cbce75bee 100644
> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> @@ -51,32 +51,33 @@ static void __debug_restore_spe(u64 pmscr_el1)
> write_sysreg_el1(pmscr_el1, SYS_PMSCR);
> }
>
> -static void __debug_save_trace(u64 *trfcr_el1)
> +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
> {
> - *trfcr_el1 = 0;
> + *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
> + write_sysreg_el1(new_trfcr, SYS_TRFCR);
>
> - /* Check if the TRBE is enabled */
> - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
> + /* Nothing left to do if going to an enabled state */
> + if (new_trfcr)
> return;
> +
> /*
> - * Prohibit trace generation while we are in guest.
> - * Since access to TRFCR_EL1 is trapped, the guest can't
> - * modify the filtering set by the host.
> + * Switching to a context with trace generation disabled. Drain the
> + * trace buffer to memory.
> */
> - *trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
> - write_sysreg_el1(0, SYS_TRFCR);
> isb();
> - /* Drain the trace buffer to memory */
> tsb_csync();
> }
>
> -static void __debug_restore_trace(u64 trfcr_el1)
> +static void __trace_switch_to_guest(void)
> {
> - if (!trfcr_el1)
> - return;
> + __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
> + *host_data_ptr(guest_trfcr_el1));
> +}
>
> - /* Restore trace filter controls */
> - write_sysreg_el1(trfcr_el1, SYS_TRFCR);
> +static void __trace_switch_to_host(void)
> +{
> + __trace_do_switch(host_data_ptr(guest_trfcr_el1),
> + *host_data_ptr(host_debug_state.trfcr_el1));
> }
>
> void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
> @@ -84,9 +85,13 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
> /* Disable and flush SPE data generation */
> if (host_data_test_flag(HAS_SPE))
> __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
> - /* Disable and flush Self-Hosted Trace generation */
> - if (host_data_test_flag(HAS_TRBE))
> - __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
> +
> + /*
> + * Switch the trace filter, potentially disabling and flushing trace
> + * data generation
> + */
> + if (host_data_test_flag(HOST_TRBE_ENABLED))
> + __trace_switch_to_guest();
> }
>
> void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
> @@ -98,8 +103,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
> {
> if (host_data_test_flag(HAS_SPE))
> __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
> - if (host_data_test_flag(HAS_TRBE))
> - __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
> + if (host_data_test_flag(HOST_TRBE_ENABLED))
> + __trace_switch_to_host();
> }
>
> void __debug_switch_to_host(struct kvm_vcpu *vcpu)
>
On 20/11/2024 9:16 am, Oliver Upton wrote:
> Hi James,
>
> On Tue, Nov 12, 2024 at 10:37:06AM +0000, James Clark wrote:
>> Currently in nVHE, KVM has to check if SPE is enabled on every guest
>> switch even if it was never used. Because it's a debug feature and is
>> more likely to not be used than used, give KVM the SPE buffer status to
>> allow a much simpler and faster do-nothing path in the hyp.
>>
>> This is always called with preemption disabled except for probe/hotplug
>> which gets wrapped with preempt_disable().
>
> Unless the performance penalty of checking if SPE is measurably bad, I'd
> rather we keep things as-is.
>
> Folks that want to go fast are probably using VHE to begin with. As you
> note below, we need the hypervisor to decide if SPE is enabled based on
> hardware in protected mode anyway. Using a common flow for protected and
> non-protected configs keeps complexity down and increases the likelihood
> SPE save/restore code actually gets tested.
>
I'm not sure if there is any measurable difference. This change was
actually in response to this review from Marc here [1]:
> Why do we need to save anything if nothing was enabled, which is
> *all the time*? I'm sorry to break it to you, but nobody uses these
> features. So I'd like them to have zero cost when not in use.
> Surely there is something there that should say "yup, tracing" or
> not (such as the enable bits), which would avoid hitting the sysreg
> pointlessly?
I suppose I could have taken the "zero cost" bit a bit too literally and
maybe there were some simpler optimizations that didn't involve strongly
coupling the driver to KVM. At least for enable/disable, for filtering
it would still be required.
I'm trying to think if there is some middle ground where there is a
systemwide flag or static key that gets set on the very first SPE or
trace session. In theory it could be simpler than this per-cpu enable
disable stuff, but in the end it pretty much ends up needing the same
info from the driver (and has the same protected mode issue). So you
might as well do it as fine grained as this or not at all like you suggest.
[1]: https://lore.kernel.org/linux-arm-kernel/86bk832jza.wl-maz@kernel.org/
Hi
On 19/11/2024 12:40, Yicong Yang wrote:
> On 2024/11/15 1:20, Suzuki K Poulose wrote:
>> Hi,
>>
>> Thanks for the report, see my comments inline.
>>
>> On 14/11/2024 15:26, James Clark wrote:
>>>
>>>
>>> On 14/11/2024 2:51 pm, Yicong Yang wrote:
>>>> On 2024/11/14 18:30, James Clark wrote:
>>>>>
>>>>>
>>>>> On 14/11/2024 8:16 am, Yicong Yang wrote:
>>>>>> From: Yicong Yang <yangyicong(a)hisilicon.com>
>>>>>>
>>>>>> Enable the trace in below steps will crash the kernel by NULL pointer
>>>>>> dereferencing:
>>>>>> echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink
>>>>>> echo 1 > /sys/bus/coresight/devices/etm0/enable_source
>>>>>> echo 0x400000 > /sys/bus/coresight/devices/tmc_etr0/buffer_size
>>>>>> echo 1 > /sys/bus/coresight/devices/etm2/enable_source
>>>>>> dd if=/dev/tmc_etr0 of=test_etm_sysfs_etr_030.data
>>>>>>
>>>>>> The call trace will be like:
>>>>>> WARNING: CPU: 39 PID: 8586 at drivers/hwtracing/coresight/ coresight-tmc-etr.c:1123 __tmc_etr_disable_hw+0x108/0x140 [coresight_tmc]
>>>>>> [...]
>>>>>> Call trace:
>>>>>> __tmc_etr_disable_hw+0x108/0x140 [coresight_tmc]
>>>>>> tmc_read_prepare_etr+0xc0/0xd0 [coresight_tmc]
>>>>>> tmc_open+0x60/0xa0 [coresight_tmc]
>>>>>> misc_open+0x11c/0x170
>>>>>> chrdev_open+0xcc/0x2b0
>>>>>> do_dentry_open+0x140/0x4e0
>>>>>> vfs_open+0x34/0xf8
>>>>>> path_openat+0x2b0/0xf58
>>>>>> do_filp_open+0x8c/0x148
>>>>>> do_sys_openat2+0xb8/0xe8
>>>>>> __arm64_sys_openat+0x70/0xc0
>>>>>> el0_svc_common.constprop.0+0x64/0x148
>>>>>> do_el0_svc+0x24/0x38
>>>>>> el0_svc+0x40/0x140
>>>>>> el0t_64_sync_handler+0xc0/0xc8
>>>>>> el0t_64_sync+0x1a4/0x1a8
>>>>>> ---[ end trace 0000000000000000 ]---
>>>>>> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028
>>>>>> [...]
>>>>>> Call trace:
>>>>>> tmc_etr_get_sysfs_trace+0x10/0x80 [coresight_tmc]
>>>>>> vfs_read+0xcc/0x310
>>>>>> ksys_read+0x74/0x108
>>>>>> __arm64_sys_read+0x24/0x38
>>>>>> el0_svc_common.constprop.0+0x64/0x148
>>>>>> do_el0_svc+0x24/0x38
>>>>>> el0_svc+0x40/0x140
>>>>>>
>>>>>> Due to the buffer size changed, the buffer will be reallocated in
>>>>>> tmc_etr_get_sysfs_buffer() when the second source enabled. At trace
>>>>>> end tmc_etr_sync_sysfs_buf() will reset the drvdata->sysfs_buf and
>>>>>> trigger the later NULL pointer dereference when reading out the
>>>>>> data.
>>>>>>
>>>>>> But it doesn't make sense to change the buffer size when it's
>>>>>> already in use. So block such behavior.
>>>>>>
>>>>>> Signed-off-by: Yicong Yang <yangyicong(a)hisilicon.com>
>>>>>> ---
>>>>>> drivers/hwtracing/coresight/coresight-tmc-core.c | 5 +++++
>>>>>> 1 file changed, 5 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/ drivers/hwtracing/coresight/coresight-tmc-core.c
>>>>>> index 475fa4bb6813..9660af63e9bc 100644
>>>>>> --- a/drivers/hwtracing/coresight/coresight-tmc-core.c
>>>>>> +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
>>>>>> @@ -319,6 +319,11 @@ static ssize_t buffer_size_store(struct device *dev,
>>>>>> if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
>>>>>> return -EPERM;
>>>>>> + /* Don't change the buffer size if it's in use */
>>>>>> + guard(spinlock)(&drvdata->spinlock);
>>>>>> + if (coresight_get_mode(drvdata->csdev) != CS_MODE_DISABLED)
>>
>> Could we do something like this below ?
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> index a48bb85d0e7f..863a645fa88a 100644
>> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
>> @@ -1178,7 +1178,9 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
>> */
>> spin_lock_irqsave(&drvdata->spinlock, flags);
>> sysfs_buf = READ_ONCE(drvdata->sysfs_buf);
>> - if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) {
>> + if (!sysfs_buf ||
>> + ((sysfs_buf->size != drvdata->size) &&
>> + coresight_get_mode(csdev) != CS_MODE_SYSFS))
>> spin_unlock_irqrestore(&drvdata->spinlock, flags);
>>
>> /* Allocate memory with the locks released */
>>
>> i.e., do not allocate a new buffer if the sysfs mode is active. The new
>> size can be set when the new session starts
>>
>
> tested with steps in the commit and perf (below) simultaneously and don't see the
> problem mentioned.
> perf record -e cs_etm// -C 0 -- sleep 1 2>&1 > /dev/null
Thanks for the testing !
>
> It's a bit confusing with this fix since we actually discard/delay the user's request
> of changing the buffer size but no error/information returned to user. If this is not
> a problem the fix is fine for me.
We do not discard the users request. Also, for all practical purposes
there is no "delay" in the new buffer size usage, since the existing
session (of the "sink") cannot change the buffer size while it is
active. It will only be effective when a new session starts, which is
the case with this patch.
Suzuki
>
> Thanks.
>
>>
>>>>>
>>>>> Size isn't used in perf mode is it? So it can be -EBUSY only when mode == CS_MODE_SYSFS.
>>>>>
>>>>
>>>> alloc_etr_buf() on the perf path will read drvdata->size, not sure it matters if user
>>>> change it through sysfs in the meanwhile. Will test and have a check if there are any
>>>> other places using size on the perf path.
>>
>> That was there to make sure the user can allocate a bigger buffer (of
>> the AUX size vs sysfs configured size) and possibly collect more trace
>> (i.e., in multiple aux buffers). But looks like that is not useful,
>> given we can only ever collect to one AUX (the last one turning ETR off).
>>
>> So we could remove that check.
>>
>> Suzuki
>>
>>
>>>>
>>>
>>> Hmmm I assumed that Perf mode completely ignored anything from sysfs mode. I see that alloc_etr_buf() does sometimes use the sysfs value. I don't really see why that's necessary because that means it sometimes ignores the buffer size from the perf command line depending on what's in sysfs, but the modes should be mutually exclusive.
>>>
>>> Unless we fix that then I think you do need to use the device spinlock. But I think we should tidy up alloc_etr_buf() to only try to allocate from the Perf size down to TMC_ETR_PERF_MIN_BUF_SIZE, ignoring drvdata- >size. Then the behavior is less surprising to users and also anyone reading the code. And rename it to alloc_etr_buf_perf().
>>>
>>> Unless Suzuki knows of a reason it was done that way to begin with? I checked the commit message but it just says that it was like that but not why.
>>>
>>>>>> + return -EBUSY;
>>>>>> +
>>>>>> ret = kstrtoul(buf, 0, &val);
>>>>>> if (ret)
>>>>>> return ret;
>>>>>
>>>>> Looks ok to me. Although for consistency it might be worth changing to guard(mutex)(&coresight_mutex) because this is about sysfs mode only and other usages of mode and comments point to coresight_mutex. Using the device's spinlock will technically work but it did make me go and double check the code. And there are other cases of reading the mode like this:
>>>>>
>>>>
>>>> ok, I thought to also serialize the use of drvdata->size. But as you mentioned
>>>> use coresight_mutex is enough and will be consistenct with other places.
>>>>
>>>>> static ssize_t enable_source_show(struct device *dev,
>>>>> struct device_attribute *attr,
>>>>> char *buf)
>>>>> {
>>>>> struct coresight_device *csdev = to_coresight_device(dev);
>>>>>
>>>>> guard(mutex)(&coresight_mutex);
>>>>> return scnprintf(buf, PAGE_SIZE, "%u\n",
>>>>> coresight_get_mode(csdev) == CS_MODE_SYSFS);
>>>>> }
>>>>>
>>>>> Mode can change to CS_MODE_PERF while inside coresight_mutex but the device would end up not being enabled for sysfs, so it's still ok to update the sysfs size value in that case.
>>>>>
>>>>> With that change:
>>>>>
>>>>> Reviewed-by: James Clark <james.clark(a)linaro.org>
>>>>
>>>> Thanks.
>>>>
>>>
>>
>>
>> .
Some HW has static trace id which cannot be changed via
software programming. For this case, configure the trace id
in device tree with "arm,static-trace-id = <xxx>", and
call coresight_trace_id_get_static_system_id with the trace id value
in device probe function. The id will be reserved for the HW
all the time if the device is probed.
Changes since V5:
1. Remove the warn for staic id not available.
2. Drop the system_id if registering the coresight device fails.
3. Return busy when static id is not available in dummy driver.
Changes since V4:
1. Use fwnode_property_read_u32 in fwnode_property_read_u32.
2. Update date and version in sysfs-bus-coresight-devices-dummy-source
Changes since V3:
1. Adda new API function
int coresight_trace_id_get_system_static_id(int trace_id).
2. Use the term "static trace id" for these devices where
the hardware sets a non-programmable trace ID.
Changes since V2:
1. Change "trace-id" to "arm,trace-id".
2. Add trace id flag for getting preferred id or ODD id.
Changes since V1:
1. Add argument to coresight_trace_id_get_system_id for preferred id
instead of adding new function coresight_trace_id_reserve_system_id.
2. Add constraint to trace-id in dt-binding file.
Mao Jinlong (3):
dt-bindings: arm: Add arm,static-trace-id for coresight dummy source
coresight: Add support to get static id for system trace sources
coresight: dummy: Add static trace id support for dummy source
.../sysfs-bus-coresight-devices-dummy-source | 15 ++++
.../arm/arm,coresight-dummy-source.yaml | 6 ++
drivers/hwtracing/coresight/coresight-dummy.c | 81 ++++++++++++++++---
.../hwtracing/coresight/coresight-platform.c | 6 ++
.../hwtracing/coresight/coresight-trace-id.c | 39 ++++++---
.../hwtracing/coresight/coresight-trace-id.h | 9 +++
include/linux/coresight.h | 1 +
7 files changed, 137 insertions(+), 20 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source
--
2.17.1
In our hardware design, by combining a funnel and a replicator, it
implement a hardware device with one-to-one correspondence between
output ports and input ports. The programming usage on this device
is the same as funnel. The software uses a funnel and a static
replicator to implement the driver of this device. Since original
funnels only support a single output connection and original
replicator only support a single input connection, the code needs
to be modified to support this new feature. The following is a
typical topology diagram of multi-port output mechanism.
|----------| |---------| |----------| |---------|
| TPDM 0 | | Source0 | | Source 1 | | TPDM 1 |
|----------| |---------| |----------| |---------|
| | | |
| | | |
| --------- | | |
| | | |
| | | |
| | | |
\-------------/ ---------------------- |
\ Funnel 0 / | |
----------- | ------------------------------
| | |
| | |
\------------------/
\ Funnel 1 / ----|
\--------------/ |
| |----> Combine a funnel and a
| | static replicator
/-----------------\ |
/ replicator 0 \ ----|
/---------------------\
| | |
| | |-----------|
| |---------| |
| |TPDM0 |TPDM1
| \-----------------/
| \ TPDA 0 /
| \-------------/
| |
| |
|Source0/1 |
\-------------------------------/
\ Funnel 2 /
\---------------------------/
Changes in V5:
1. Replace "filter-src" with "filter-source" in the
dt-binding document.
-- Suzuki K Poulose
2. Optimize the comments of the patch "coresight:
Add support for trace filtering by source" due to bad
example.
-- Suzuki K Poulose
3. Correct spelling errors in the patch "coresight:
Add support for trace filtering by source".
-- Suzuki K Poulose
4. Optimize the function "coresight_blocks_source".
-- Suzuki K Poulose
5. Add { } in the function "of_coresight_parse_endpoint".
-- Suzuki K Poulose
6. Adjust the order of the patches.
-- Suzuki K Poulose
7. Adjust the alignment in "coresight-platform.c".
-- Suzuki K Poulose
Changes in V4:
1. Use "coresight_get_source(path)" in the function
"coresight_disable_path_from" instead of explicitly
passing the source.
-- Suzuki K Poulose
2. Optimize the order of the input parameters for
"_coresight_build_path".
-- Suzuki K Poulose
3. Reuse the method "coresight_block_source" in
"_coresight_build_path".
-- Suzuki K Poulose
4. Remove the unnecessary () in "coresight_build_path".
-- Suzuki K Poulose
5. Add a helper to check if a device is SOURCE.
-- Suzuki K Poulose
6. Adjust the posistion of setting "still_orphan" in
"coresight_build_path".
-- Suzuki K Poulose
Changes in V3:
1. Rename the function "coresight_source_filter" to
"coresight_block_source". And refine this function.
-- Suzuki K Poulose
2. Rename the parameters of the function
"coresight_find_out_connection" to avoid confusion.
-- Suzuki K Poulose
3. Get the source of path in "coresight_enable_path" and
"coresight_disable_path".
-- Suzuki K Poulose
4. Fix filter source device before skip the port in
"coresight_orphan_match".
-- Suzuki K Poulose
5. Make sure the device still orphan if whter is a filter
source firmware node but the filter source device is null.
-- Suzuki K Poulose
6. Walk through the entire coresight bus and fixup the
"filter_src_dev" if the source is being removed.
-- Suzuki K Poulose
7. Refine the commit description of patch#2.
-- Suzuki K Poulose
8. Fix the warning reported by kernel test robot.
-- kernel test robot.
9. Use the source device directly if the port has a
hardcoded filter in "tpda_get_element_size".
-- Suzuki K Poulose
Changes in V2:
1. Change the reference for endpoint property in dt-binding.
-- Krzysztof Kozlowski
2. Change the property name "filter_src" to "filter-src".
-- Krzysztof Kozlowski
3. Fix the errors in running 'make dt_binding_check'.
-- Rob Herring
4. Pass in the source parameter instead of path.
-- Suzuki K Poulose
5. Reset the "filter_src_dev" if the "src" csdev is being removed.
-- Suzuki K Poulose
6. Add a warning if the "filter_src_dev" is of not the
type DEV_TYPE_SOURCE.
-- Suzuki K Poulose
7. Optimize the procedure for handling all possible cases.
-- Suzuki K Poulose
Changes in V1:
1. Add a static replicator connect to a funnel to implement the
correspondence between the output ports and the input ports on
funnels.
-- Suzuki K Poulose
2. Add filter_src_dev and filter_src_dev phandle to
"coresight_connection" struct, and populate them if there is one.
-- Suzuki K Poulose
3. To look at the phandle and then fixup/remove the filter_src
device in fixup/remove connections.
-- Suzuki K Poulose
4. When TPDA reads DSB/CMB element size, it is implemented by
looking up filter src device in the connections.
-- Suzuki K Poulose
Tao Zhang (4):
dt-bindings: arm: qcom,coresight-static-replicator: Add property for
source filtering
coresight: Add a helper to check if a device is source
coresight: Add support for trace filtering by source
coresight-tpda: Optimize the function of reading element size
.../arm/arm,coresight-static-replicator.yaml | 19 ++-
drivers/hwtracing/coresight/coresight-core.c | 113 +++++++++++++++---
.../hwtracing/coresight/coresight-platform.c | 18 +++
drivers/hwtracing/coresight/coresight-tpda.c | 13 +-
include/linux/coresight.h | 12 +-
5 files changed, 152 insertions(+), 23 deletions(-)
--
2.17.1
On 18/11/2024 07:29, Wolfram Sang wrote:
> The header clearly states that it does not want to be included directly,
> only via 'device.h'. 'platform_device.h' works equally well. Remove the
> direct inclusion.
>
> Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com>
> ---
> drivers/hwtracing/coresight/coresight-etm4x-core.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index 66d44a404ad0..559972a00fdf 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -23,7 +23,6 @@
> #include <linux/cpu_pm.h>
> #include <linux/coresight.h>
> #include <linux/coresight-pmu.h>
> -#include <linux/pm_wakeup.h>
> #include <linux/amba/bus.h>
> #include <linux/seq_file.h>
> #include <linux/uaccess.h>
If you plan to take this as a collection outside of CoreSight tree,
Acked-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Otherwise, I can pick this up.
Suzuki
On 18/11/2024 9:00 am, Marc Zyngier wrote:
> On Tue, 12 Nov 2024 10:37:03 +0000,
> James Clark <james.clark(a)linaro.org> wrote:
>>
>> Rename vcpu_* to kvm_* so that the same flags mechanism can be used in
>> places other than vcpu without being confusing. Wherever macros are
>> still related to vcpu like vcpu_get_flag() with hard coded v->arch, keep
>> the vcpu_* name, otherwise change it.
>>
>> Also move the "v->arch" access one macro higher for the same reason.
>>
>> This will be used for moving flags to host_data in a later commit.
>>
>> Signed-off-by: James Clark <james.clark(a)linaro.org>
>> ---
>> arch/arm64/include/asm/kvm_host.h | 88 +++++++++++++++----------------
>> arch/arm64/kvm/hyp/exception.c | 12 ++---
>> arch/arm64/kvm/inject_fault.c | 4 +-
>> arch/arm64/kvm/mmio.c | 10 ++--
>> 4 files changed, 57 insertions(+), 57 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index f333b189fb43..34aa59f498c4 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -790,22 +790,22 @@ struct kvm_vcpu_arch {
>> /*
>> * Each 'flag' is composed of a comma-separated triplet:
>> *
>> - * - the flag-set it belongs to in the vcpu->arch structure
>> + * - the flag-set it belongs to in the structure pointed to by 'v'
>> * - the value for that flag
>> * - the mask for that flag
>> *
>> - * __vcpu_single_flag() builds such a triplet for a single-bit flag.
>> - * unpack_vcpu_flag() extract the flag value from the triplet for
>> + * __kvm_single_flag() builds such a triplet for a single-bit flag.
>> + * unpack_kvm_flag() extract the flag value from the triplet for
>> * direct use outside of the flag accessors.
>> */
>> -#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
>> +#define __kvm_single_flag(_set, _f) _set, (_f), (_f)
>>
>> #define __unpack_flag(_set, _f, _m) _f
>> -#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
>> +#define unpack_kvm_flag(...) __unpack_flag(__VA_ARGS__)
>>
>> #define __build_check_flag(v, flagset, f, m) \
>> do { \
>> - typeof(v->arch.flagset) *_fset; \
>> + typeof(v.flagset) *_fset; \
>> \
>> /* Check that the flags fit in the mask */ \
>> BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
>> @@ -813,11 +813,11 @@ struct kvm_vcpu_arch {
>> BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
>> } while (0)
>>
>> -#define __vcpu_get_flag(v, flagset, f, m) \
>> +#define __kvm_get_flag(v, flagset, f, m) \
>> ({ \
>> __build_check_flag(v, flagset, f, m); \
>> \
>> - READ_ONCE(v->arch.flagset) & (m); \
>> + READ_ONCE(v.flagset) & (m); \
>> })
>>
>> /*
>> @@ -826,64 +826,64 @@ struct kvm_vcpu_arch {
>> */
>> #ifdef __KVM_NVHE_HYPERVISOR__
>> /* the nVHE hypervisor is always non-preemptible */
>> -#define __vcpu_flags_preempt_disable()
>> -#define __vcpu_flags_preempt_enable()
>> +#define __kvm_flags_preempt_disable()
>> +#define __kvm_flags_preempt_enable()
>> #else
>> -#define __vcpu_flags_preempt_disable() preempt_disable()
>> -#define __vcpu_flags_preempt_enable() preempt_enable()
>> +#define __kvm_flags_preempt_disable() preempt_disable()
>> +#define __kvm_flags_preempt_enable() preempt_enable()
>> #endif
>>
>> -#define __vcpu_set_flag(v, flagset, f, m) \
>> +#define __kvm_set_flag(v, flagset, f, m) \
>
> Hell no. Never. The whole point of this naming is that we know what
> this applies to. Here, you might as well have replaced 'vcpu' with
> 'carrot', and the result would be the same.
>
> Not to mention the insane churn this generates.
>
> So no, not happening.
>
> M.
>
Fair enough, I wasn't feeling to strongly about this either, was just
anticipating that there might be objection to bare flags if this more
abstracted mechanism existed elsewhere.
Looks like Oliver already did it with just flags for the same end goal
here [1], so I will drop this.
[1]:
https://lore.kernel.org/kvmarm/20241115224924.2132364-4-oliver.upton@linux.…
On 14/11/2024 2:51 pm, Yicong Yang wrote:
> On 2024/11/14 18:30, James Clark wrote:
>>
>>
>> On 14/11/2024 8:16 am, Yicong Yang wrote:
>>> From: Yicong Yang <yangyicong(a)hisilicon.com>
>>>
>>> Enable the trace in below steps will crash the kernel by NULL pointer
>>> dereferencing:
>>> echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink
>>> echo 1 > /sys/bus/coresight/devices/etm0/enable_source
>>> echo 0x400000 > /sys/bus/coresight/devices/tmc_etr0/buffer_size
>>> echo 1 > /sys/bus/coresight/devices/etm2/enable_source
>>> dd if=/dev/tmc_etr0 of=test_etm_sysfs_etr_030.data
>>>
>>> The call trace will be like:
>>> WARNING: CPU: 39 PID: 8586 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1123 __tmc_etr_disable_hw+0x108/0x140 [coresight_tmc]
>>> [...]
>>> Call trace:
>>> __tmc_etr_disable_hw+0x108/0x140 [coresight_tmc]
>>> tmc_read_prepare_etr+0xc0/0xd0 [coresight_tmc]
>>> tmc_open+0x60/0xa0 [coresight_tmc]
>>> misc_open+0x11c/0x170
>>> chrdev_open+0xcc/0x2b0
>>> do_dentry_open+0x140/0x4e0
>>> vfs_open+0x34/0xf8
>>> path_openat+0x2b0/0xf58
>>> do_filp_open+0x8c/0x148
>>> do_sys_openat2+0xb8/0xe8
>>> __arm64_sys_openat+0x70/0xc0
>>> el0_svc_common.constprop.0+0x64/0x148
>>> do_el0_svc+0x24/0x38
>>> el0_svc+0x40/0x140
>>> el0t_64_sync_handler+0xc0/0xc8
>>> el0t_64_sync+0x1a4/0x1a8
>>> ---[ end trace 0000000000000000 ]---
>>> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028
>>> [...]
>>> Call trace:
>>> tmc_etr_get_sysfs_trace+0x10/0x80 [coresight_tmc]
>>> vfs_read+0xcc/0x310
>>> ksys_read+0x74/0x108
>>> __arm64_sys_read+0x24/0x38
>>> el0_svc_common.constprop.0+0x64/0x148
>>> do_el0_svc+0x24/0x38
>>> el0_svc+0x40/0x140
>>>
>>> Due to the buffer size changed, the buffer will be reallocated in
>>> tmc_etr_get_sysfs_buffer() when the second source enabled. At trace
>>> end tmc_etr_sync_sysfs_buf() will reset the drvdata->sysfs_buf and
>>> trigger the later NULL pointer dereference when reading out the
>>> data.
>>>
>>> But it doesn't make sense to change the buffer size when it's
>>> already in use. So block such behavior.
>>>
>>> Signed-off-by: Yicong Yang <yangyicong(a)hisilicon.com>
>>> ---
>>> drivers/hwtracing/coresight/coresight-tmc-core.c | 5 +++++
>>> 1 file changed, 5 insertions(+)
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
>>> index 475fa4bb6813..9660af63e9bc 100644
>>> --- a/drivers/hwtracing/coresight/coresight-tmc-core.c
>>> +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
>>> @@ -319,6 +319,11 @@ static ssize_t buffer_size_store(struct device *dev,
>>> if (drvdata->config_type != TMC_CONFIG_TYPE_ETR)
>>> return -EPERM;
>>> + /* Don't change the buffer size if it's in use */
>>> + guard(spinlock)(&drvdata->spinlock);
>>> + if (coresight_get_mode(drvdata->csdev) != CS_MODE_DISABLED)
>>
>> Size isn't used in perf mode is it? So it can be -EBUSY only when mode == CS_MODE_SYSFS.
>>
>
> alloc_etr_buf() on the perf path will read drvdata->size, not sure it matters if user
> change it through sysfs in the meanwhile. Will test and have a check if there are any
> other places using size on the perf path.
>
Hmmm I assumed that Perf mode completely ignored anything from sysfs
mode. I see that alloc_etr_buf() does sometimes use the sysfs value. I
don't really see why that's necessary because that means it sometimes
ignores the buffer size from the perf command line depending on what's
in sysfs, but the modes should be mutually exclusive.
Unless we fix that then I think you do need to use the device spinlock.
But I think we should tidy up alloc_etr_buf() to only try to allocate
from the Perf size down to TMC_ETR_PERF_MIN_BUF_SIZE, ignoring
drvdata->size. Then the behavior is less surprising to users and also
anyone reading the code. And rename it to alloc_etr_buf_perf().
Unless Suzuki knows of a reason it was done that way to begin with? I
checked the commit message but it just says that it was like that but
not why.
>>> + return -EBUSY;
>>> +
>>> ret = kstrtoul(buf, 0, &val);
>>> if (ret)
>>> return ret;
>>
>> Looks ok to me. Although for consistency it might be worth changing to guard(mutex)(&coresight_mutex) because this is about sysfs mode only and other usages of mode and comments point to coresight_mutex. Using the device's spinlock will technically work but it did make me go and double check the code. And there are other cases of reading the mode like this:
>>
>
> ok, I thought to also serialize the use of drvdata->size. But as you mentioned
> use coresight_mutex is enough and will be consistenct with other places.
>
>> static ssize_t enable_source_show(struct device *dev,
>> struct device_attribute *attr,
>> char *buf)
>> {
>> struct coresight_device *csdev = to_coresight_device(dev);
>>
>> guard(mutex)(&coresight_mutex);
>> return scnprintf(buf, PAGE_SIZE, "%u\n",
>> coresight_get_mode(csdev) == CS_MODE_SYSFS);
>> }
>>
>> Mode can change to CS_MODE_PERF while inside coresight_mutex but the device would end up not being enabled for sysfs, so it's still ok to update the sysfs size value in that case.
>>
>> With that change:
>>
>> Reviewed-by: James Clark <james.clark(a)linaro.org>
>
> Thanks.
>
On 14/11/2024 8:16 am, Yicong Yang wrote:
> From: Yicong Yang <yangyicong(a)hisilicon.com>
>
> tmc_drvdata::reading is used to indicate whether a reading process
> is performed through /dev/xyz.tmc. Document it.
>
> Signed-off-by: Yicong Yang <yangyicong(a)hisilicon.com>
> ---
> drivers/hwtracing/coresight/coresight-tmc.h | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
> index 2671926be62a..fdf7955e7350 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc.h
> +++ b/drivers/hwtracing/coresight/coresight-tmc.h
> @@ -174,6 +174,7 @@ struct etr_buf {
> * @pid: Process ID of the process that owns the session that is using
> * this component. For example this would be the pid of the Perf
> * process.
> + * @reading: buffer's in the reading through "/dev/xyz.tmc" entry
> * @buf: Snapshot of the trace data for ETF/ETB.
> * @etr_buf: details of buffer used in TMC-ETR
> * @len: size of the available trace for ETF/ETB.
Reviewed-by: James Clark <james.clark(a)linaro.org>