Do some cleanups then add a new format attribute to set the timestamp interval for ETMv4 in Perf mode. The current interval is too high for most use cases, and particularly on the FVP the number of timestamps generated is excessive.
Although it would be good to make only SYNC timestamps the default and have counter timestamps opt-in, this would be a breaking change. We can always do that later, or disable counter timestamps from Perf.
This is added as an event format attribute, rather than a Coresight config because it's something that the driver is already configuring automatically in Perf mode with any unused counter, so it's not possible to modify this with a config.
Applies to coresight/next
Signed-off-by: James Clark james.clark@linaro.org --- James Clark (6): coresight: Change syncfreq to be a u8 coresight: Fix holes in struct etmv4_config coresight: Repack struct etmv4_drvdata coresight: Refactor etm4_config_timestamp_event() coresight: Add format attribute for setting the timestamp interval coresight: docs: Document etm4x ts_interval
Documentation/trace/coresight/coresight.rst | 14 +++ drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +- drivers/hwtracing/coresight/coresight-etm4x-core.c | 110 +++++++++++++-------- drivers/hwtracing/coresight/coresight-etm4x.h | 86 ++++++++++------ 4 files changed, 144 insertions(+), 72 deletions(-) --- base-commit: a80198ba650f50d266d7fc4a6c5262df9970f9f2 change-id: 20250724-james-cs-syncfreq-7c2257a38ed3
Best regards,
TRCSYNCPR.PERIOD is the only functional part of TRCSYNCPR and it only has 5 valid bits so it can be stored in a u8.
Signed-off-by: James Clark james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-etm4x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index ac649515054d..746627476bd3 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -824,7 +824,6 @@ struct etmv4_config { u32 eventctrl1; u32 stall_ctrl; u32 ts_ctrl; - u32 syncfreq; u32 ccctlr; u32 bb_ctrl; u32 vinst_ctrl; @@ -832,6 +831,7 @@ struct etmv4_config { u32 vissctlr; u32 vipcssctlr; u8 seq_idx; + u8 syncfreq; u32 seq_ctrl[ETM_MAX_SEQ_STATES]; u32 seq_rst; u32 seq_state;
Lots of u8s are mixed with u64s and u32s so repack it to save a bit of space because there's one of these for each ETM.
Signed-off-by: James Clark james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-etm4x.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 746627476bd3..a355a1e9606d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -832,28 +832,33 @@ struct etmv4_config { u32 vipcssctlr; u8 seq_idx; u8 syncfreq; + u8 cntr_idx; + u8 res_idx; + u8 ss_idx; + u8 addr_idx; + u8 addr_type[ETM_MAX_SINGLE_ADDR_CMP]; + u8 ctxid_idx; + u8 vmid_idx; u32 seq_ctrl[ETM_MAX_SEQ_STATES]; u32 seq_rst; u32 seq_state; - u8 cntr_idx; + u32 cntrldvr[ETMv4_MAX_CNTR]; u32 cntr_ctrl[ETMv4_MAX_CNTR]; u32 cntr_val[ETMv4_MAX_CNTR]; - u8 res_idx; + u32 res_ctrl[ETM_MAX_RES_SEL]; - u8 ss_idx; + u32 ss_ctrl[ETM_MAX_SS_CMP]; u32 ss_status[ETM_MAX_SS_CMP]; u32 ss_pe_cmp[ETM_MAX_SS_CMP]; - u8 addr_idx; + u64 addr_val[ETM_MAX_SINGLE_ADDR_CMP]; u64 addr_acc[ETM_MAX_SINGLE_ADDR_CMP]; - u8 addr_type[ETM_MAX_SINGLE_ADDR_CMP]; - u8 ctxid_idx; + u64 ctxid_pid[ETMv4_MAX_CTXID_CMP]; u32 ctxid_mask0; u32 ctxid_mask1; - u8 vmid_idx; u64 vmid_val[ETM_MAX_VMID_CMP]; u32 vmid_mask0; u32 vmid_mask1;
Fix holes and convert the long list of bools to single bits to save some space because there's one of these for each ETM.
Signed-off-by: James Clark james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-etm4x.h | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index a355a1e9606d..1c67b263b01b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -1020,29 +1020,30 @@ struct etmv4_drvdata { u8 ns_ex_level; u8 q_support; u8 os_lock_model; - bool sticky_enable; - bool boot_enable; - bool os_unlock; - bool instrp0; - bool q_filt; - bool trcbb; - bool trccond; - bool retstack; - bool trccci; - bool trc_error; - bool syncpr; - bool stallctl; - bool sysstall; - bool nooverflow; - bool atbtrig; - bool lpoverride; + bool sticky_enable : 1; + bool boot_enable : 1; + bool os_unlock : 1; + bool instrp0 : 1; + bool q_filt : 1; + bool trcbb : 1; + bool trccond : 1; + bool retstack : 1; + bool trccci : 1; + bool trc_error : 1; + bool syncpr : 1; + bool stallctl : 1; + bool sysstall : 1; + bool nooverflow : 1; + bool atbtrig : 1; + bool lpoverride : 1; + bool state_needs_restore : 1; + bool skip_power_up : 1; + bool paused : 1; u64 trfcr; struct etmv4_config config; u64 save_trfcr; struct etmv4_save_state *save_state; - bool state_needs_restore; - bool skip_power_up; - bool paused; + DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX); };
Remove some of the magic numbers and try to clarify some of the documentation so it's clearer how this sets up the timestamp interval.
Return errors directly instead of jumping to out and returning ret, nothing needs to be cleaned up at the end and it only obscures the flow and return value.
Signed-off-by: James Clark james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 95 ++++++++++++++-------- drivers/hwtracing/coresight/coresight-etm4x.h | 20 +++-- 2 files changed, 77 insertions(+), 38 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index cbea200489c8..1a2d02bdcb88 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -608,18 +608,33 @@ static void etm4_enable_hw_smp_call(void *info) * TRCRSCTLR1 (always true) used to get the counter to decrement. From * there a resource selector is configured with the counter and the * timestamp control register to use the resource selector to trigger the - * event that will insert a timestamp packet in the stream. + * event that will insert a timestamp packet in the stream: + * + * +--------------+ + * | Resource 1 | fixed "always-true" resource + * +--------------+ + * | + * +------v-------+ + * | Counter x | (reload to 1 on underflow) + * +--------------+ + * | + * +------v--------------+ + * | Resource Selector y | (trigger on counter x == 0) + * +---------------------+ + * | + * +------v---------------+ + * | Timestamp Generator | (timestamp on resource y) + * +----------------------+ */ static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) { - int ctridx, ret = -EINVAL; - int counter, rselector; - u32 val = 0; + int ctridx; + int rselector; struct etmv4_config *config = &drvdata->config;
/* No point in trying if we don't have at least one counter */ if (!drvdata->nr_cntr) - goto out; + return -EINVAL;
/* Find a counter that hasn't been initialised */ for (ctridx = 0; ctridx < drvdata->nr_cntr; ctridx++) @@ -629,15 +644,17 @@ static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) /* All the counters have been configured already, bail out */ if (ctridx == drvdata->nr_cntr) { pr_debug("%s: no available counter found\n", __func__); - ret = -ENOSPC; - goto out; + return -ENOSPC; }
/* - * Searching for an available resource selector to use, starting at - * '2' since every implementation has at least 2 resource selector. - * ETMIDR4 gives the number of resource selector _pairs_, - * hence multiply by 2. + * Searching for an available resource selector to use, starting at '2' + * since resource 0 is the fixed 'always returns false' resource and 1 + * is the fixed 'always returns true' resource. See IHI0064H_b '7.3.64 + * TRCRSCTLRn, Resource Selection Control Registers, n=2-31'. + * + * ETMIDR4 gives the number of resource selector _pairs_, hence multiply + * by 2. */ for (rselector = 2; rselector < drvdata->nr_resource * 2; rselector++) if (!config->res_ctrl[rselector]) @@ -646,13 +663,9 @@ static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) if (rselector == drvdata->nr_resource * 2) { pr_debug("%s: no available resource selector found\n", __func__); - ret = -ENOSPC; - goto out; + return -ENOSPC; }
- /* Remember what counter we used */ - counter = 1 << ctridx; - /* * Initialise original and reload counter value to the smallest * possible value in order to get as much precision as we can. @@ -660,26 +673,42 @@ static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) config->cntr_val[ctridx] = 1; config->cntrldvr[ctridx] = 1;
- /* Set the trace counter control register */ - val = 0x1 << 16 | /* Bit 16, reload counter automatically */ - 0x0 << 7 | /* Select single resource selector */ - 0x1; /* Resource selector 1, i.e always true */ - - config->cntr_ctrl[ctridx] = val; - - val = 0x2 << 16 | /* Group 0b0010 - Counter and sequencers */ - counter << 0; /* Counter to use */ - - config->res_ctrl[rselector] = val; + /* + * Trace Counter Control Register TRCCNTCTLRn + * + * CNTCHAIN = 0, don't reload on the previous counter + * RLDSELF = true, reload counter automatically on underflow + * RLDTYPE = 0, one reload input resource + * RLDSEL = 0, reload on resource 0 (fixed always false resource, never + * reload) + * CNTTYPE = 0, one count input resource + * CNTSEL = 1, count on resource 1 (fixed always true resource, always + * decrement) + */ + config->cntr_ctrl[ctridx] = TRCCNTCTLRn_RLDSELF | + FIELD_PREP(TRCCNTCTLRn_CNTSEL_MASK, 1);
- val = 0x0 << 7 | /* Select single resource selector */ - rselector; /* Resource selector */ + /* + * Resource Selection Control Register TRCRSCTLRn + * + * PAIRINV = 0, INV = 0, don't invert + * GROUP = 2, SELECT = ctridx, trigger when counter 'ctridx' reaches 0 + * + * Multiple counters can be selected, and each bit signifies a counter, + * so set bit 'ctridx' to select our counter. + */ + config->res_ctrl[rselector] = FIELD_PREP(TRCRSCTLRn_GROUP_MASK, 2) | + FIELD_PREP(TRCRSCTLRn_SELECT_MASK, 1 << ctridx);
- config->ts_ctrl = val; + /* + * Global Timestamp Control Register TRCTSCTLR + * + * TYPE = 0, one input resource + * SEL = rselector, generate timestamp on resource 'rselector' + */ + config->ts_ctrl = FIELD_PREP(TRCTSCTLR_SEL_MASK, rselector);
- ret = 0; -out: - return ret; + return 0; }
static int etm4_parse_event_config(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 1c67b263b01b..aaa6633b2d67 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -224,6 +224,16 @@ #define TRCRSCTLRn_GROUP_MASK GENMASK(19, 16) #define TRCRSCTLRn_SELECT_MASK GENMASK(15, 0)
+#define TRCCNTCTLRn_CNTCHAIN BIT(17) +#define TRCCNTCTLRn_RLDSELF BIT(16) +#define TRCCNTCTLRn_RLDTYPE BIT(15) +#define TRCCNTCTLRn_RLDSEL_MASK GENMASK(12, 8) +#define TRCCNTCTLRn_CNTTYPE_MASK BIT(7) +#define TRCCNTCTLRn_CNTSEL_MASK GENMASK(4, 0) + +#define TRCTSCTLR_TYPE BIT(7) +#define TRCTSCTLR_SEL_MASK GENMASK(4, 0) + /* * System instructions to access ETM registers. * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions @@ -823,7 +833,7 @@ struct etmv4_config { u32 eventctrl0; u32 eventctrl1; u32 stall_ctrl; - u32 ts_ctrl; + u32 ts_ctrl; /* TRCTSCTLR */ u32 ccctlr; u32 bb_ctrl; u32 vinst_ctrl; @@ -843,11 +853,11 @@ struct etmv4_config { u32 seq_rst; u32 seq_state;
- u32 cntrldvr[ETMv4_MAX_CNTR]; - u32 cntr_ctrl[ETMv4_MAX_CNTR]; - u32 cntr_val[ETMv4_MAX_CNTR]; + u32 cntrldvr[ETMv4_MAX_CNTR]; /* TRCCNTRLDVRn */ + u32 cntr_ctrl[ETMv4_MAX_CNTR]; /* TRCCNTCTLRn */ + u32 cntr_val[ETMv4_MAX_CNTR]; /* TRCCNTVRn */
- u32 res_ctrl[ETM_MAX_RES_SEL]; + u32 res_ctrl[ETM_MAX_RES_SEL]; /* TRCRSCTLRn */
u32 ss_ctrl[ETM_MAX_SS_CMP]; u32 ss_status[ETM_MAX_SS_CMP];
Timestamps are currently emitted at the maximum rate possible, which is much too frequent for most use cases. Add an attribute to be able to set the interval. Granular control is not required, so save space in the config by interpreting it as 2 ^ ts_interval. And then 4 bits (0 - 15) is enough to set the interval to be larger than the existing SYNC timestamp interval.
No sysfs file is needed for this attribute because counter generated timestamps are only configured for Perf mode.
Signed-off-by: James Clark james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++++- drivers/hwtracing/coresight/coresight-etm4x-core.c | 21 ++++++++++++--------- drivers/hwtracing/coresight/coresight-etm4x.h | 6 ++++++ 3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f677c08233ba..c28ed6e63550 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/mm.h> #include <linux/init.h> +#include <linux/perf/arm_pmu.h> #include <linux/perf_event.h> #include <linux/percpu-defs.h> #include <linux/slab.h> @@ -21,6 +22,7 @@
#include "coresight-config.h" #include "coresight-etm-perf.h" +#include "coresight-etm4x.h" #include "coresight-priv.h" #include "coresight-syscfg.h" #include "coresight-trace-id.h" @@ -69,7 +71,8 @@ PMU_FORMAT_ATTR(sinkid, "config2:0-31"); /* config ID - set if a system configuration is selected */ PMU_FORMAT_ATTR(configid, "config2:32-63"); PMU_FORMAT_ATTR(cc_threshold, "config3:0-11"); - +/* Interval = (2 ^ ts_level) */ +GEN_PMU_FORMAT_ATTR(ts_level);
/* * contextid always traces the "PID". The PID is in CONTEXTIDR_EL1 @@ -103,6 +106,7 @@ static struct attribute *etm_config_formats_attr[] = { &format_attr_configid.attr, &format_attr_branch_broadcast.attr, &format_attr_cc_threshold.attr, + &format_attr_ts_level.attr, NULL, };
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 1a2d02bdcb88..42277c201d4f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -28,6 +28,7 @@ #include <linux/amba/bus.h> #include <linux/seq_file.h> #include <linux/uaccess.h> +#include <linux/perf/arm_pmu.h> #include <linux/perf_event.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -615,7 +616,7 @@ static void etm4_enable_hw_smp_call(void *info) * +--------------+ * | * +------v-------+ - * | Counter x | (reload to 1 on underflow) + * | Counter x | (reload to 2 ^ ts_level on underflow) * +--------------+ * | * +------v--------------+ @@ -626,11 +627,17 @@ static void etm4_enable_hw_smp_call(void *info) * | Timestamp Generator | (timestamp on resource y) * +----------------------+ */ -static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) +static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata, + struct perf_event_attr *attr) { int ctridx; int rselector; struct etmv4_config *config = &drvdata->config; + u8 ts_level = ATTR_CFG_GET_FLD(attr, ts_level); + + /* Disable when ts_level == MAX */ + if (ts_level == FIELD_GET(ATTR_CFG_FLD_ts_level_MASK, UINT_MAX)) + return 0;
/* No point in trying if we don't have at least one counter */ if (!drvdata->nr_cntr) @@ -666,12 +673,8 @@ static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) return -ENOSPC; }
- /* - * Initialise original and reload counter value to the smallest - * possible value in order to get as much precision as we can. - */ - config->cntr_val[ctridx] = 1; - config->cntrldvr[ctridx] = 1; + /* Initialise original and reload counter value. */ + config->cntr_val[ctridx] = config->cntrldvr[ctridx] = 1 << ts_level;
/* * Trace Counter Control Register TRCCNTCTLRn @@ -761,7 +764,7 @@ static int etm4_parse_event_config(struct coresight_device *csdev, * order to correlate instructions executed on different CPUs * (CPU-wide trace scenarios). */ - ret = etm4_config_timestamp_event(drvdata); + ret = etm4_config_timestamp_event(drvdata, attr);
/* * No need to go further if timestamp intervals can't diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index aaa6633b2d67..54558de158fa 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -598,6 +598,12 @@ #define ETM_CNTR_MAX_VAL 0xFFFF #define ETM_TRACEID_MASK 0x3f
+#define ATTR_CFG_FLD_ts_level_CFG config3 +#define ATTR_CFG_FLD_ts_level_LO 12 +#define ATTR_CFG_FLD_ts_level_HI 15 +#define ATTR_CFG_FLD_ts_level_MASK GENMASK(ATTR_CFG_FLD_ts_level_HI, \ + ATTR_CFG_FLD_ts_level_LO) + /* ETMv4 programming modes */ #define ETM_MODE_EXCLUDE BIT(0) #define ETM_MODE_LOAD BIT(1)
Document how the new field is used, maximum value and the interaction with SYNC timestamps.
Signed-off-by: James Clark james.clark@linaro.org --- Documentation/trace/coresight/coresight.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst index 806699871b80..0cd83119b83f 100644 --- a/Documentation/trace/coresight/coresight.rst +++ b/Documentation/trace/coresight/coresight.rst @@ -619,6 +619,20 @@ They are also listed in the folder /sys/bus/event_source/devices/cs_etm/format/ - Cycle count threshold value. If nothing is provided here or the provided value is 0, then the default value i.e 0x100 will be used. If provided value is less than minimum cycles threshold value, as indicated via TRCIDR3.CCITMIN, then the minimum value will be used instead. + * - ts_level + - Controls frequency of timestamps. The reload value of the + timestamp counter is 2 raised to the power of this value. If the value is + 0 then the reload value is 1, if the value is 10 then the reload value is + 1024. Maximum allowed value is 15, and setting the maximum disables + generation of timestamps via the counter, freeing the counter resources. + Timestamps will be generated after 2 ^ ts_level cycles. + + Separately to this value, timestamps will also be emitted when a SYNC + packet is generated, although this is only for every 4096 bytes of trace. + Therefore it's not possible to generate timestamps less frequently than + that and ts_level timestamps are always in addition to SYNC timestamps. + Timestamps must be enabled for this to have effect. +
How to use the STM module -------------------------