Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out.
The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools.
Cc: John Harrison John.C.Harrison@Intel.com Cc: Julia Filipchuk julia.filipchuk@intel.com Cc: José Roberto de Souza jose.souza@intel.com Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 30 +++++++++-------------------- drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..d9b71bb690860 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,30 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split to multiple print calls because some print targets, e.g. + * dmesg cannot handle arbitrarily long lines. These targets may add newline + * between calls. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0;
- /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */
if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +446,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos);
if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0;
drm_puts(p, line_buff); @@ -470,10 +457,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } }
+ if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); }
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size);
#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding);
if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + xe_print_blob_ascii85(p, "CTB data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80151ff6a71f8..44482ea919924 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -207,8 +207,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "Log data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0;
- xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } }
On Wed, 2025-01-22 at 21:11 -0800, Lucas De Marchi wrote:
Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out.
The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools.
Cc: John Harrison John.C.Harrison@Intel.com Cc: Julia Filipchuk julia.filipchuk@intel.com Cc: José Roberto de Souza jose.souza@intel.com Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c | 30 +++++++++-------------------- drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..d9b71bb690860 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,30 @@ int xe_devcoredump_init(struct xe_device *xe) /**
- xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
- The output is split to multiple lines because some print targets, e.g. dmesg
- cannot handle arbitrarily long lines. Note also that printing to dmesg in
- piece-meal fashion is not possible, each separate call to drm_puts() has a
- line-feed automatically added! Therefore, the entire output line must be
- constructed in a local buffer first, then printed in one atomic output call.
- The output is split to multiple print calls because some print targets, e.g.
- dmesg cannot handle arbitrarily long lines. These targets may add newline
- between calls.
- There is also a scheduler yield call to prevent the 'task has been stuck for
- 120s' kernel hang check feature from firing when printing to a slow target
- such as dmesg over a serial port.
- TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
*/
- @p: the printer object to output to
- @prefix: optional prefix to add to output string
- @blob: the Binary Large OBject to dump out
- @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
- @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix,
just missing document the suffix.
With that this patch is:
Reviewed-by: José Roberto de Souza jose.souza@intel.com
const void *blob, size_t offset, size_t size)
{ const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0;
- /*
* Splitting blobs across multiple lines is not compatible with the mesa
* debug decoder tool. Note that even dropping the explicit '\n' below
* doesn't help because the GuC log is so big some underlying implementation
* still splits the lines at 512K characters. So just bail completely for
* the moment.
*/
- return;
#define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
- /* Always leave space for the suffix char and the \0 */
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +446,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0;
drm_puts(p, line_buff); @@ -470,10 +457,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } }
- if (suffix)
line_buff[line_pos++] = suffix;
- if (line_pos) {
line_buff[line_pos++] = 0;line_buff[line_pos++] = '\n';
- drm_puts(p, line_buff); }
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb)
xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
xe_print_blob_ascii85(p, "CTB data", '\n',
} else { drm_puts(p, "CT disabled\n"); }snapshot->ctb, 0, snapshot->ctb_size);
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80151ff6a71f8..44482ea919924 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -207,8 +207,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
const char *prefix = i ? NULL : "Log data";
char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0;
xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
remain -= size; }xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size);
}
On 1/22/2025 21:11, Lucas De Marchi wrote:
Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out.
The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools.
Cc: John Harrison John.C.Harrison@Intel.com Cc: Julia Filipchuk julia.filipchuk@intel.com Cc: José Roberto de Souza jose.souza@intel.com Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c | 30 +++++++++-------------------- drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..d9b71bb690860 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,30 @@ int xe_devcoredump_init(struct xe_device *xe) /**
- xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
- The output is split to multiple lines because some print targets, e.g. dmesg
- cannot handle arbitrarily long lines. Note also that printing to dmesg in
- piece-meal fashion is not possible, each separate call to drm_puts() has a
- line-feed automatically added! Therefore, the entire output line must be
- constructed in a local buffer first, then printed in one atomic output call.
- The output is split to multiple print calls because some print targets, e.g.
- dmesg cannot handle arbitrarily long lines. These targets may add newline
- between calls.
Newlines between calls does not help.
- There is also a scheduler yield call to prevent the 'task has been stuck for
- 120s' kernel hang check feature from firing when printing to a slow target
- such as dmesg over a serial port.
- TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
*/
- @p: the printer object to output to
- @prefix: optional prefix to add to output string
- @blob: the Binary Large OBject to dump out
- @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
- @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0;
- /*
* Splitting blobs across multiple lines is not compatible with the mesa
* debug decoder tool. Note that even dropping the explicit '\n' below
* doesn't help because the GuC log is so big some underlying implementation
* still splits the lines at 512K characters. So just bail completely for
* the moment.
*/
- return;
- #define DMESG_MAX_LINE_LEN 800
-#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
- /* Always leave space for the suffix char and the \0 */
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +446,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
line_buff[line_pos++] = '\n';
If you remove this then dmesg output is broken. It has to be wrapped at less than the dmesg buffer size. Otherwise the line is truncated and data is lost.
John.
line_buff[line_pos++] = 0;
drm_puts(p, line_buff); @@ -470,10 +457,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } }
- if (suffix)
line_buff[line_pos++] = suffix;
- if (line_pos) {
line_buff[line_pos++] = 0;line_buff[line_pos++] = '\n';
- drm_puts(p, line_buff); }
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb)
xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
xe_print_blob_ascii85(p, "CTB data", '\n',
} else { drm_puts(p, "CT disabled\n"); }snapshot->ctb, 0, snapshot->ctb_size);
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80151ff6a71f8..44482ea919924 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -207,8 +207,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
const char *prefix = i ? NULL : "Log data";
char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0;
xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
remain -= size; } }xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size);
On Thu, Jan 23, 2025 at 10:25:13AM -0800, John Harrison wrote:
On 1/22/2025 21:11, Lucas De Marchi wrote:
Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out.
The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools.
Cc: John Harrison John.C.Harrison@Intel.com Cc: Julia Filipchuk julia.filipchuk@intel.com Cc: José Roberto de Souza jose.souza@intel.com Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c | 30 +++++++++-------------------- drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..d9b71bb690860 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,30 @@ int xe_devcoredump_init(struct xe_device *xe) /**
- xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
- The output is split to multiple lines because some print targets, e.g. dmesg
- cannot handle arbitrarily long lines. Note also that printing to dmesg in
- piece-meal fashion is not possible, each separate call to drm_puts() has a
- line-feed automatically added! Therefore, the entire output line must be
- constructed in a local buffer first, then printed in one atomic output call.
- The output is split to multiple print calls because some print targets, e.g.
- dmesg cannot handle arbitrarily long lines. These targets may add newline
- between calls.
Newlines between calls does not help.
- There is also a scheduler yield call to prevent the 'task has been stuck for
- 120s' kernel hang check feature from firing when printing to a slow target
- such as dmesg over a serial port.
- TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
*/
- @p: the printer object to output to
- @prefix: optional prefix to add to output string
- @blob: the Binary Large OBject to dump out
- @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
- @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0;
- /*
* Splitting blobs across multiple lines is not compatible with the mesa
* debug decoder tool. Note that even dropping the explicit '\n' below
* doesn't help because the GuC log is so big some underlying implementation
* still splits the lines at 512K characters. So just bail completely for
* the moment.
*/
- return;
#define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
- /* Always leave space for the suffix char and the \0 */
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +446,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
line_buff[line_pos++] = '\n';
If you remove this then dmesg output is broken. It has to be wrapped at less than the dmesg buffer size. Otherwise the line is truncated and data is lost.
we broke everything because of the dump to dmesg. Let's restore things in a way that works with guc_log via debugfs and devcoredump rather than block it on dmesg.
Lucas De Marchi
John.
line_buff[line_pos++] = 0; drm_puts(p, line_buff);
@@ -470,10 +457,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } }
- if (suffix)
line_buff[line_pos++] = suffix;
- if (line_pos) {
line_buff[line_pos++] = 0;line_buff[line_pos++] = '\n';
- drm_puts(p, line_buff); }
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb)
xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
xe_print_blob_ascii85(p, "CTB data", '\n',
} else { drm_puts(p, "CT disabled\n"); }snapshot->ctb, 0, snapshot->ctb_size);
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80151ff6a71f8..44482ea919924 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -207,8 +207,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
const char *prefix = i ? NULL : "Log data";
char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0;
xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
remain -= size; }xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size);
}
On 1/23/2025 10:36, Lucas De Marchi wrote:
On Thu, Jan 23, 2025 at 10:25:13AM -0800, John Harrison wrote:
On 1/22/2025 21:11, Lucas De Marchi wrote:
Commit 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") partially reverted some changes to workaround breakage caused to mesa tools. However, in doing so it also broke fetching the GuC log via debugfs since xe_print_blob_ascii85() simply bails out.
The fix is to avoid the extra newlines: the devcoredump interface is line-oriented and adding random newlines in the middle breaks it. If a tool is able to parse it by looking at the data and checking for chars that are out of the ascii85 space, it can still do so. A format change that breaks the line-oriented output on devcoredump however needs better coordination with existing tools.
Cc: John Harrison John.C.Harrison@Intel.com Cc: Julia Filipchuk julia.filipchuk@intel.com Cc: José Roberto de Souza jose.souza@intel.com Cc: stable@vger.kernel.org Fixes: 70fb86a85dc9 ("drm/xe: Revert some changes that break a mesa debug tool") Fixes: ec1455ce7e35 ("drm/xe/devcoredump: Add ASCII85 dump helper function") Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c | 30 +++++++++-------------------- drivers/gpu/drm/xe/xe_devcoredump.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 4 +++- 4 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a7946a76777e7..d9b71bb690860 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -391,42 +391,30 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 *
- The output is split to multiple lines because some print
targets, e.g. dmesg
- cannot handle arbitrarily long lines. Note also that printing to
dmesg in
- piece-meal fashion is not possible, each separate call to
drm_puts() has a
- line-feed automatically added! Therefore, the entire output line
must be
- constructed in a local buffer first, then printed in one atomic
output call.
- The output is split to multiple print calls because some print
targets, e.g.
- dmesg cannot handle arbitrarily long lines. These targets may
add newline
- between calls.
Newlines between calls does not help.
* * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. *
- TODO: Add compression prior to the ASCII85 encoding to shrink
huge buffers down.
* @p: the printer object to output to * @prefix: optional prefix to add to output string * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return;
#define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -458,7 +446,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n';
If you remove this then dmesg output is broken. It has to be wrapped at less than the dmesg buffer size. Otherwise the line is truncated and data is lost.
we broke everything because of the dump to dmesg. Let's restore things in a way that works with guc_log via debugfs and devcoredump rather than block it on dmesg.
I'm not saying that you need to leave the line feed in and can't merge anything until we make it all work.
But removing it completely is not helpful. It should be left commented out with a comment as to what it is needed for. Then when someone wants to do a dmesg dump, they just have to follow the instructions and re-instate that line.
And the comment above is inaccurate. It at least implying that adding a newline after each call fixes dmesg output. That is not the case. We should definitely not be adding comments that are misleading.
John.
Lucas De Marchi
John.
line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -470,10 +457,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix;
if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0;
drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d601022..5391a80a4d1ba 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc2..50c8076b51585 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + xe_print_blob_ascii85(p, "CTB data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 80151ff6a71f8..44482ea919924 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -207,8 +207,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "Log data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; - xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } }
linux-stable-mirror@lists.linaro.org