// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

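/*
 * Each captured snapshot object pairs a pointer to the static descriptor that
 * produced it (register list, cluster, debugbus block, etc.) with the buffer
 * holding the dumped values.
 */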
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	struct list_head objs;

	bool gpu_initialized;
};

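/*
 * The crashdumper consumes a script of 128-bit entries: the first 64-bit word
 * carries the value to write (or the target address to read into) and the
 * second encodes the register offset and flags/dword count. Each helper below
 * emits one entry and returns the number of 64-bit words written so callers
 * can advance their script pointer.
 */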
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

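/* Kernel mapping, GEM object and GPU address of the crashdumper scratch BO */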
struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

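/*
 * Allocations for the snapshot are chained on a6xx_state->objs so that
 * a6xx_gpu_state_destroy() can free them all in one pass.
 */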
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new(gpu->dev,
		SZ_1M, MSM_BO_WC, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

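/*
 * Point the CP at the crashdumper script and poll for the completion bit.
 * Bail out early if the scratch BO is missing or the GMU reports SPTPRAC
 * powered off, since the script cannot run in either case.
 */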
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

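/*
 * Capture the GX, CX and (on pre-GBIF targets) VBIF debug buses. The GX bus
 * is programmed through GPU registers while the CX bus lives in a separate
 * "cx_dbgc" region that is temporarily ioremapped for the dump.
 */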
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has same debugbus as of other GPU blocks, fall back to
		 * default path if GPU uses GBIF, also GBIF uses exactly same
		 * ID as of VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}
	}

	/* Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
				ARRAY_SIZE(a6xx_cx_debugbus_blocks),
				sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		iounmap(cxdbg);
	}
}

#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(cluster->id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		bool rscc)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++) {
			u32 offset = regs->registers[i] + j;
			u32 val;

			if (rscc)
				val = gmu_read_rscc(gmu, offset);
			else
				val = gmu_read(gmu, offset);

			obj->data[index++] = val;
		}
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}

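/* Copy a GMU buffer object (log, HFI queues, debug) into the snapshot */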
static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
	struct msm_gpu_state_bo *snapshot;

	if (!bo->size)
		return NULL;

	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
	if (!snapshot)
		return NULL;

	snapshot->iova = bo->iova;
	snapshot->size = bo->size;
	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
	if (!snapshot->data)
		return NULL;

	memcpy(snapshot->data, bo->virt, bo->size);

	return snapshot;
}

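/*
 * Save the recent HFI command history for each queue, rotated so that the
 * oldest entry comes first.
 */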
static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned i, j;

	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
		struct a6xx_hfi_queue *queue = &gmu->queues[i];
		for (j = 0; j < HFI_HISTORY_SZ; j++) {
			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
		}
	}
}

#define A6XX_GBIF_REGLIST_SIZE 1
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_gbif_reglist,
			&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_vbif_reglist,
			&a6xx_state->registers[index++]);
	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

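/*
 * Dump the indexed register banks, including the CP mempool. The mempool
 * size is temporarily forced to zero so its contents stay stable while they
 * are read back, and the saved size is patched into the dump afterwards.
 */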
static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent
	 */
	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
}

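/*
 * Top-level snapshot entry point: capture the generic adreno state, the GMU
 * buffers and registers, and, when GX is powered and the SMMU is not stalled
 * on a fault, the full register, shader and cluster state via the
 * crashdumper.
 */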
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	a6xx_get_gmu_registers(gpu, a6xx_state);

	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled. The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
	    !a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	a6xx_get_registers(gpu, a6xx_state, dumper);

	if (dumper) {
		a6xx_get_shaders(gpu, a6xx_state, dumper);
		a6xx_get_clusters(gpu, a6xx_state, dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

		msm_gem_kernel_put(dumper->bo, gpu->aspace);
	}

	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return &a6xx_state->base;
}

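/* Free the GMU buffer copies and every allocation tracked on the objs list */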
static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
			struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);

	if (a6xx_state->gmu_log)
		kvfree(a6xx_state->gmu_log->data);

	if (a6xx_state->gmu_hfi)
		kvfree(a6xx_state->gmu_hfi->data);

	if (a6xx_state->gmu_debug)
		kvfree(a6xx_state->gmu_debug->data);

	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
		list_del(&obj->node);
		kvfree(obj);
	}

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

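/*
 * Print a register block as offset/value pairs; entries still holding the
 * 0xdeafbead placeholder value are skipped.
 */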
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

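/*
 * ascii85-encode a buffer for the dump output, trimming trailing zero words
 * so that empty blocks produce no data section at all.
 */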
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, " data: !!ascii85 |\n");
	drm_puts(p, " ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, " - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, " - bank: %d\n", i);
		drm_printf(p, " size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, " - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, " - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, " - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, " - regs-name: ", indexed->name);
	drm_printf(p, " dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, " - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, " count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords. Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

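/* Emit the captured state through the drm_printer in the devcoredump text format */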
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	adreno_show(gpu, state, p);

	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, " size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, " size: %zu\n", gmu_hfi->size);
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, " queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, " iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, " size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}