1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
3 */
4 #include "a4xx_gpu.h"
5
/*
 * Set of RBBM INT0 interrupt bits that a4xx_hw_init() writes to
 * REG_A4XX_RBBM_INT_0_MASK.
 */
#define A4XX_INT0_MASK					\
	(A4XX_INT0_RBBM_AHB_ERROR		|	\
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW	|	\
	 A4XX_INT0_CP_T0_PACKET_IN_IB		|	\
	 A4XX_INT0_CP_OPCODE_ERROR		|	\
	 A4XX_INT0_CP_RESERVED_BIT_ERROR	|	\
	 A4XX_INT0_CP_HW_FAULT			|	\
	 A4XX_INT0_CP_IB1_INT			|	\
	 A4XX_INT0_CP_IB2_INT			|	\
	 A4XX_INT0_CP_RB_INT			|	\
	 A4XX_INT0_CP_REG_PROTECT_FAULT		|	\
	 A4XX_INT0_CP_AHB_ERROR_HALT		|	\
	 A4XX_INT0_CACHE_FLUSH_TS		|	\
	 A4XX_INT0_UCHE_OOB_ACCESS)
20
21 extern bool hang_debug;
22 static void a4xx_dump(struct msm_gpu *gpu);
23 static bool a4xx_idle(struct msm_gpu *gpu);
24
a4xx_submit(struct msm_gpu * gpu,struct msm_gem_submit * submit)25 static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
26 {
27 struct msm_ringbuffer *ring = submit->ring;
28 unsigned int i;
29
30 for (i = 0; i < submit->nr_cmds; i++) {
31 switch (submit->cmd[i].type) {
32 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
33 /* ignore IB-targets */
34 break;
35 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
36 /* ignore if there has not been a ctx switch: */
37 if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
38 break;
39 fallthrough;
40 case MSM_SUBMIT_CMD_BUF:
41 OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
42 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
43 OUT_RING(ring, submit->cmd[i].size);
44 OUT_PKT2(ring);
45 break;
46 }
47 }
48
49 OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
50 OUT_RING(ring, submit->seqno);
51
52 /* Flush HLSQ lazy updates to make sure there is nothing
53 * pending for indirect loads after the timestamp has
54 * passed:
55 */
56 OUT_PKT3(ring, CP_EVENT_WRITE, 1);
57 OUT_RING(ring, HLSQ_FLUSH);
58
59 /* wait for idle before cache flush/interrupt */
60 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
61 OUT_RING(ring, 0x00000000);
62
63 /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
64 OUT_PKT3(ring, CP_EVENT_WRITE, 3);
65 OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
66 OUT_RING(ring, rbmemptr(ring, fence));
67 OUT_RING(ring, submit->seqno);
68
69 adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
70 }
71
72 /*
73 * a4xx_enable_hwcg() - Program the clock control registers
74 * @device: The adreno device pointer
75 */
a4xx_enable_hwcg(struct msm_gpu * gpu)76 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
77 {
78 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
79 unsigned int i;
80 for (i = 0; i < 4; i++)
81 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
82 for (i = 0; i < 4; i++)
83 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
84 for (i = 0; i < 4; i++)
85 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
86 for (i = 0; i < 4; i++)
87 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
88 for (i = 0; i < 4; i++)
89 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
90 for (i = 0; i < 4; i++)
91 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
92 for (i = 0; i < 4; i++)
93 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
94 for (i = 0; i < 4; i++)
95 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
96 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
97 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
98 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
99 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
100 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
101 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
102 for (i = 0; i < 4; i++)
103 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
104
105 /* Disable L1 clocking in A420 due to CCU issues with it */
106 for (i = 0; i < 4; i++) {
107 if (adreno_is_a420(adreno_gpu)) {
108 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
109 0x00002020);
110 } else {
111 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
112 0x00022020);
113 }
114 }
115
116 /* No CCU for A405 */
117 if (!adreno_is_a405(adreno_gpu)) {
118 for (i = 0; i < 4; i++) {
119 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
120 0x00000922);
121 }
122
123 for (i = 0; i < 4; i++) {
124 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
125 0x00000000);
126 }
127
128 for (i = 0; i < 4; i++) {
129 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
130 0x00000001);
131 }
132 }
133
134 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
135 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
136 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
137 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
138 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
139 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
140 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
141 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
142 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
143 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
144 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
145 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
146 /* Early A430's have a timing issue with SP/TP power collapse;
147 disabling HW clock gating prevents it. */
148 if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
149 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
150 else
151 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
152 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
153 }
154
155
a4xx_me_init(struct msm_gpu * gpu)156 static bool a4xx_me_init(struct msm_gpu *gpu)
157 {
158 struct msm_ringbuffer *ring = gpu->rb[0];
159
160 OUT_PKT3(ring, CP_ME_INIT, 17);
161 OUT_RING(ring, 0x000003f7);
162 OUT_RING(ring, 0x00000000);
163 OUT_RING(ring, 0x00000000);
164 OUT_RING(ring, 0x00000000);
165 OUT_RING(ring, 0x00000080);
166 OUT_RING(ring, 0x00000100);
167 OUT_RING(ring, 0x00000180);
168 OUT_RING(ring, 0x00006600);
169 OUT_RING(ring, 0x00000150);
170 OUT_RING(ring, 0x0000014e);
171 OUT_RING(ring, 0x00000154);
172 OUT_RING(ring, 0x00000001);
173 OUT_RING(ring, 0x00000000);
174 OUT_RING(ring, 0x00000000);
175 OUT_RING(ring, 0x00000000);
176 OUT_RING(ring, 0x00000000);
177 OUT_RING(ring, 0x00000000);
178
179 adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
180 return a4xx_idle(gpu);
181 }
182
a4xx_hw_init(struct msm_gpu * gpu)183 static int a4xx_hw_init(struct msm_gpu *gpu)
184 {
185 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
186 struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
187 uint32_t *ptr, len;
188 int i, ret;
189
190 if (adreno_is_a405(adreno_gpu)) {
191 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
192 } else if (adreno_is_a420(adreno_gpu)) {
193 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
194 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
195 gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
196 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
197 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
198 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
199 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
200 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
201 } else if (adreno_is_a430(adreno_gpu)) {
202 gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
203 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
204 gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
205 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
206 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
207 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
208 } else {
209 BUG();
210 }
211
212 /* Make all blocks contribute to the GPU BUSY perf counter */
213 gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
214
215 /* Tune the hystersis counters for SP and CP idle detection */
216 gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
217 gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
218
219 if (adreno_is_a430(adreno_gpu)) {
220 gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
221 }
222
223 /* Enable the RBBM error reporting bits */
224 gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
225
226 /* Enable AHB error reporting*/
227 gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
228
229 /* Enable power counters*/
230 gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
231
232 /*
233 * Turn on hang detection - this spews a lot of useful information
234 * into the RBBM registers on a hang:
235 */
236 gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
237 (1 << 30) | 0xFFFF);
238
239 gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
240 (unsigned int)(a4xx_gpu->ocmem.base >> 14));
241
242 /* Turn on performance counters: */
243 gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
244
245 /* use the first CP counter for timestamp queries.. userspace may set
246 * this as well but it selects the same counter/countable:
247 */
248 gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
249
250 if (adreno_is_a430(adreno_gpu))
251 gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
252
253 /* Disable L2 bypass to avoid UCHE out of bounds errors */
254 gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
255 gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
256
257 gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
258 (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
259
260 /* On A430 enable SP regfile sleep for power savings */
261 /* TODO downstream does this for !420, so maybe applies for 405 too? */
262 if (!adreno_is_a420(adreno_gpu)) {
263 gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
264 0x00000441);
265 gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
266 0x00000441);
267 }
268
269 a4xx_enable_hwcg(gpu);
270
271 /*
272 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
273 * due to timing issue with HLSQ_TP_CLK_EN
274 */
275 if (adreno_is_a420(adreno_gpu)) {
276 unsigned int val;
277 val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
278 val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
279 val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
280 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
281 }
282
283 /* setup access protection: */
284 gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
285
286 /* RBBM registers */
287 gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
288 gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
289 gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
290 gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
291 gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
292 gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
293
294 /* CP registers */
295 gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
296 gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
297
298
299 /* RB registers */
300 gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
301
302 /* HLSQ registers */
303 gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
304
305 /* VPC registers */
306 gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
307
308 /* SMMU registers */
309 gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
310
311 gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
312
313 ret = adreno_hw_init(gpu);
314 if (ret)
315 return ret;
316
317 /*
318 * Use the default ringbuffer size and block size but disable the RPTR
319 * shadow
320 */
321 gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
322 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
323
324 /* Set the ringbuffer address */
325 gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
326
327 /* Load PM4: */
328 ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
329 len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
330 DBG("loading PM4 ucode version: %u", ptr[0]);
331 gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
332 for (i = 1; i < len; i++)
333 gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
334
335 /* Load PFP: */
336 ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
337 len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
338 DBG("loading PFP ucode version: %u", ptr[0]);
339
340 gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
341 for (i = 1; i < len; i++)
342 gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
343
344 /* clear ME_HALT to start micro engine */
345 gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
346
347 return a4xx_me_init(gpu) ? 0 : -EINVAL;
348 }
349
a4xx_recover(struct msm_gpu * gpu)350 static void a4xx_recover(struct msm_gpu *gpu)
351 {
352 int i;
353
354 adreno_dump_info(gpu);
355
356 for (i = 0; i < 8; i++) {
357 printk("CP_SCRATCH_REG%d: %u\n", i,
358 gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
359 }
360
361 /* dump registers before resetting gpu, if enabled: */
362 if (hang_debug)
363 a4xx_dump(gpu);
364
365 gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
366 gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
367 gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
368 adreno_recover(gpu);
369 }
370
a4xx_destroy(struct msm_gpu * gpu)371 static void a4xx_destroy(struct msm_gpu *gpu)
372 {
373 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
375
376 DBG("%s", gpu->name);
377
378 adreno_gpu_cleanup(adreno_gpu);
379
380 adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
381
382 kfree(a4xx_gpu);
383 }
384
a4xx_idle(struct msm_gpu * gpu)385 static bool a4xx_idle(struct msm_gpu *gpu)
386 {
387 /* wait for ringbuffer to drain: */
388 if (!adreno_idle(gpu, gpu->rb[0]))
389 return false;
390
391 /* then wait for GPU to finish: */
392 if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
393 A4XX_RBBM_STATUS_GPU_BUSY))) {
394 DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
395 /* TODO maybe we need to reset GPU here to recover from hang? */
396 return false;
397 }
398
399 return true;
400 }
401
a4xx_irq(struct msm_gpu * gpu)402 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
403 {
404 uint32_t status;
405
406 status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
407 DBG("%s: Int status %08x", gpu->name, status);
408
409 if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
410 uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
411 printk("CP | Protected mode error| %s | addr=%x\n",
412 reg & (1 << 24) ? "WRITE" : "READ",
413 (reg & 0xFFFFF) >> 2);
414 }
415
416 gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
417
418 msm_gpu_retire(gpu);
419
420 return IRQ_HANDLED;
421 }
422
/*
 * Register list installed as adreno_gpu->registers for every variant
 * except A405 (see a4xx_gpu_init()).  Entries look like (first, last)
 * offset pairs grouped by hardware block and are terminated by ~0 --
 * NOTE(review): confirm the pair semantics against the adreno dump code.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002b, 0x002e, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009c, 0x0170, 0x0174, 0x01af,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04c0, 0x04dd, 0x0500, 0x050b,
	0x0578, 0x058f,
	/* VSC */
	0x0c00, 0x0c03, 0x0c08, 0x0c41, 0x0c50, 0x0c51,
	/* GRAS */
	0x0c80, 0x0c81, 0x0c88, 0x0c8f,
	/* RB */
	0x0cc0, 0x0cc0, 0x0cc4, 0x0cd2,
	/* PC */
	0x0d00, 0x0d0c, 0x0d10, 0x0d17, 0x0d20, 0x0d23,
	/* VFD */
	0x0e40, 0x0e4a,
	/* VPC */
	0x0e60, 0x0e61, 0x0e63, 0x0e68,
	/* UCHE */
	0x0e80, 0x0e84, 0x0e88, 0x0e95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100a,
	0x100c, 0x100d, 0x100f, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207b, 0x216e,
	/* PC CTX 0 */
	0x21c0, 0x21c6, 0x21d0, 0x21d0, 0x21d9, 0x21d9, 0x21e5, 0x21e7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22a9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247b, 0x256e,
	/* PC CTX 1 */
	0x25c0, 0x25c6, 0x25d0, 0x25d0, 0x25d9, 0x25d9, 0x25e5, 0x25e7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26a9,
	/* XPU */
	0x2c00, 0x2c01, 0x2c10, 0x2c10, 0x2c12, 0x2c16, 0x2c1d, 0x2c20,
	0x2c28, 0x2c28, 0x2c30, 0x2c30, 0x2c32, 0x2c36, 0x2c40, 0x2c40,
	0x2c50, 0x2c50, 0x2c52, 0x2c56, 0x2c80, 0x2c80, 0x2c94, 0x2c95,
	/* VBIF */
	0x3000, 0x3007, 0x300c, 0x3014, 0x3018, 0x301d, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303c, 0x303d, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305b, 0x3061, 0x3064, 0x3068,
	0x306c, 0x306d, 0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8,
	0x30d0, 0x30d0, 0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330c, 0x330c,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341d, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400a, 0x400c, 0x400d, 0x400f, 0x4012, 0x4014, 0x4016,
	0x401d, 0x401d, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480f, 0x480f, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482b, 0x482b, 0x4857, 0x4857, 0x4883, 0x4883, 0x48af, 0x48af,
	0x48c5, 0x48c5, 0x48e5, 0x48e5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495b, 0x495b, 0x4980, 0x498e,
	0x4b00, 0x4b00, 0x4c00, 0x4c00, 0x4d00, 0x4d00, 0x4e00, 0x4e00,
	0x4e80, 0x4e80, 0x4f00, 0x4f00, 0x4f08, 0x4f08, 0x4f10, 0x4f10,
	0x4f18, 0x4f18, 0x4f20, 0x4f20, 0x4f30, 0x4f30, 0x4f60, 0x4f60,
	0x4f80, 0x4f81, 0x4f88, 0x4f89, 0x4fee, 0x4fee, 0x4ff3, 0x4ff3,
	0x6000, 0x6001, 0x6008, 0x600f, 0x6014, 0x6016, 0x6018, 0x601b,
	0x61fd, 0x61fd, 0x623c, 0x623c, 0x6380, 0x6380, 0x63a0, 0x63a0,
	0x63c0, 0x63c1, 0x63c8, 0x63c9, 0x63d0, 0x63d4, 0x63d6, 0x63d6,
	0x63ee, 0x63ee, 0x6400, 0x6401, 0x6408, 0x640f, 0x6414, 0x6416,
	0x6418, 0x641b, 0x65fd, 0x65fd, 0x663c, 0x663c, 0x6780, 0x6780,
	0x67a0, 0x67a0, 0x67c0, 0x67c1, 0x67c8, 0x67c9, 0x67d0, 0x67d4,
	0x67d6, 0x67d6, 0x67ee, 0x67ee, 0x6800, 0x6801, 0x6808, 0x680f,
	0x6814, 0x6816, 0x6818, 0x681b, 0x69fd, 0x69fd, 0x6a3c, 0x6a3c,
	0x6b80, 0x6b80, 0x6ba0, 0x6ba0, 0x6bc0, 0x6bc1, 0x6bc8, 0x6bc9,
	0x6bd0, 0x6bd4, 0x6bd6, 0x6bd6, 0x6bee, 0x6bee,
	~0 /* sentinel */
};
504
/*
 * Register list installed as adreno_gpu->registers on A405 (see
 * a4xx_gpu_init()).  Same layout as a4xx_registers: entries look like
 * (first, last) offset pairs, terminated by ~0 -- NOTE(review): confirm
 * the pair semantics against the adreno dump code.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002b, 0x002e, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009c, 0x0170, 0x0174, 0x01af,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04c0, 0x04dd, 0x0500, 0x050b,
	0x0578, 0x058f,
	/* VSC */
	0x0c00, 0x0c03, 0x0c08, 0x0c41, 0x0c50, 0x0c51,
	/* GRAS */
	0x0c80, 0x0c81, 0x0c88, 0x0c8f,
	/* RB */
	0x0cc0, 0x0cc0, 0x0cc4, 0x0cd2,
	/* PC */
	0x0d00, 0x0d0c, 0x0d10, 0x0d17, 0x0d20, 0x0d23,
	/* VFD */
	0x0e40, 0x0e4a,
	/* VPC */
	0x0e60, 0x0e61, 0x0e63, 0x0e68,
	/* UCHE */
	0x0e80, 0x0e84, 0x0e88, 0x0e95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207b, 0x216e,
	/* PC CTX 0 */
	0x21c0, 0x21c6, 0x21d0, 0x21d0, 0x21d9, 0x21d9, 0x21e5, 0x21e7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22a9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247b, 0x256e,
	/* PC CTX 1 */
	0x25c0, 0x25c6, 0x25d0, 0x25d0, 0x25d9, 0x25d9, 0x25e5, 0x25e7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26a9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302c, 0x302c, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303c, 0x303d, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305b, 0x3061, 0x3064, 0x3068, 0x306c, 0x306d,
	0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8, 0x30d0, 0x30d0,
	0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340c, 0x340c, 0x3410, 0x3410,
	~0 /* sentinel */
};
550
a4xx_gpu_state_get(struct msm_gpu * gpu)551 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
552 {
553 struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
554
555 if (!state)
556 return ERR_PTR(-ENOMEM);
557
558 adreno_gpu_state_get(gpu, state);
559
560 state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
561
562 return state;
563 }
564
a4xx_dump(struct msm_gpu * gpu)565 static void a4xx_dump(struct msm_gpu *gpu)
566 {
567 printk("status: %08x\n",
568 gpu_read(gpu, REG_A4XX_RBBM_STATUS));
569 adreno_dump(gpu);
570 }
571
a4xx_pm_resume(struct msm_gpu * gpu)572 static int a4xx_pm_resume(struct msm_gpu *gpu) {
573 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
574 int ret;
575
576 ret = msm_gpu_pm_resume(gpu);
577 if (ret)
578 return ret;
579
580 if (adreno_is_a430(adreno_gpu)) {
581 unsigned int reg;
582 /* Set the default register values; set SW_COLLAPSE to 0 */
583 gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
584 do {
585 udelay(5);
586 reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
587 } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
588 }
589 return 0;
590 }
591
a4xx_pm_suspend(struct msm_gpu * gpu)592 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
593 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
594 int ret;
595
596 ret = msm_gpu_pm_suspend(gpu);
597 if (ret)
598 return ret;
599
600 if (adreno_is_a430(adreno_gpu)) {
601 /* Set the default register values; set SW_COLLAPSE to 1 */
602 gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
603 }
604 return 0;
605 }
606
/*
 * a4xx_get_timestamp() - read the 64-bit CP_0 performance counter
 * @gpu: GPU to query
 * @value: receives the counter assembled from the LO/HI register pair
 *
 * Return: always 0.
 */
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	const uint64_t counter = gpu_read64(gpu,
					    REG_A4XX_RBBM_PERFCTR_CP_0_LO,
					    REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	*value = counter;
	return 0;
}
614
a4xx_get_rptr(struct msm_gpu * gpu,struct msm_ringbuffer * ring)615 static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
616 {
617 ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
618 return ring->memptrs->rptr;
619 }
620
621 static const struct adreno_gpu_funcs funcs = {
622 .base = {
623 .get_param = adreno_get_param,
624 .set_param = adreno_set_param,
625 .hw_init = a4xx_hw_init,
626 .pm_suspend = a4xx_pm_suspend,
627 .pm_resume = a4xx_pm_resume,
628 .recover = a4xx_recover,
629 .submit = a4xx_submit,
630 .active_ring = adreno_active_ring,
631 .irq = a4xx_irq,
632 .destroy = a4xx_destroy,
633 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
634 .show = adreno_show,
635 #endif
636 .gpu_state_get = a4xx_gpu_state_get,
637 .gpu_state_put = adreno_gpu_state_put,
638 .create_address_space = adreno_iommu_create_address_space,
639 .get_rptr = a4xx_get_rptr,
640 },
641 .get_timestamp = a4xx_get_timestamp,
642 };
643
a4xx_gpu_init(struct drm_device * dev)644 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
645 {
646 struct a4xx_gpu *a4xx_gpu = NULL;
647 struct adreno_gpu *adreno_gpu;
648 struct msm_gpu *gpu;
649 struct msm_drm_private *priv = dev->dev_private;
650 struct platform_device *pdev = priv->gpu_pdev;
651 struct icc_path *ocmem_icc_path;
652 struct icc_path *icc_path;
653 int ret;
654
655 if (!pdev) {
656 DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
657 ret = -ENXIO;
658 goto fail;
659 }
660
661 a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
662 if (!a4xx_gpu) {
663 ret = -ENOMEM;
664 goto fail;
665 }
666
667 adreno_gpu = &a4xx_gpu->base;
668 gpu = &adreno_gpu->base;
669
670 gpu->perfcntrs = NULL;
671 gpu->num_perfcntrs = 0;
672
673 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
674 if (ret)
675 goto fail;
676
677 adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
678 a4xx_registers;
679
680 /* if needed, allocate gmem: */
681 ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
682 &a4xx_gpu->ocmem);
683 if (ret)
684 goto fail;
685
686 if (!gpu->aspace) {
687 /* TODO we think it is possible to configure the GPU to
688 * restrict access to VRAM carveout. But the required
689 * registers are unknown. For now just bail out and
690 * limp along with just modesetting. If it turns out
691 * to not be possible to restrict access, then we must
692 * implement a cmdstream validator.
693 */
694 DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
695 if (!allow_vram_carveout) {
696 ret = -ENXIO;
697 goto fail;
698 }
699 }
700
701 icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
702 if (IS_ERR(icc_path)) {
703 ret = PTR_ERR(icc_path);
704 goto fail;
705 }
706
707 ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
708 if (IS_ERR(ocmem_icc_path)) {
709 ret = PTR_ERR(ocmem_icc_path);
710 /* allow -ENODATA, ocmem icc is optional */
711 if (ret != -ENODATA)
712 goto fail;
713 ocmem_icc_path = NULL;
714 }
715
716 /*
717 * Set the ICC path to maximum speed for now by multiplying the fastest
718 * frequency by the bus width (8). We'll want to scale this later on to
719 * improve battery life.
720 */
721 icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
722 icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
723
724 return gpu;
725
726 fail:
727 if (a4xx_gpu)
728 a4xx_destroy(&a4xx_gpu->base.base);
729
730 return ERR_PTR(ret);
731 }
732