/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

#define GFX_OFF_NO_DELAY 0

/*
 * GPU GFX IP block helper functions.
 */

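/*
 * The helpers below linearize a (mec, pipe, queue) triple into a single bit
 * index and back:
 *
 *	bit = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue
 *
 * As a purely illustrative example (hypothetical counts), with 4 pipes per
 * MEC and 8 queues per pipe, MEC 1 / pipe 2 / queue 3 maps to bit
 * 1 * 4 * 8 + 2 * 8 + 3 = 51, and bit 51 decomposes back to the same triple.
 */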
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
					int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
		/ adev->gfx.mec.num_pipe_per_mec;
}

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec.queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
				int *me, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.me.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
		% adev->gfx.me.num_pipe_per_me;
	*me = (bit / adev->gfx.me.num_queue_per_pipe)
		/ adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}

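/*
 * The disable_cu parameter parsed below is a comma-separated list of
 * se.sh.cu triples. As a purely illustrative example (hypothetical values),
 * amdgpu.disable_cu=1.0.3,1.0.4 would set bits 3 and 4 in
 * mask[1 * max_sh + 0], disabling CUs 3 and 4 in SH 0 of SE 1.
 */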
/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}

static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
					       struct amdgpu_ring *ring)
{
	/* Policy: use 1st queue as high priority compute queue if we
	 * have more than one compute queue.
	 */
	if (adev->gfx.num_compute_rings > 1 &&
	    ring == &adev->gfx.compute_ring[0])
		return true;

	return false;
}

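/*
 * Illustrative sketch of the multipipe policy below (hypothetical counts):
 * with 4 pipes per MEC and num_compute_rings = 8, i = 0..7 select
 * (pipe, queue) = (0,0), (1,0), (2,0), (3,0), (0,1), (1,1), (2,1), (3,1),
 * i.e. the queues are spread round-robin across the pipes of MEC1 before a
 * second queue is taken from any pipe.
 */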
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);

	if (multipipe_policy) {
		/* policy: spread queues evenly across all pipes on MEC1 only */
		for (i = 0; i < max_queues_per_mec; i++) {
			pipe = i % adev->gfx.mec.num_pipe_per_mec;
			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
				adev->gfx.mec.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
				adev->gfx.mec.queue_bitmap);
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, me;

	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
		queue = i % adev->gfx.me.num_queue_per_pipe;
		me = (i / adev->gfx.me.num_queue_per_pipe)
		      / adev->gfx.me.num_pipe_per_me;

		if (me >= adev->gfx.me.num_me)
			break;
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * this will be extended to multiple queues per pipe later */
		if (me == 0 && queue < 1)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.kiq;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must; otherwise the hypervisor's
		 * SAVE_VF fails after the driver is unloaded, because the MQD
		 * has been deallocated and unbound from GART. To keep the two
		 * cases from diverging, use the VRAM domain for the KIQ MQD on
		 * both SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i])
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock(&adev->gfx.kiq.ring_lock);
	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
			      adev->gfx.num_compute_rings)) {
		spin_unlock(&adev->gfx.kiq.ring_lock);
		return -ENOMEM;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	if (adev->gfx.kiq.ring.sched.ready)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	return r;
}

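/*
 * Translate a driver queue bitmap index into the bit layout expected by the
 * KIQ SET_RESOURCES packet, which (as the constants below assume) uses
 * 8 queue bits per pipe and 4 pipes per MEC. As a purely illustrative
 * example, a queue bit that decodes to MEC 1 / pipe 2 / queue 0 maps to
 * resource bit 1 * 4 * 8 + 2 * 8 + 0 = 48.
 */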
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;
	int set_resource_bit = 0;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

	return set_resource_bit;
}

int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask) * 8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	}

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
		 kiq_ring->queue);
	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
			      adev->gfx.num_compute_rings +
			      kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		return r;
	}

	if (adev->enable_mes)
		queue_mask = ~0ULL;

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

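/*
 * Calls to amdgpu_gfx_off_ctrl() below must be balanced. A purely
 * illustrative sketch of the expected caller pattern around direct GFX
 * register access:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// bump gfx_off_req_count, keep GFX powered on
 *	...					// access GFX registers
 *	amdgpu_gfx_off_ctrl(adev, true);	// drop the request; GFXOFF is re-armed
 *						// after GFX_OFF_DELAY_ENABLE
 */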
/**
 * amdgpu_gfx_off_ctrl - Handle the gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx CG/PG is enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not send a request to enable the gfx off feature before disabling it.
 */

void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	unsigned long delay = GFX_OFF_DELAY_ENABLE;

	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
			goto unlock;

		adev->gfx.gfx_off_req_count--;

		if (adev->gfx.gfx_off_req_count == 0 &&
		    !adev->gfx.gfx_off_state) {
			/* If going to s2idle, no need to wait */
			if (adev->in_s0ix)
				delay = GFX_OFF_NO_DELAY;
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
					      delay);
		}
	} else {
		if (adev->gfx.gfx_off_req_count == 0) {
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

			if (adev->gfx.gfx_off_state &&
			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
				adev->gfx.gfx_off_state = false;

				if (adev->gfx.funcs->init_spm_golden) {
					dev_dbg(adev->dev,
						"GFXOFF is disabled, re-init SPM golden settings\n");
					amdgpu_gfx_init_spm_golden(adev);
				}
			}
		}

		adev->gfx.gfx_off_req_count++;
	}

unlock:
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = amdgpu_dpm_get_status_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;

	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
		if (!amdgpu_persistent_edc_harvesting_supported(adev))
			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);

		r = amdgpu_ras_block_late_init(adev, ras_block);
		if (r)
			return r;

		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
		if (r)
			goto late_fini;
	} else {
		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
	}

	return 0;
late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);
	return r;
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
				   void *err_data,
				   struct amdgpu_iv_entry *entry)
{
	/* TODO: a UE (uncorrectable error) will trigger an interrupt.
	 *
	 * When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood.
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

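/*
 * Register access through the KIQ, typically used when direct MMIO access is
 * not available (e.g. under SRIOV). In rough outline, as implemented below:
 * reserve a writeback slot (for reads), emit a read/write-register packet on
 * the KIQ ring together with a polling fence, then poll the fence, retrying
 * outside of reset/IRQ context, before consuming the result.
 */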
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever: e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait any longer in IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever: e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait any longer in IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}

int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
	if (amdgpu_num_kcq == -1) {
		return 8;
	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
		return 8;
	}
	return amdgpu_num_kcq;
}