Lines Matching refs:kfd

58 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
60 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
62 static int kfd_resume(struct kfd_dev *kfd);
64 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) in kfd_device_info_set_sdma_info() argument
66 uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; in kfd_device_info_set_sdma_info()
78 kfd->device_info.num_sdma_queues_per_engine = 2; in kfd_device_info_set_sdma_info()
95 kfd->device_info.num_sdma_queues_per_engine = 8; in kfd_device_info_set_sdma_info()
101 kfd->device_info.num_sdma_queues_per_engine = 8; in kfd_device_info_set_sdma_info()
109 kfd->device_info.num_reserved_sdma_queues_per_engine = 2; in kfd_device_info_set_sdma_info()
111 kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; in kfd_device_info_set_sdma_info()
115 kfd->device_info.num_reserved_sdma_queues_per_engine = 2; in kfd_device_info_set_sdma_info()
117 kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; in kfd_device_info_set_sdma_info()
124 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) in kfd_device_info_set_event_interrupt_class() argument
126 uint32_t gc_version = KFD_GC_VERSION(kfd); in kfd_device_info_set_event_interrupt_class()
150 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; in kfd_device_info_set_event_interrupt_class()
156 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; in kfd_device_info_set_event_interrupt_class()
161 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; in kfd_device_info_set_event_interrupt_class()
165 static void kfd_device_info_init(struct kfd_dev *kfd, in kfd_device_info_init() argument
168 uint32_t gc_version = KFD_GC_VERSION(kfd); in kfd_device_info_init()
169 uint32_t asic_type = kfd->adev->asic_type; in kfd_device_info_init()
171 kfd->device_info.max_pasid_bits = 16; in kfd_device_info_init()
172 kfd->device_info.max_no_of_hqd = 24; in kfd_device_info_init()
173 kfd->device_info.num_of_watch_points = 4; in kfd_device_info_init()
174 kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; in kfd_device_info_init()
175 kfd->device_info.gfx_target_version = gfx_target_version; in kfd_device_info_init()
177 if (KFD_IS_SOC15(kfd)) { in kfd_device_info_init()
178 kfd->device_info.doorbell_size = 8; in kfd_device_info_init()
179 kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); in kfd_device_info_init()
180 kfd->device_info.supports_cwsr = true; in kfd_device_info_init()
182 kfd_device_info_set_sdma_info(kfd); in kfd_device_info_init()
184 kfd_device_info_set_event_interrupt_class(kfd); in kfd_device_info_init()
189 kfd->device_info.needs_iommu_device = true; in kfd_device_info_init()
194 kfd->device_info.no_atomic_fw_version = 14; in kfd_device_info_init()
196 kfd->device_info.no_atomic_fw_version = 3; in kfd_device_info_init()
198 kfd->device_info.no_atomic_fw_version = 92; in kfd_device_info_init()
200 kfd->device_info.no_atomic_fw_version = 145; in kfd_device_info_init()
204 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
207 kfd->device_info.doorbell_size = 4; in kfd_device_info_init()
208 kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); in kfd_device_info_init()
209 kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; in kfd_device_info_init()
210 kfd->device_info.num_sdma_queues_per_engine = 2; in kfd_device_info_init()
215 kfd->device_info.supports_cwsr = true; in kfd_device_info_init()
219 kfd->device_info.needs_iommu_device = true; in kfd_device_info_init()
222 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
228 struct kfd_dev *kfd = NULL; in kgd2kfd_probe() local
426 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); in kgd2kfd_probe()
427 if (!kfd) in kgd2kfd_probe()
430 kfd->adev = adev; in kgd2kfd_probe()
431 kfd_device_info_init(kfd, vf, gfx_target_version); in kgd2kfd_probe()
432 kfd->pdev = pdev; in kgd2kfd_probe()
433 kfd->init_complete = false; in kgd2kfd_probe()
434 kfd->kfd2kgd = f2g; in kgd2kfd_probe()
435 atomic_set(&kfd->compute_profile, 0); in kgd2kfd_probe()
437 mutex_init(&kfd->doorbell_mutex); in kgd2kfd_probe()
438 memset(&kfd->doorbell_available_index, 0, in kgd2kfd_probe()
439 sizeof(kfd->doorbell_available_index)); in kgd2kfd_probe()
441 atomic_set(&kfd->sram_ecc_flag, 0); in kgd2kfd_probe()
443 ida_init(&kfd->doorbell_ida); in kgd2kfd_probe()
445 return kfd; in kgd2kfd_probe()
448 static void kfd_cwsr_init(struct kfd_dev *kfd) in kfd_cwsr_init() argument
450 if (cwsr_enable && kfd->device_info.supports_cwsr) { in kfd_cwsr_init()
451 if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) { in kfd_cwsr_init()
453 kfd->cwsr_isa = cwsr_trap_gfx8_hex; in kfd_cwsr_init()
454 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); in kfd_cwsr_init()
455 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) { in kfd_cwsr_init()
457 kfd->cwsr_isa = cwsr_trap_arcturus_hex; in kfd_cwsr_init()
458 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); in kfd_cwsr_init()
459 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) { in kfd_cwsr_init()
461 kfd->cwsr_isa = cwsr_trap_aldebaran_hex; in kfd_cwsr_init()
462 kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); in kfd_cwsr_init()
463 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { in kfd_cwsr_init()
465 kfd->cwsr_isa = cwsr_trap_gfx9_hex; in kfd_cwsr_init()
466 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); in kfd_cwsr_init()
467 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) { in kfd_cwsr_init()
469 kfd->cwsr_isa = cwsr_trap_nv1x_hex; in kfd_cwsr_init()
470 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); in kfd_cwsr_init()
471 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { in kfd_cwsr_init()
473 kfd->cwsr_isa = cwsr_trap_gfx10_hex; in kfd_cwsr_init()
474 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); in kfd_cwsr_init()
477 kfd->cwsr_isa = cwsr_trap_gfx11_hex; in kfd_cwsr_init()
478 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); in kfd_cwsr_init()
481 kfd->cwsr_enabled = true; in kfd_cwsr_init()
485 static int kfd_gws_init(struct kfd_dev *kfd) in kfd_gws_init() argument
489 if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) in kfd_gws_init()
492 if (hws_gws_support || (KFD_IS_SOC15(kfd) && in kfd_gws_init()
493 ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) in kfd_gws_init()
494 && kfd->mec2_fw_version >= 0x81b3) || in kfd_gws_init()
495 (KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0) in kfd_gws_init()
496 && kfd->mec2_fw_version >= 0x1b3) || in kfd_gws_init()
497 (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) in kfd_gws_init()
498 && kfd->mec2_fw_version >= 0x30) || in kfd_gws_init()
499 (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) in kfd_gws_init()
500 && kfd->mec2_fw_version >= 0x28)))) in kfd_gws_init()
501 ret = amdgpu_amdkfd_alloc_gws(kfd->adev, in kfd_gws_init()
502 kfd->adev->gds.gws_size, &kfd->gws); in kfd_gws_init()
513 bool kgd2kfd_device_init(struct kfd_dev *kfd, in kgd2kfd_device_init() argument
519 kfd->ddev = ddev; in kgd2kfd_device_init()
520 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
522 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
524 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
526 kfd->shared_resources = *gpu_resources; in kgd2kfd_device_init()
528 kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
529 kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
530 kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd in kgd2kfd_device_init()
531 - kfd->vm_info.first_vmid_kfd + 1; in kgd2kfd_device_init()
537 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); in kgd2kfd_device_init()
538 if (!kfd->pci_atomic_requested && in kgd2kfd_device_init()
539 kfd->device_info.needs_pci_atomics && in kgd2kfd_device_init()
540 (!kfd->device_info.no_atomic_fw_version || in kgd2kfd_device_init()
541 kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { in kgd2kfd_device_init()
544 kfd->pdev->vendor, kfd->pdev->device, in kgd2kfd_device_init()
545 kfd->mec_fw_version, in kgd2kfd_device_init()
546 kfd->device_info.no_atomic_fw_version); in kgd2kfd_device_init()
552 kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); in kgd2kfd_device_init()
554 kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; in kgd2kfd_device_init()
558 kfd->device_info.mqd_size_aligned; in kgd2kfd_device_init()
564 map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ? in kgd2kfd_device_init()
578 kfd->adev, size, &kfd->gtt_mem, in kgd2kfd_device_init()
579 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, in kgd2kfd_device_init()
588 if (kfd_gtt_sa_init(kfd, size, 512) != 0) { in kgd2kfd_device_init()
593 if (kfd_doorbell_init(kfd)) { in kgd2kfd_device_init()
600 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; in kgd2kfd_device_init()
602 kfd->noretry = kfd->adev->gmc.noretry; in kgd2kfd_device_init()
604 if (kfd_interrupt_init(kfd)) { in kgd2kfd_device_init()
609 kfd->dqm = device_queue_manager_init(kfd); in kgd2kfd_device_init()
610 if (!kfd->dqm) { in kgd2kfd_device_init()
618 if (kfd_gws_init(kfd)) { in kgd2kfd_device_init()
620 kfd->adev->gds.gws_size); in kgd2kfd_device_init()
625 kfd_double_confirm_iommu_support(kfd); in kgd2kfd_device_init()
627 if (kfd_iommu_device_init(kfd)) { in kgd2kfd_device_init()
628 kfd->use_iommu_v2 = false; in kgd2kfd_device_init()
633 kfd_cwsr_init(kfd); in kgd2kfd_device_init()
635 svm_migrate_init(kfd->adev); in kgd2kfd_device_init()
637 if (kgd2kfd_resume_iommu(kfd)) in kgd2kfd_device_init()
640 if (kfd_resume(kfd)) in kgd2kfd_device_init()
643 amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); in kgd2kfd_device_init()
645 if (kfd_topology_add_device(kfd)) { in kgd2kfd_device_init()
650 kfd_smi_init(kfd); in kgd2kfd_device_init()
652 kfd->init_complete = true; in kgd2kfd_device_init()
653 dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, in kgd2kfd_device_init()
654 kfd->pdev->device); in kgd2kfd_device_init()
657 kfd->dqm->sched_policy); in kgd2kfd_device_init()
665 device_queue_manager_uninit(kfd->dqm); in kgd2kfd_device_init()
667 kfd_interrupt_exit(kfd); in kgd2kfd_device_init()
669 kfd_doorbell_fini(kfd); in kgd2kfd_device_init()
671 kfd_gtt_sa_fini(kfd); in kgd2kfd_device_init()
673 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); in kgd2kfd_device_init()
675 if (kfd->gws) in kgd2kfd_device_init()
676 amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); in kgd2kfd_device_init()
679 kfd->pdev->vendor, kfd->pdev->device); in kgd2kfd_device_init()
681 return kfd->init_complete; in kgd2kfd_device_init()
684 void kgd2kfd_device_exit(struct kfd_dev *kfd) in kgd2kfd_device_exit() argument
686 if (kfd->init_complete) { in kgd2kfd_device_exit()
687 device_queue_manager_uninit(kfd->dqm); in kgd2kfd_device_exit()
688 kfd_interrupt_exit(kfd); in kgd2kfd_device_exit()
689 kfd_topology_remove_device(kfd); in kgd2kfd_device_exit()
690 kfd_doorbell_fini(kfd); in kgd2kfd_device_exit()
691 ida_destroy(&kfd->doorbell_ida); in kgd2kfd_device_exit()
692 kfd_gtt_sa_fini(kfd); in kgd2kfd_device_exit()
693 amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); in kgd2kfd_device_exit()
694 if (kfd->gws) in kgd2kfd_device_exit()
695 amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws); in kgd2kfd_device_exit()
698 kfree(kfd); in kgd2kfd_device_exit()
701 int kgd2kfd_pre_reset(struct kfd_dev *kfd) in kgd2kfd_pre_reset() argument
703 if (!kfd->init_complete) in kgd2kfd_pre_reset()
706 kfd_smi_event_update_gpu_reset(kfd, false); in kgd2kfd_pre_reset()
708 kfd->dqm->ops.pre_reset(kfd->dqm); in kgd2kfd_pre_reset()
710 kgd2kfd_suspend(kfd, false); in kgd2kfd_pre_reset()
712 kfd_signal_reset_event(kfd); in kgd2kfd_pre_reset()
722 int kgd2kfd_post_reset(struct kfd_dev *kfd) in kgd2kfd_post_reset() argument
726 if (!kfd->init_complete) in kgd2kfd_post_reset()
729 ret = kfd_resume(kfd); in kgd2kfd_post_reset()
734 atomic_set(&kfd->sram_ecc_flag, 0); in kgd2kfd_post_reset()
736 kfd_smi_event_update_gpu_reset(kfd, true); in kgd2kfd_post_reset()
746 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) in kgd2kfd_suspend() argument
748 if (!kfd->init_complete) in kgd2kfd_suspend()
758 kfd->dqm->ops.stop(kfd->dqm); in kgd2kfd_suspend()
759 kfd_iommu_suspend(kfd); in kgd2kfd_suspend()
762 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) in kgd2kfd_resume() argument
766 if (!kfd->init_complete) in kgd2kfd_resume()
769 ret = kfd_resume(kfd); in kgd2kfd_resume()
784 int kgd2kfd_resume_iommu(struct kfd_dev *kfd) in kgd2kfd_resume_iommu() argument
788 err = kfd_iommu_resume(kfd); in kgd2kfd_resume_iommu()
792 kfd->pdev->vendor, kfd->pdev->device); in kgd2kfd_resume_iommu()
796 static int kfd_resume(struct kfd_dev *kfd) in kfd_resume() argument
800 err = kfd->dqm->ops.start(kfd->dqm); in kfd_resume()
804 kfd->pdev->vendor, kfd->pdev->device); in kfd_resume()
825 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) in kgd2kfd_interrupt() argument
831 if (!kfd->init_complete) in kgd2kfd_interrupt()
834 if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) { in kgd2kfd_interrupt()
839 spin_lock_irqsave(&kfd->interrupt_lock, flags); in kgd2kfd_interrupt()
841 if (kfd->interrupts_active in kgd2kfd_interrupt()
842 && interrupt_is_wanted(kfd, ih_ring_entry, in kgd2kfd_interrupt()
844 && enqueue_ih_ring_entry(kfd, in kgd2kfd_interrupt()
846 kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); in kgd2kfd_interrupt()
848 spin_unlock_irqrestore(&kfd->interrupt_lock, flags); in kgd2kfd_interrupt()
940 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, in kfd_gtt_sa_init() argument
950 kfd->gtt_sa_chunk_size = chunk_size; in kfd_gtt_sa_init()
951 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; in kfd_gtt_sa_init()
953 kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks, in kfd_gtt_sa_init()
955 if (!kfd->gtt_sa_bitmap) in kfd_gtt_sa_init()
959 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); in kfd_gtt_sa_init()
961 mutex_init(&kfd->gtt_sa_lock); in kfd_gtt_sa_init()
966 static void kfd_gtt_sa_fini(struct kfd_dev *kfd) in kfd_gtt_sa_fini() argument
968 mutex_destroy(&kfd->gtt_sa_lock); in kfd_gtt_sa_fini()
969 bitmap_free(kfd->gtt_sa_bitmap); in kfd_gtt_sa_fini()
986 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, in kfd_gtt_sa_allocate() argument
994 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1005 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1009 found = find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1010 kfd->gtt_sa_num_of_chunks, in kfd_gtt_sa_allocate()
1016 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1023 kfd->gtt_start_gpu_addr, in kfd_gtt_sa_allocate()
1025 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1027 kfd->gtt_start_cpu_ptr, in kfd_gtt_sa_allocate()
1029 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1035 if (size <= kfd->gtt_sa_chunk_size) { in kfd_gtt_sa_allocate()
1037 __set_bit(found, kfd->gtt_sa_bitmap); in kfd_gtt_sa_allocate()
1042 cur_size = size - kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1045 find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1046 kfd->gtt_sa_num_of_chunks, ++found); in kfd_gtt_sa_allocate()
1060 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1064 if (cur_size <= kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1067 cur_size -= kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1075 bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start, in kfd_gtt_sa_allocate()
1079 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1084 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1089 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) in kfd_gtt_sa_free() argument
1098 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
1101 bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start, in kfd_gtt_sa_free()
1104 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
1110 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) in kgd2kfd_set_sram_ecc_flag() argument
1112 if (kfd) in kgd2kfd_set_sram_ecc_flag()
1113 atomic_inc(&kfd->sram_ecc_flag); in kgd2kfd_set_sram_ecc_flag()
1116 void kfd_inc_compute_active(struct kfd_dev *kfd) in kfd_inc_compute_active() argument
1118 if (atomic_inc_return(&kfd->compute_profile) == 1) in kfd_inc_compute_active()
1119 amdgpu_amdkfd_set_compute_idle(kfd->adev, false); in kfd_inc_compute_active()
1122 void kfd_dec_compute_active(struct kfd_dev *kfd) in kfd_dec_compute_active() argument
1124 int count = atomic_dec_return(&kfd->compute_profile); in kfd_dec_compute_active()
1127 amdgpu_amdkfd_set_compute_idle(kfd->adev, true); in kfd_dec_compute_active()
1131 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) in kgd2kfd_smi_event_throttle() argument
1133 if (kfd && kfd->init_complete) in kgd2kfd_smi_event_throttle()
1134 kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); in kgd2kfd_smi_event_throttle()