// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */
#define GAUDI2_RESET_POLL_CNT 3
#define GAUDI2_RESET_WAIT_MSEC 1 /* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC 100 /* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
#define GAUDI2_CB_POOL_CB_CNT 512
#define GAUDI2_CB_POOL_CB_SIZE SZ_128K /* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT 3
/*
 * The code already has built-in support for binning of up to MAX_FAULTY_TPCS
 * TPCs and relies on that value (for array sizes etc.), so we define a
 * separate value for the maximum number of faulty TPCs that reflects the
 * cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS 1
#define MAX_FAULTY_XBARS 1
#define MAX_FAULTY_EDMAS 1
#define MAX_FAULTY_DECODERS 1

#define GAUDI2_TPC_FULL_MASK 0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK 0xFFFF
#define GAUDI2_DECODER_FULL_MASK 0x3FF

#define GAUDI2_NUM_OF_QM_ERR_CAUSE 18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE 25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE 9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE 3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE 3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE 2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE 2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE 2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE 5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT (0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC 10000 /* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC (GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_idle_ind_mask) \
	(!((dma_core_idle_ind_mask) & \
	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK 0x300
#define DEC_WORK_STATE_IDLE 0
#define DEC_WORK_STATE_PEND 3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
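
/*
 * Illustrative sketch (not part of the driver logic): shows how the idle-check
 * helpers above are meant to be applied to raw status register values. The
 * function name and parameter names are hypothetical and exist only to show
 * the intended usage of IS_QM_IDLE().
 */
static inline bool gaudi2_example_qman_is_idle(u32 qm_glbl_sts0, u32 qm_glbl_sts1, u32 qm_cgm_sts)
{
	/* A QMAN counts as idle only when the QM, its ARC and the CGM all report idle */
	return IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
}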

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT 26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT 0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR 16
#define MMU_RANGE_INV_VA_LSB_SHIFT 12
#define MMU_RANGE_INV_VA_MSB_SHIFT 44
#define MMU_RANGE_INV_EN_SHIFT 0
#define MMU_RANGE_INV_ASID_EN_SHIFT 1
#define MMU_RANGE_INV_ASID_SHIFT 2
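
/*
 * Illustrative sketch (an assumption, not the driver's actual helper): shows
 * how the MMU_RANGE_INV_* shift definitions above could be combined into a
 * range-invalidation control value for a given ASID. The helper name and the
 * exact field-layout interpretation are for illustration only.
 */
static inline u32 gaudi2_example_range_inv_ctrl(u32 asid)
{
	return (1 << MMU_RANGE_INV_EN_SHIFT) |		/* enable range invalidation */
		(1 << MMU_RANGE_INV_ASID_EN_SHIFT) |	/* filter by ASID */
		(asid << MMU_RANGE_INV_ASID_SHIFT);	/* ASID value */
}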

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
 * because it has only a 2-entry FIFO, hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
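
/*
 * Worked example for reference (no additional configuration): with
 * GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19, the HMMU mask above evaluates to
 * GENMASK(18, 0), i.e. all 19 cause bits enabled, while the PMMU mask
 * evaluates to GENMASK(17, 0), leaving the last cause bit
 * ("burst_fifo_full") disabled for the PMMU only.
 */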

#define GAUDI2_MAX_STRING_LEN 64

#define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
		GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};
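
/*
 * Illustrative sketch (hypothetical helper, not used by the driver): the
 * mapping tables above translate a faulty engine instance into the HBM
 * cluster it belongs to, which is what the cluster binning limits
 * (MAX_FAULTY_EDMAS etc.) are defined against.
 */
static inline u8 gaudi2_example_edma_cluster(u32 edma_id)
{
	/* Caller is expected to pass a valid EDMA_ID_* value below EDMA_ID_SIZE */
	return edma_to_hbm_cluster[edma_id];
}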

static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};
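
/*
 * Illustrative sketch (hypothetical helper): the table above maps every
 * hardware queue of a queue manager to the single async event ID reported
 * for that QMAN, so all four queues of a QMAN resolve to the same event.
 */
static inline int gaudi2_example_queue_async_event(u32 queue_id)
{
	/* Caller is expected to pass a valid GAUDI2_QUEUE_ID_* value */
	return gaudi2_qman_async_event_id[queue_id];
}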

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
	u32 log_mask;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB group ID", 0x7FF},
	{"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF},
};
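
/*
 * Illustrative sketch (hypothetical helper): shows how an SM SEI log value
 * would be masked with the per-cause log_mask above before being reported
 * next to its log_name. The function name and the way the raw log value is
 * obtained are assumptions for illustration only.
 */
static inline u32 gaudi2_example_sm_sei_log_value(u32 cause_idx, u32 raw_log)
{
	return raw_log & gaudi2_sm_sei_cause[cause_idx].log_mask;
}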

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};

static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};

static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};

const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};

static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1233 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1234 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1235 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1236 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1237 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1238 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1239 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1240 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1241 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1242 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1243 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1244 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1245 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1246 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1247 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1248 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1249 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1250 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1251 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1252 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1253 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1254 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1255 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1256 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1257 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1258 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1259 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1260 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1261 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1262 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1263 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1264 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1265 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1266 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1267 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1268 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1269 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1270 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1271 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1272 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1273 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1274 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1275 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1276 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1277 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1278 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1279 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1280 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1281 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1282 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1283 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1284 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1285 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1286 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1287 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1288 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1289 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1290 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1291 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1292 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1293 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1294 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1295 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1296 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1297 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1298 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1299 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1300 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1301 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1302 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1303 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1304 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1305 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1306 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1307 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1308 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1309 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1310 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1311 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1312 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1313 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1314 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1315 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1316 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1317 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1318 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1319 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1320 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1321 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1322 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1323 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1324 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1325 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1326 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1327 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1328 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1329 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1330 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1331 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1332 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1333 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1334 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1335 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1336 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1337 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1338 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1339 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1340 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1341 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1342 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1343 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1344 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1345 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1346 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1347 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1348 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1349 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1350 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1351 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1352 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1353 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1354 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1355 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1356 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1357 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1358 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1359 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1360 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1361 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1362 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1363 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1364 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1365 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1366 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1367 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1368 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1369 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1370 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1371 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1372 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1373 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1374 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1375 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1376 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1377 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1378 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1379 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1380 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1381 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1382 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1383 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1384 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1385 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1386 };
1387
1388 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1389 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1390 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1391 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1392 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1393 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1394 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1395 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1396 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1397 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1398 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1399 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1400 };
1401
1402 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1403 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1404 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1405 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1406 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1407 };
1408
1409 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1410 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1411 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1412 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1413 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1414 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1415 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1416 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1417 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1418 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1419 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1420 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1421 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1422 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1423 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1424 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1425 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1426 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1427 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1428 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1429 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1430 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1431 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1432 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1433 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1434 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1435 };
1436
1437 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1438 [ROTATOR_ID_0] = mmROT0_BASE,
1439 [ROTATOR_ID_1] = mmROT1_BASE
1440 };
1441
1442 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1443 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1444 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1445 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1446 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1447 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1448 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1449 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1450 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1451 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1452 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1453 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1454 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1455 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1456 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1457 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1458 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1459 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1460 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1461 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1462 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1463 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1464 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1465 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1466 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1467 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1468 };
1469
1470 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1471 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1472 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1473 };
1474
1475 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1476 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1477 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1478 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1479 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1480 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1481 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1482 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1483 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1484 };
1485
1486 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1487 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1488 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1489 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1490 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1491 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1492 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1493 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1494 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1495 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1496 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1497 };
1498
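/*
 * Translate a router index (see enum rtr_id below) to its physical X/Y
 * coordinates on the die. Entries 24-27 have no matching router and are
 * therefore set to (0, 0).
 */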
1499 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1500 RTR_ID_X_Y(2, 4),
1501 RTR_ID_X_Y(3, 4),
1502 RTR_ID_X_Y(4, 4),
1503 RTR_ID_X_Y(5, 4),
1504 RTR_ID_X_Y(6, 4),
1505 RTR_ID_X_Y(7, 4),
1506 RTR_ID_X_Y(8, 4),
1507 RTR_ID_X_Y(9, 4),
1508 RTR_ID_X_Y(10, 4),
1509 RTR_ID_X_Y(11, 4),
1510 RTR_ID_X_Y(12, 4),
1511 RTR_ID_X_Y(13, 4),
1512 RTR_ID_X_Y(14, 4),
1513 RTR_ID_X_Y(15, 4),
1514 RTR_ID_X_Y(16, 4),
1515 RTR_ID_X_Y(17, 4),
1516 RTR_ID_X_Y(2, 11),
1517 RTR_ID_X_Y(3, 11),
1518 RTR_ID_X_Y(4, 11),
1519 RTR_ID_X_Y(5, 11),
1520 RTR_ID_X_Y(6, 11),
1521 RTR_ID_X_Y(7, 11),
1522 RTR_ID_X_Y(8, 11),
1523 RTR_ID_X_Y(9, 11),
1524 RTR_ID_X_Y(0, 0),/* 24 no id */
1525 RTR_ID_X_Y(0, 0),/* 25 no id */
1526 RTR_ID_X_Y(0, 0),/* 26 no id */
1527 RTR_ID_X_Y(0, 0),/* 27 no id */
1528 RTR_ID_X_Y(14, 11),
1529 RTR_ID_X_Y(15, 11),
1530 RTR_ID_X_Y(16, 11),
1531 RTR_ID_X_Y(17, 11)
1532 };
1533
1534 enum rtr_id {
1535 DCORE0_RTR0,
1536 DCORE0_RTR1,
1537 DCORE0_RTR2,
1538 DCORE0_RTR3,
1539 DCORE0_RTR4,
1540 DCORE0_RTR5,
1541 DCORE0_RTR6,
1542 DCORE0_RTR7,
1543 DCORE1_RTR0,
1544 DCORE1_RTR1,
1545 DCORE1_RTR2,
1546 DCORE1_RTR3,
1547 DCORE1_RTR4,
1548 DCORE1_RTR5,
1549 DCORE1_RTR6,
1550 DCORE1_RTR7,
1551 DCORE2_RTR0,
1552 DCORE2_RTR1,
1553 DCORE2_RTR2,
1554 DCORE2_RTR3,
1555 DCORE2_RTR4,
1556 DCORE2_RTR5,
1557 DCORE2_RTR6,
1558 DCORE2_RTR7,
1559 DCORE3_RTR0,
1560 DCORE3_RTR1,
1561 DCORE3_RTR2,
1562 DCORE3_RTR3,
1563 DCORE3_RTR4,
1564 DCORE3_RTR5,
1565 DCORE3_RTR6,
1566 DCORE3_RTR7,
1567 };
1568
1569 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1570 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1571 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1572 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1573 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1574 DCORE0_RTR0
1575 };
1576
1577 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1578 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1579 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1580 };
1581
1582 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1583 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1584 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1585 };
1586
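/* Identifies an EDMA initiator by its SFT interface ID and the DCORE it resides in */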
1587 struct sft_info {
1588 u8 interface_id;
1589 u8 dcore_id;
1590 };
1591
1592 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1593 {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3}, {0, 2}, {0, 3},
1594 };
1595
1596 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1597 DCORE0_RTR0, DCORE0_RTR0
1598 };
1599
1600 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1601 DCORE2_RTR0, DCORE3_RTR7
1602 };
1603
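/*
 * Per-MME set of initiator router IDs: one ID for each of the MME's
 * traffic sources (WAP0/1, write, read and SBTE0-4).
 */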
1604 struct mme_initiators_rtr_id {
1605 u32 wap0;
1606 u32 wap1;
1607 u32 write;
1608 u32 read;
1609 u32 sbte0;
1610 u32 sbte1;
1611 u32 sbte2;
1612 u32 sbte3;
1613 u32 sbte4;
1614 };
1615
1616 enum mme_initiators {
1617 MME_WAP0 = 0,
1618 MME_WAP1,
1619 MME_WRITE,
1620 MME_READ,
1621 MME_SBTE0,
1622 MME_SBTE1,
1623 MME_SBTE2,
1624 MME_SBTE3,
1625 MME_SBTE4,
1626 MME_INITIATORS_MAX
1627 };
1628
1629 static const struct mme_initiators_rtr_id
1630 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1631 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1632 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1633 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1634 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1635 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1636 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1637 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1638 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1639 };
1640
1641 enum razwi_event_sources {
1642 RAZWI_TPC,
1643 RAZWI_MME,
1644 RAZWI_EDMA,
1645 RAZWI_PDMA,
1646 RAZWI_NIC,
1647 RAZWI_DEC,
1648 RAZWI_ROT
1649 };
1650
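/* Maps an HBM memory-controller SPI interrupt cause bit to a human-readable description */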
1651 struct hbm_mc_error_causes {
1652 u32 mask;
1653 char cause[50];
1654 };
1655
1656 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1657 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1658 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1659 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1660 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1661 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1662 };
1663
1664 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1665 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1666 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1667 [HBM_SEI_READ_ERR] = "SEI read data error",
1668 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1669 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1670 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1671 [HBM_SEI_DFI] = "SEI DFI error",
1672 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1673 [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1674 };
1675
1676 struct mmu_spi_sei_cause {
1677 char cause[50];
1678 int clear_bit;
1679 };
1680
1681 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1682 {"page fault", 1}, /* INTERRUPT_CLR[1] */
1683 {"page access", 1}, /* INTERRUPT_CLR[1] */
1684 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */
1685 {"multi hit", 2}, /* INTERRUPT_CLR[2] */
1686 {"mmu rei0", -1}, /* no clear register bit */
1687 {"mmu rei1", -1}, /* no clear register bit */
1688 {"stlb rei0", -1}, /* no clear register bit */
1689 {"stlb rei1", -1}, /* no clear register bit */
1690 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
1691 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */
1692 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */
1693 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */
1694 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1695 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1696 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1697 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
1698 {"slave error", 16}, /* INTERRUPT_CLR[16] */
1699 {"dec error", 17}, /* INTERRUPT_CLR[17] */
1700 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */
1701 };
1702
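/*
 * Parameters of an MMU cache invalidation request: the VA range to invalidate,
 * the invalidation start value and flags, and whether range invalidation is used.
 */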
1703 struct gaudi2_cache_invld_params {
1704 u64 start_va;
1705 u64 end_va;
1706 u32 inv_start_val;
1707 u32 flags;
1708 bool range_invalidation;
1709 };
1710
1711 struct gaudi2_tpc_idle_data {
1712 struct engines_data *e;
1713 unsigned long *mask;
1714 bool *is_idle;
1715 const char *tpc_fmt;
1716 };
1717
1718 struct gaudi2_tpc_mmu_data {
1719 u32 rw_asid;
1720 };
1721
1722 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1723
1724 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1725 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1726 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1727 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1728 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1730 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1731 bool is_memset);
1732 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1733
1734 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1735 {
1736
1737 }
1738
1739 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1740 {
1741 return sizeof(struct packet_msg_short);
1742 }
1743
1744 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1745 {
1746 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1747 }
1748
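/*
 * Invoke ctx->fn() for every enabled TPC, passing its DCORE index, instance
 * index and register offset. Iteration stops on the first error reported
 * through ctx->rc. DCORE0 TPC6 (the PCI TPC) is handled separately as it is
 * not part of the regular per-DCORE layout.
 */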
1749 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1750 {
1751 struct asic_fixed_properties *prop = &hdev->asic_prop;
1752 int dcore, inst, tpc_seq;
1753 u32 offset;
1754
1755 /* init the return code */
1756 ctx->rc = 0;
1757
1758 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1759 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1760 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1761
1762 if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1763 continue;
1764
1765 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1766
1767 ctx->fn(hdev, dcore, inst, offset, ctx);
1768 if (ctx->rc) {
1769 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1770 dcore, inst);
1771 return;
1772 }
1773 }
1774 }
1775
1776 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1777 return;
1778
1779 /* special check for PCI TPC (DCORE0_TPC6) */
1780 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1781 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1782 if (ctx->rc)
1783 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1784 }
1785
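/*
 * Host memory exposed to the device is split into two physical ranges.
 * An address is valid if it is below the end of the first range or at/above
 * the base of the second range, i.e. it does not fall in the hole between them.
 */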
1786 static bool gaudi2_host_phys_addr_valid(u64 addr)
1787 {
1788 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1789 return true;
1790
1791 return false;
1792 }
1793
1794 static int set_number_of_functional_hbms(struct hl_device *hdev)
1795 {
1796 struct asic_fixed_properties *prop = &hdev->asic_prop;
1797 u8 faulty_hbms = hweight64(hdev->dram_binning);
1798
1799 /* check if all HBMs should be used */
1800 if (!faulty_hbms) {
1801 		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1802 prop->num_functional_hbms = GAUDI2_HBM_NUM;
1803 return 0;
1804 }
1805
1806 /*
1807 	 * check for the error condition in which the number of binning
1808 	 * candidates is higher than the maximum supported by the
1809 	 * driver, in which case the supplied binning mask is rejected
1810 	 * and an error is returned
1811 */
1812 if (faulty_hbms > MAX_FAULTY_HBMS) {
1813 dev_err(hdev->dev,
1814 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1815 MAX_FAULTY_HBMS, hdev->dram_binning);
1816 return -EINVAL;
1817 }
1818
1819 /*
1820 	 * when binning is in effect, the number of functional HBMs is reduced by the
1821 	 * number of faulty HBMs, which effectively means GAUDI2_HBM_NUM - 1.
1822 */
1823 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1824 return 0;
1825 }
1826
1827 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1828 {
1829 struct asic_fixed_properties *prop = &hdev->asic_prop;
1830 u32 basic_hbm_page_size;
1831 int rc;
1832
1833 rc = set_number_of_functional_hbms(hdev);
1834 if (rc)
1835 return -EINVAL;
1836
1837 /*
1838 	 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use a
1839 	 * workaround of an x16 bigger page size so that the entire HBM mapping can be
1840 	 * populated in the TLB
1841 */
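	/*
	 * For example (assuming all 6 HBMs are functional and the x16 compensation
	 * factor noted above): basic_hbm_page_size = 6 * 8MB = 48MB, so
	 * dram_page_size = 16 * 48MB = 768MB.
	 */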
1842 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1843 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1844 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1845 prop->dram_size = prop->num_functional_hbms * SZ_16G;
1846 prop->dram_base_address = DRAM_PHYS_BASE;
1847 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1848 prop->dram_supports_virtual_memory = true;
1849
1850 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1851 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1852 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1853 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1854
1855 /* since DRAM page size differs from DMMU page size we need to allocate
1856 * DRAM memory in units of dram_page size and mapping this memory in
1857 * units of DMMU page size. we overcome this size mismatch using a
1858 * scrambling routine which takes a DRAM page and converts it to a DMMU
1859 * page.
1860 * We therefore:
1861 * 1. partition the virtual address space to DRAM-page (whole) pages.
1862 * (suppose we get n such pages)
1863 * 2. limit the amount of virtual address space we got from 1 above to
1864 * a multiple of 64M as we don't want the scrambled address to cross
1865 * the DRAM virtual address space.
1866 * ( m = (n * DRAM_page_size) / DMMU_page_size).
1867 	 * 3. determine the end address accordingly
1868 * end_addr = start_addr + m * 48M
1869 *
1870 * the DRAM address MSBs (63:48) are not part of the roundup calculation
1871 */
1872 prop->dmmu.start_addr = prop->dram_base_address +
1873 (prop->dram_page_size *
1874 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1875
1876 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1877 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1878
1879 return 0;
1880 }
1881
1882 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1883 {
1884 struct asic_fixed_properties *prop = &hdev->asic_prop;
1885 struct hw_queue_properties *q_props;
1886 u32 num_sync_stream_queues = 0;
1887 int i;
1888
1889 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1890 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1891 GFP_KERNEL);
1892
1893 if (!prop->hw_queues_props)
1894 return -ENOMEM;
1895
1896 q_props = prop->hw_queues_props;
1897
1898 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1899 q_props[i].type = QUEUE_TYPE_HW;
1900 q_props[i].driver_only = 0;
1901
1902 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1903 q_props[i].supports_sync_stream = 0;
1904 } else {
1905 q_props[i].supports_sync_stream = 1;
1906 num_sync_stream_queues++;
1907 }
1908
1909 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1910 }
1911
1912 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1913 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1914 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1915
1916 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1917 prop->cfg_base_address = CFG_BASE;
1918 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1919 prop->host_base_address = HOST_PHYS_BASE_0;
1920 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1921 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1922 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1923 prop->user_dec_intr_count = NUMBER_OF_DEC;
1924 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1925 prop->completion_mode = HL_COMPLETION_MODE_CS;
1926 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1927 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1928
1929 prop->sram_base_address = SRAM_BASE_ADDR;
1930 prop->sram_size = SRAM_SIZE;
1931 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1932 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1933
1934 prop->hints_range_reservation = true;
1935
1936 if (hdev->pldm)
1937 prop->mmu_pgt_size = 0x800000; /* 8MB */
1938 else
1939 prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1940
1941 prop->mmu_pte_size = HL_PTE_SIZE;
1942 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1943 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1944
1945 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1946 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1947 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1948 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1949 prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1950 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1951 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1952 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1953 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1954 prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1955 prop->dmmu.page_size = PAGE_SIZE_1GB;
1956 prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1957 prop->dmmu.last_mask = LAST_MASK;
1958 prop->dmmu.host_resident = 1;
1959 /* TODO: will be duplicated until implementing per-MMU props */
1960 prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1961 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1962
1963 /*
1964 	 * this is done in order to be able to validate the FW descriptor (i.e. to validate
1965 	 * that the addresses and allocated space for the FW image do not cross memory bounds).
1966 	 * for this reason we set the DRAM size to the minimum possible, and later it will
1967 	 * be modified according to what is reported in the cpucp info packet
1968 */
1969 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1970
1971 hdev->pmmu_huge_range = true;
1972 prop->pmmu.host_resident = 1;
1973 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1974 prop->pmmu.last_mask = LAST_MASK;
1975 /* TODO: will be duplicated until implementing per-MMU props */
1976 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1977 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1978
1979 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1980 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1981 prop->hints_host_hpage_reserved_va_range.start_addr =
1982 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1983 prop->hints_host_hpage_reserved_va_range.end_addr =
1984 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1985
1986 if (PAGE_SIZE == SZ_64K) {
1987 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1988 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1989 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1990 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1991 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1992 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1993 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1994 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1995 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1996 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1997 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
1998 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
1999 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2000 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2001 prop->pmmu.page_size = PAGE_SIZE_64KB;
2002
2003 /* shifts and masks are the same in PMMU and HPMMU */
2004 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2005 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2006 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2007 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2008 } else {
2009 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2010 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2011 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2012 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2013 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2014 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2015 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2016 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2017 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2018 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2019 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2020 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2021 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2022 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2023 prop->pmmu.page_size = PAGE_SIZE_4KB;
2024
2025 /* shifts and masks are the same in PMMU and HPMMU */
2026 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2027 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2028 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2029 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2030 }
2031
2032 prop->num_engine_cores = CPU_ID_MAX;
2033 prop->cfg_size = CFG_SIZE;
2034 prop->max_asid = MAX_ASID;
2035 prop->num_of_events = GAUDI2_EVENT_SIZE;
2036
2037 prop->dc_power_default = DC_POWER_DEFAULT;
2038
2039 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2040 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2041 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2042 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2043
2044 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2045
2046 prop->mme_master_slave_mode = 1;
2047
2048 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2049 (num_sync_stream_queues * HL_RSVD_SOBS);
2050
2051 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2052 (num_sync_stream_queues * HL_RSVD_MONS);
2053
2054 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2055
2056 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2057
2058 prop->fw_cpu_boot_dev_sts0_valid = false;
2059 prop->fw_cpu_boot_dev_sts1_valid = false;
2060 prop->hard_reset_done_by_fw = false;
2061 prop->gic_interrupts_enable = true;
2062
2063 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2064
2065 prop->max_dec = NUMBER_OF_DEC;
2066
2067 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2068
2069 prop->dma_mask = 64;
2070
2071 return 0;
2072 }
2073
2074 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2075 {
2076 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2077 bool is_wc[3] = {false, false, true};
2078 int rc;
2079
2080 rc = hl_pci_bars_map(hdev, name, is_wc);
2081 if (rc)
2082 return rc;
2083
2084 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2085
2086 return 0;
2087 }
2088
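/*
 * Move the DRAM BAR so it points to the given DRAM address and return the
 * previous BAR base address. Returns U64_MAX if the iATU is owned by the FW
 * or if setting the inbound region fails.
 */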
2089 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2090 {
2091 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2092 struct hl_inbound_pci_region pci_region;
2093 u64 old_addr = addr;
2094 int rc;
2095
2096 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2097 return old_addr;
2098
2099 if (hdev->asic_prop.iatu_done_by_fw)
2100 return U64_MAX;
2101
2102 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2103 pci_region.mode = PCI_BAR_MATCH_MODE;
2104 pci_region.bar = DRAM_BAR_ID;
2105 pci_region.addr = addr;
2106 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2107 if (rc)
2108 return U64_MAX;
2109
2110 if (gaudi2) {
2111 old_addr = gaudi2->dram_bar_cur_addr;
2112 gaudi2->dram_bar_cur_addr = addr;
2113 }
2114
2115 return old_addr;
2116 }
2117
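/*
 * iATU layout configured here (when not already done by the FW):
 * inbound region 0 - CFG space, inbound region 1 - BAR0 reserved area + SRAM,
 * inbound region 2 - DRAM, and a single outbound region pointing to host memory.
 */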
2118 static int gaudi2_init_iatu(struct hl_device *hdev)
2119 {
2120 struct hl_inbound_pci_region inbound_region;
2121 struct hl_outbound_pci_region outbound_region;
2122 u32 bar_addr_low, bar_addr_high;
2123 int rc;
2124
2125 if (hdev->asic_prop.iatu_done_by_fw)
2126 return 0;
2127
2128 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2129 * We must map this region in BAR match mode in order to
2130 * fetch BAR physical base address
2131 */
2132 inbound_region.mode = PCI_BAR_MATCH_MODE;
2133 inbound_region.bar = SRAM_CFG_BAR_ID;
2134 /* Base address must be aligned to Bar size which is 256 MB */
2135 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2136 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2137 if (rc)
2138 return rc;
2139
2140 /* Fetch physical BAR address */
2141 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2142 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2143
2144 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2145
2146 /* Inbound Region 0 - Bar 0 - Point to CFG */
2147 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2148 inbound_region.bar = SRAM_CFG_BAR_ID;
2149 inbound_region.offset_in_bar = 0;
2150 inbound_region.addr = STM_FLASH_BASE_ADDR;
2151 inbound_region.size = CFG_REGION_SIZE;
2152 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2153 if (rc)
2154 return rc;
2155
2156 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2157 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2158 inbound_region.bar = SRAM_CFG_BAR_ID;
2159 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2160 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2161 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2162 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2163 if (rc)
2164 return rc;
2165
2166 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2167 inbound_region.mode = PCI_BAR_MATCH_MODE;
2168 inbound_region.bar = DRAM_BAR_ID;
2169 inbound_region.addr = DRAM_PHYS_BASE;
2170 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2171 if (rc)
2172 return rc;
2173
2174 /* Outbound Region 0 - Point to Host */
2175 outbound_region.addr = HOST_PHYS_BASE_0;
2176 outbound_region.size = HOST_PHYS_SIZE_0;
2177 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2178
2179 return rc;
2180 }
2181
2182 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2183 {
2184 return RREG32(mmHW_STATE);
2185 }
2186
2187 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2188 {
2189 struct asic_fixed_properties *prop = &hdev->asic_prop;
2190
2191 /*
2192 * check for error condition in which number of binning candidates
2193 * is higher than the maximum supported by the driver
2194 */
2195 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2196 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2197 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2198 hdev->tpc_binning);
2199 return -EINVAL;
2200 }
2201
2202 prop->tpc_binning_mask = hdev->tpc_binning;
2203 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2204
2205 return 0;
2206 }
2207
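/*
 * For every binned TPC, a substitute engine is taken out of the enabled mask
 * and its queues are marked as binned: first DCORE0 TPC6, then DCORE3 TPC5.
 */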
2208 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2209 {
2210 struct asic_fixed_properties *prop = &hdev->asic_prop;
2211 struct hw_queue_properties *q_props = prop->hw_queues_props;
2212 u64 tpc_binning_mask;
2213 u8 subst_idx = 0;
2214 int i, rc;
2215
2216 rc = gaudi2_tpc_binning_init_prop(hdev);
2217 if (rc)
2218 return rc;
2219
2220 tpc_binning_mask = prop->tpc_binning_mask;
2221
2222 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2223 u8 subst_seq, binned, qid_base;
2224
2225 if (tpc_binning_mask == 0)
2226 break;
2227
2228 if (subst_idx == 0) {
2229 subst_seq = TPC_ID_DCORE0_TPC6;
2230 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2231 } else {
2232 subst_seq = TPC_ID_DCORE3_TPC5;
2233 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2234 }
2235
2237 /* clear bit from mask */
2238 binned = __ffs(tpc_binning_mask);
2239 /*
2240 * Coverity complains about possible out-of-bound access in
2241 * clear_bit
2242 */
2243 if (binned >= TPC_ID_SIZE) {
2244 dev_err(hdev->dev,
2245 "Invalid binned TPC (binning mask: %llx)\n",
2246 tpc_binning_mask);
2247 return -EINVAL;
2248 }
2249 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2250
2251 /* also clear replacing TPC bit from enabled mask */
2252 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2253
2254 		/* bin the substitute TPC's queues */
2255 q_props[qid_base].binned = 1;
2256 q_props[qid_base + 1].binned = 1;
2257 q_props[qid_base + 2].binned = 1;
2258 q_props[qid_base + 3].binned = 1;
2259
2260 subst_idx++;
2261 }
2262
2263 return 0;
2264 }
2265
2266 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2267 {
2268 struct asic_fixed_properties *prop = &hdev->asic_prop;
2269 u8 num_faulty;
2270
2271 num_faulty = hweight32(hdev->decoder_binning);
2272
2273 /*
2274 * check for error condition in which number of binning candidates
2275 * is higher than the maximum supported by the driver
2276 */
2277 if (num_faulty > MAX_FAULTY_DECODERS) {
2278 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2279 hdev->decoder_binning);
2280 return -EINVAL;
2281 }
2282
2283 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2284
2285 if (prop->decoder_binning_mask)
2286 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2287 else
2288 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2289
2290 return 0;
2291 }
2292
2293 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2294 {
2295 struct asic_fixed_properties *prop = &hdev->asic_prop;
2296
2297 /* check if we should override default binning */
2298 if (!hdev->dram_binning) {
2299 prop->dram_binning_mask = 0;
2300 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2301 return;
2302 }
2303
2304 /* set DRAM binning constraints */
2305 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2306 prop->dram_binning_mask = hdev->dram_binning;
2307 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2308 }
2309
2310 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2311 {
2312 struct asic_fixed_properties *prop = &hdev->asic_prop;
2313 struct hw_queue_properties *q_props;
2314 u8 seq, num_faulty;
2315
2316 num_faulty = hweight32(hdev->edma_binning);
2317
2318 /*
2319 * check for error condition in which number of binning candidates
2320 * is higher than the maximum supported by the driver
2321 */
2322 if (num_faulty > MAX_FAULTY_EDMAS) {
2323 dev_err(hdev->dev,
2324 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2325 hdev->edma_binning);
2326 return -EINVAL;
2327 }
2328
2329 if (!hdev->edma_binning) {
2330 prop->edma_binning_mask = 0;
2331 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2332 return 0;
2333 }
2334
2335 seq = __ffs((unsigned long)hdev->edma_binning);
2336
2337 /* set binning constraints */
2338 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2339 prop->edma_binning_mask = hdev->edma_binning;
2340 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2341
2342 	/* bin the substitute EDMA's queues */
2343 q_props = prop->hw_queues_props;
2344 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2345 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2346 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2347 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2348
2349 return 0;
2350 }
2351
2352 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2353 {
2354 struct asic_fixed_properties *prop = &hdev->asic_prop;
2355 u8 num_faulty, seq;
2356
2357 /* check if we should override default binning */
2358 if (!xbar_edge_iso_mask) {
2359 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2360 return 0;
2361 }
2362
2363 /*
2364 	 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2365 	 * only the FW can set a redundancy value). for the user it will always be 0.
2366 */
2367 num_faulty = hweight32(xbar_edge_iso_mask);
2368
2369 /*
2370 * check for error condition in which number of binning candidates
2371 * is higher than the maximum supported by the driver
2372 */
2373 if (num_faulty > MAX_FAULTY_XBARS) {
2374 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2375 MAX_FAULTY_XBARS);
2376 return -EINVAL;
2377 }
2378
2379 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2380
2381 /* set binning constraints */
2382 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2383 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2384
2385 return 0;
2386 }
2387
2388 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2389 {
2390 int rc;
2391
2392 /*
2393 	 * mark all clusters as good; each component will "fail" a cluster
2394 	 * based on eFuse/user values.
2395 	 * If more than a single cluster is faulty, the chip is unusable
2396 */
2397 hdev->asic_prop.faulty_dram_cluster_map = 0;
2398
2399 gaudi2_set_dram_binning_masks(hdev);
2400
2401 rc = gaudi2_set_edma_binning_masks(hdev);
2402 if (rc)
2403 return rc;
2404
2405 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2406 if (rc)
2407 return rc;
2408
2410 /* always initially set to full mask */
2411 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2412
2413 return 0;
2414 }
2415
2416 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2417 {
2418 struct asic_fixed_properties *prop = &hdev->asic_prop;
2419 int rc;
2420
2421 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2422 if (rc)
2423 return rc;
2424
2425 /* if we have DRAM binning reported by FW we should perform cluster config */
2426 if (prop->faulty_dram_cluster_map) {
2427 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2428
2429 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2430 }
2431
2432 return 0;
2433 }
2434
2435 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2436 {
2437 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2438 struct asic_fixed_properties *prop = &hdev->asic_prop;
2439 long max_power;
2440 u64 dram_size;
2441 int rc;
2442
2443 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2444 return 0;
2445
2446 	/* There is no point in asking for this information again when not doing a hard
2447 	 * reset, as the device CPU hasn't been reset
2448 */
2449 if (hdev->reset_info.in_compute_reset)
2450 return 0;
2451
2452 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2453 mmCPU_BOOT_ERR1);
2454 if (rc)
2455 return rc;
2456
2457 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2458 if (dram_size) {
2459 		/* we can have either 5 or 6 HBMs; other values are invalid */
2460
2461 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2462 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2463 dev_err(hdev->dev,
2464 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2465 dram_size, prop->dram_size);
2466 dram_size = prop->dram_size;
2467 }
2468
2469 prop->dram_size = dram_size;
2470 prop->dram_end_address = prop->dram_base_address + dram_size;
2471 }
2472
2473 if (!strlen(prop->cpucp_info.card_name))
2474 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2475
2476 /* Overwrite binning masks with the actual binning values from F/W */
2477 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2478 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2479 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2480 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2481
2482 /*
2483 * at this point the DRAM parameters need to be updated according to data obtained
2484 * from the FW
2485 */
2486 rc = gaudi2_set_dram_properties(hdev);
2487 if (rc)
2488 return rc;
2489
2490 rc = gaudi2_set_cluster_binning_masks(hdev);
2491 if (rc)
2492 return rc;
2493
2494 rc = gaudi2_set_tpc_binning_masks(hdev);
2495 if (rc)
2496 return rc;
2497
2498 rc = gaudi2_set_dec_binning_masks(hdev);
2499 if (rc)
2500 return rc;
2501
2502 max_power = hl_fw_get_max_power(hdev);
2503 if (max_power < 0)
2504 return max_power;
2505
2506 prop->max_power_default = (u64) max_power;
2507
2508 return 0;
2509 }
2510
2511 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2512 {
2513 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2514 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2515 int rc;
2516
2517 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2518 return 0;
2519
2520 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2521 if (rc)
2522 return rc;
2523
2524 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2525
2526 return 0;
2527 }
2528
2529 static int gaudi2_early_init(struct hl_device *hdev)
2530 {
2531 struct asic_fixed_properties *prop = &hdev->asic_prop;
2532 struct pci_dev *pdev = hdev->pdev;
2533 resource_size_t pci_bar_size;
2534 int rc;
2535
2536 rc = gaudi2_set_fixed_properties(hdev);
2537 if (rc)
2538 return rc;
2539
2540 /* Check BAR sizes */
2541 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2542
2543 if (pci_bar_size != CFG_BAR_SIZE) {
2544 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2545 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2546 rc = -ENODEV;
2547 goto free_queue_props;
2548 }
2549
2550 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2551 if (pci_bar_size != MSIX_BAR_SIZE) {
2552 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2553 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2554 rc = -ENODEV;
2555 goto free_queue_props;
2556 }
2557
2558 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2559 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2560
2561 /*
2562 	 * Only in pldm does the driver configure the iATU
2563 */
2564 if (hdev->pldm)
2565 hdev->asic_prop.iatu_done_by_fw = false;
2566 else
2567 hdev->asic_prop.iatu_done_by_fw = true;
2568
2569 rc = hl_pci_init(hdev);
2570 if (rc)
2571 goto free_queue_props;
2572
2573 	/* Before continuing with the initialization, we need to read the preboot
2574 	 * version to determine whether we are running with security-enabled firmware
2575 */
2576 rc = hl_fw_read_preboot_status(hdev);
2577 if (rc) {
2578 if (hdev->reset_on_preboot_fail)
2579 hdev->asic_funcs->hw_fini(hdev, true, false);
2580 goto pci_fini;
2581 }
2582
2583 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2584 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2585 hdev->asic_funcs->hw_fini(hdev, true, false);
2586 }
2587
2588 return 0;
2589
2590 pci_fini:
2591 hl_pci_fini(hdev);
2592 free_queue_props:
2593 kfree(hdev->asic_prop.hw_queues_props);
2594 return rc;
2595 }
2596
2597 static int gaudi2_early_fini(struct hl_device *hdev)
2598 {
2599 kfree(hdev->asic_prop.hw_queues_props);
2600 hl_pci_fini(hdev);
2601
2602 return 0;
2603 }
2604
2605 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2606 {
2607 switch (arc_id) {
2608 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2609 return true;
2610 default:
2611 return false;
2612 }
2613 }
2614
2615 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2616 {
2617 switch (arc_id) {
2618 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2619 return true;
2620 default:
2621 return false;
2622 }
2623 }
2624
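/*
 * Mark the always-present scheduler ARCs as enabled, then walk all HW queues
 * and enable the ARC that owns each enabled queue, skipping NIC ARCs whose
 * port is masked out and TPC ARCs whose engine is not initialized.
 */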
2625 static void gaudi2_init_arcs(struct hl_device *hdev)
2626 {
2627 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2628 u64 arc_id;
2629 u32 i;
2630
2631 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2632 if (gaudi2_is_arc_enabled(hdev, i))
2633 continue;
2634
2635 gaudi2_set_arc_id_cap(hdev, i);
2636 }
2637
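	/*
	 * Walk the engine queues with a stride of 4 (each QMAN exposes
	 * NUM_OF_PQ_PER_QMAN = 4 PQs, so this visits every QMAN once) and enable
	 * the ARC of every QMAN whose queue is enabled, unless its NIC port or
	 * TPC is masked off.
	 */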
2638 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2639 if (!gaudi2_is_queue_enabled(hdev, i))
2640 continue;
2641
2642 arc_id = gaudi2_queue_id_to_arc_id[i];
2643 if (gaudi2_is_arc_enabled(hdev, arc_id))
2644 continue;
2645
2646 if (gaudi2_is_arc_nic_owned(arc_id) &&
2647 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2648 continue;
2649
2650 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2651 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2652 continue;
2653
2654 gaudi2_set_arc_id_cap(hdev, arc_id);
2655 }
2656 }
2657
2658 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2659 {
2660 u32 reg_base, reg_val;
2661 int rc;
2662
2663 switch (cpu_id) {
2664 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2665 /* Each ARC scheduler has 2 consecutive DCCM blocks */
2666 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2667 ARC_DCCM_BLOCK_SIZE * 2, true);
2668 if (rc)
2669 return rc;
2670 break;
2671 case CPU_ID_SCHED_ARC4:
2672 case CPU_ID_SCHED_ARC5:
2673 case CPU_ID_MME_QMAN_ARC0:
2674 case CPU_ID_MME_QMAN_ARC1:
2675 reg_base = gaudi2_arc_blocks_bases[cpu_id];
2676
2677 /* Scrub lower DCCM block */
2678 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2679 ARC_DCCM_BLOCK_SIZE, true);
2680 if (rc)
2681 return rc;
2682
2683 /* Switch to upper DCCM block */
2684 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2685 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2686
2687 /* Scrub upper DCCM block */
2688 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2689 ARC_DCCM_BLOCK_SIZE, true);
2690 if (rc)
2691 return rc;
2692
2693 /* Switch to lower DCCM block */
2694 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2695 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2696 break;
2697 default:
2698 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2699 ARC_DCCM_BLOCK_SIZE, true);
2700 if (rc)
2701 return rc;
2702 }
2703
2704 return 0;
2705 }
2706
2707 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2708 {
2709 u16 arc_id;
2710
2711 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2712 if (!gaudi2_is_arc_enabled(hdev, arc_id))
2713 continue;
2714
2715 gaudi2_scrub_arc_dccm(hdev, arc_id);
2716 }
2717 }
2718
2719 static int gaudi2_late_init(struct hl_device *hdev)
2720 {
2721 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2722 int rc;
2723
2724 hdev->asic_prop.supports_advanced_cpucp_rc = true;
2725
2726 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2727 gaudi2->virt_msix_db_dma_addr);
2728 if (rc) {
2729 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2730 return rc;
2731 }
2732
2733 rc = gaudi2_fetch_psoc_frequency(hdev);
2734 if (rc) {
2735 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2736 goto disable_pci_access;
2737 }
2738
2739 gaudi2_init_arcs(hdev);
2740 gaudi2_scrub_arcs_dccm(hdev);
2741 gaudi2_init_security(hdev);
2742
2743 return 0;
2744
2745 disable_pci_access:
2746 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2747
2748 return rc;
2749 }
2750
2751 static void gaudi2_late_fini(struct hl_device *hdev)
2752 {
2753 hl_hwmon_release_resources(hdev);
2754 }
2755
2756 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2757 {
2758 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2759
2760 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2761 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2762 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2763 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2764 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2765 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2766 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2767 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2768 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2769 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2770 }
2771
2772 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2773 {
2774 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2775 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2776 u32 block_size, umr_start_idx, num_umr_blocks;
2777 int i;
2778
2779 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2780 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2781 block_size = ARC_DCCM_BLOCK_SIZE * 2;
2782 else
2783 block_size = ARC_DCCM_BLOCK_SIZE;
2784
2785 blocks[i].address = gaudi2_arc_dccm_bases[i];
2786 blocks[i].size = block_size;
2787 }
2788
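	/*
	 * The ACP engine blocks (4 ARC-farm ARCs and 4 MME QM ARCs) are placed
	 * right after the per-ARC DCCM blocks in the mapped-blocks array.
	 */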
2789 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2790 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2791
2792 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2793 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2794
2795 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2796 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2797
2798 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2799 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2800
2801 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2802 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2803
2804 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2805 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2806
2807 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2808 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2809
2810 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2811 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2812
2813 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2814 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2815 for (i = 0 ; i < num_umr_blocks ; i++) {
2816 u8 nic_id, umr_block_id;
2817
2818 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2819 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2820
2821 blocks[umr_start_idx + i].address =
2822 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2823 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2824 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2825 umr_block_id * NIC_UMR_OFFSET;
2826 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2827 }
2828
2829 /* Expose decoder HW configuration block to user */
2830 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2831
2832 for (i = 1; i < NUM_OF_DCORES; ++i) {
2833 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2834 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2835
2836 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2837 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2838
2839 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2840 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2841 }
2842 }
2843
2844 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2845 {
2846 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2847 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2848 int i, j, rc = 0;
2849
2850 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
2851 	 * that holds the extension bits (49..28), these bits must be identical across the entire
2852 	 * allocated range.
2853 */
2854
2855 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2856 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2857 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2858 if (!virt_addr_arr[i]) {
2859 rc = -ENOMEM;
2860 goto free_dma_mem_arr;
2861 }
2862
2863 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2864 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2865 break;
2866 }
2867
2868 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2869 dev_err(hdev->dev,
2870 "MSB of ARC accessible DMA memory are not identical in all range\n");
2871 rc = -EFAULT;
2872 goto free_dma_mem_arr;
2873 }
2874
2875 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2876 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2877
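	/*
	 * Free only the rejected attempts (indices below i). On success, entry i
	 * is the allocation that is kept; on failure, all attempts are released.
	 */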
2878 free_dma_mem_arr:
2879 for (j = 0 ; j < i ; j++)
2880 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2881 dma_addr_arr[j]);
2882
2883 return rc;
2884 }
2885
2886 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2887 {
2888 struct asic_fixed_properties *prop = &hdev->asic_prop;
2889 struct pci_mem_region *region;
2890
2891 /* CFG */
2892 region = &hdev->pci_mem_region[PCI_REGION_CFG];
2893 region->region_base = CFG_BASE;
2894 region->region_size = CFG_SIZE;
2895 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2896 region->bar_size = CFG_BAR_SIZE;
2897 region->bar_id = SRAM_CFG_BAR_ID;
2898 region->used = 1;
2899
2900 /* SRAM */
2901 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2902 region->region_base = SRAM_BASE_ADDR;
2903 region->region_size = SRAM_SIZE;
2904 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2905 region->bar_size = CFG_BAR_SIZE;
2906 region->bar_id = SRAM_CFG_BAR_ID;
2907 region->used = 1;
2908
2909 /* DRAM */
2910 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2911 region->region_base = DRAM_PHYS_BASE;
2912 region->region_size = hdev->asic_prop.dram_size;
2913 region->offset_in_bar = 0;
2914 region->bar_size = prop->dram_pci_bar_size;
2915 region->bar_id = DRAM_BAR_ID;
2916 region->used = 1;
2917 }
2918
2919 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2920 {
2921 struct asic_fixed_properties *prop = &hdev->asic_prop;
2922 int i, j, k;
2923
2924 /* Initialize common user CQ interrupt */
2925 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2926 HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2927
2928 /* Initialize common decoder interrupt */
2929 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2930 HL_COMMON_DEC_INTERRUPT_ID, true);
2931
2932 /* User interrupts structure holds both decoder and user interrupts from various engines.
2933 * We first initialize the decoder interrupts and then we add the user interrupts.
2934 * The only limitation is that the last decoder interrupt id must be smaller
2935 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2936 */
2937
2938 	/* Initialize decoder interrupts; expose only the normal interrupts,
2939 	 * as the error (abnormal) interrupts are handled by the driver
2940 */
2941 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2942 i += 2, j++)
2943 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2944
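	/* j is not reset - user interrupt entries are placed right after the decoder entries */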
2945 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2946 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2947 }
2948
2949 static inline int gaudi2_get_non_zero_random_int(void)
2950 {
2951 int rand = get_random_u32();
2952
2953 return rand ? rand : 1;
2954 }
2955
2956 static int gaudi2_sw_init(struct hl_device *hdev)
2957 {
2958 struct asic_fixed_properties *prop = &hdev->asic_prop;
2959 struct gaudi2_device *gaudi2;
2960 int i, rc;
2961
2962 /* Allocate device structure */
2963 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2964 if (!gaudi2)
2965 return -ENOMEM;
2966
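	/* Collect the event IDs of all valid, non-msg entries in the IRQ map table */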
2967 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2968 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2969 continue;
2970
2971 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2972 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2973 GAUDI2_EVENT_SIZE);
2974 rc = -EINVAL;
2975 goto free_gaudi2_device;
2976 }
2977
2978 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2979 }
2980
2981 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2982 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2983
2984 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
2985
2986 hdev->asic_specific = gaudi2;
2987
2988 /* Create DMA pool for small allocations.
2989 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
2990 * PI/CI registers allocated from this pool have this restriction
2991 */
2992 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
2993 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
2994 if (!hdev->dma_pool) {
2995 dev_err(hdev->dev, "failed to create DMA pool\n");
2996 rc = -ENOMEM;
2997 goto free_gaudi2_device;
2998 }
2999
3000 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3001 if (rc)
3002 goto free_dma_pool;
3003
3004 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3005 if (!hdev->cpu_accessible_dma_pool) {
3006 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3007 rc = -ENOMEM;
3008 goto free_cpu_dma_mem;
3009 }
3010
3011 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3012 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3013 if (rc) {
3014 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3015 rc = -EFAULT;
3016 goto free_cpu_accessible_dma_pool;
3017 }
3018
3019 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3020 &gaudi2->virt_msix_db_dma_addr);
3021 if (!gaudi2->virt_msix_db_cpu_addr) {
3022 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3023 rc = -ENOMEM;
3024 goto free_cpu_accessible_dma_pool;
3025 }
3026
3027 spin_lock_init(&gaudi2->hw_queues_lock);
3028
3029 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3030 &gaudi2->scratchpad_bus_address,
3031 GFP_KERNEL | __GFP_ZERO);
3032 if (!gaudi2->scratchpad_kernel_address) {
3033 rc = -ENOMEM;
3034 goto free_virt_msix_db_mem;
3035 }
3036
3037 gaudi2_user_mapped_blocks_init(hdev);
3038
3039 /* Initialize user interrupts */
3040 gaudi2_user_interrupt_setup(hdev);
3041
3042 hdev->supports_coresight = true;
3043 hdev->supports_sync_stream = true;
3044 hdev->supports_cb_mapping = true;
3045 hdev->supports_wait_for_multi_cs = false;
3046
3047 prop->supports_compute_reset = true;
3048
3049 hdev->asic_funcs->set_pci_memory_regions(hdev);
3050
3051 return 0;
3052
3053 free_virt_msix_db_mem:
3054 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3055 free_cpu_accessible_dma_pool:
3056 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3057 free_cpu_dma_mem:
3058 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3059 hdev->cpu_accessible_dma_address);
3060 free_dma_pool:
3061 dma_pool_destroy(hdev->dma_pool);
3062 free_gaudi2_device:
3063 kfree(gaudi2);
3064 return rc;
3065 }
3066
3067 static int gaudi2_sw_fini(struct hl_device *hdev)
3068 {
3069 struct asic_fixed_properties *prop = &hdev->asic_prop;
3070 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3071
3072 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3073
3074 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3075
3076 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3077 hdev->cpu_accessible_dma_address);
3078
3079 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3080 gaudi2->scratchpad_bus_address);
3081
3082 dma_pool_destroy(hdev->dma_pool);
3083
3084 kfree(gaudi2);
3085
3086 return 0;
3087 }
3088
3089 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3090 {
3091 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3092 QM_GLBL_CFG1_CQF_STOP |
3093 QM_GLBL_CFG1_CP_STOP);
3094
3095 /* stop also the ARC */
3096 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3097 }
3098
3099 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3100 {
3101 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3102 QM_GLBL_CFG1_CQF_FLUSH |
3103 QM_GLBL_CFG1_CP_FLUSH);
3104 }
3105
3106 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3107 {
3108 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3109 }
3110
3111 /**
3112 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3113 *
3114 * @hdev: pointer to the habanalabs device structure
3115  * @queue_id: queue whose fence counters should be cleared
3116  * @skip_fence: if true, set the maximum fence value to all fence counters to avoid
3117  *              getting stuck on any fence value. Otherwise set all fence
3118  *              counters to 0 (standard clear of fence counters)
3119 */
3120 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3121 bool skip_fence)
3122 {
3123 u32 size, reg_base;
3124 u32 addr, val;
3125
3126 reg_base = gaudi2_qm_blocks_bases[queue_id];
3127
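	/*
	 * The cleared range spans from the first fence counter up to (but not
	 * including) the barrier config register, i.e. the fence counters of all
	 * CPs of this QMAN.
	 */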
3128 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3129 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3130
3131 /*
3132 	 * In case we want to make sure that a QM that is stuck on a fence will
3133 	 * be released, we should set the fence counter to a value higher than
3134 	 * the one the QM is waiting for. To satisfy a fence of any value, we
3135 	 * set the maximum fence value to all counters
3136 */
3137 val = skip_fence ? U32_MAX : 0;
3138 gaudi2_memset_device_lbw(hdev, addr, size, val);
3139 }
3140
3141 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3142 {
3143 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3144
3145 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3146 gaudi2_flush_qman_common(hdev, reg_base);
3147 gaudi2_flush_qman_arc_common(hdev, reg_base);
3148 }
3149
3150 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3151 {
3152 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3153 int dcore, inst;
3154
3155 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3156 goto stop_edma_qmans;
3157
3158 /* Stop CPs of PDMA QMANs */
3159 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3160 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3161
3162 stop_edma_qmans:
3163 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3164 return;
3165
3166 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3167 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3168 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3169 u32 qm_base;
3170
3171 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3172 continue;
3173
3174 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3175 inst * DCORE_EDMA_OFFSET;
3176
3177 /* Stop CPs of EDMA QMANs */
3178 gaudi2_stop_qman_common(hdev, qm_base);
3179 }
3180 }
3181 }
3182
3183 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3184 {
3185 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3186 u32 offset, i;
3187
3188 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3189
3190 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3191 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3192 continue;
3193
3194 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3195 }
3196 }
3197
3198 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3199 {
3200 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 u32 reg_base;
3202 int i;
3203
3204 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3205 return;
3206
3207 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3208 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3209 continue;
3210
3211 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3212 gaudi2_stop_qman_common(hdev, reg_base);
3213 }
3214 }
3215
3216 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3217 {
3218 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3219 u32 reg_base;
3220 int i;
3221
3222 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3223 return;
3224
3225 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3226 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3227 continue;
3228
3229 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3230 gaudi2_stop_qman_common(hdev, reg_base);
3231 }
3232 }
3233
3234 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3235 {
3236 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3237 u32 reg_base, queue_id;
3238 int i;
3239
3240 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3241 return;
3242
3243 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3244
3245 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3246 if (!(hdev->nic_ports_mask & BIT(i)))
3247 continue;
3248
3249 reg_base = gaudi2_qm_blocks_bases[queue_id];
3250 gaudi2_stop_qman_common(hdev, reg_base);
3251 }
3252 }
3253
3254 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3255 {
3256 u32 reg_val;
3257
3258 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3259 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3260 }
3261
3262 static void gaudi2_dma_stall(struct hl_device *hdev)
3263 {
3264 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3265 int dcore, inst;
3266
3267 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3268 goto stall_edma;
3269
3270 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3271 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3272
3273 stall_edma:
3274 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3275 return;
3276
3277 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3278 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3279 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3280 u32 core_base;
3281
3282 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3283 continue;
3284
3285 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3286 inst * DCORE_EDMA_OFFSET;
3287
3288 			/* Stall the EDMA cores */
3289 gaudi2_stall_dma_common(hdev, core_base);
3290 }
3291 }
3292 }
3293
3294 static void gaudi2_mme_stall(struct hl_device *hdev)
3295 {
3296 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3297 u32 offset, i;
3298
3299 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3300
3301 for (i = 0 ; i < NUM_OF_DCORES ; i++)
3302 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3303 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3304 }
3305
3306 static void gaudi2_tpc_stall(struct hl_device *hdev)
3307 {
3308 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3309 u32 reg_base;
3310 int i;
3311
3312 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3313 return;
3314
3315 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3316 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3317 continue;
3318
3319 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3320 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3321 }
3322 }
3323
3324 static void gaudi2_rotator_stall(struct hl_device *hdev)
3325 {
3326 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3327 u32 reg_val;
3328 int i;
3329
3330 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3331 return;
3332
3333 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3334 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3335 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3336
3337 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3338 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3339 continue;
3340
3341 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3342 }
3343 }
3344
3345 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3346 {
3347 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3348 }
3349
3350 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3351 {
3352 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3353 int dcore, inst;
3354
3355 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3356 goto stop_edma_qmans;
3357
3358 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3359 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3360
3361 stop_edma_qmans:
3362 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3363 return;
3364
3365 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3366 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3367 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3368 u32 qm_base;
3369
3370 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3371 continue;
3372
3373 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3374 inst * DCORE_EDMA_OFFSET;
3375
3376 /* Disable CPs of EDMA QMANs */
3377 gaudi2_disable_qman_common(hdev, qm_base);
3378 }
3379 }
3380 }
3381
3382 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3383 {
3384 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3385 u32 offset, i;
3386
3387 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3388
3389 for (i = 0 ; i < NUM_OF_DCORES ; i++)
3390 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3391 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3392 }
3393
3394 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3395 {
3396 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3397 u32 reg_base;
3398 int i;
3399
3400 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3401 return;
3402
3403 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3404 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3405 continue;
3406
3407 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3408 gaudi2_disable_qman_common(hdev, reg_base);
3409 }
3410 }
3411
3412 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3413 {
3414 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3415 u32 reg_base;
3416 int i;
3417
3418 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3419 return;
3420
3421 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3422 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3423 continue;
3424
3425 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3426 gaudi2_disable_qman_common(hdev, reg_base);
3427 }
3428 }
3429
3430 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3431 {
3432 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3433 u32 reg_base, queue_id;
3434 int i;
3435
3436 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3437 return;
3438
3439 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3440
3441 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3442 if (!(hdev->nic_ports_mask & BIT(i)))
3443 continue;
3444
3445 reg_base = gaudi2_qm_blocks_bases[queue_id];
3446 gaudi2_disable_qman_common(hdev, reg_base);
3447 }
3448 }
3449
3450 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3451 {
3452 /* Disable the timestamp counter */
3453 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3454
3455 /* Zero the lower/upper parts of the 64-bit counter */
3456 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3457 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3458
3459 /* Enable the counter */
3460 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3461 }
3462
3463 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3464 {
3465 /* Disable the timestamp counter */
3466 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3467 }
3468
3469 static const char *gaudi2_irq_name(u16 irq_number)
3470 {
3471 switch (irq_number) {
3472 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3473 return "gaudi2 cpu eq";
3474 case GAUDI2_IRQ_NUM_COMPLETION:
3475 return "gaudi2 completion";
3476 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3477 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3478 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3479 return "gaudi2 user completion";
3480 default:
3481 return "invalid";
3482 }
3483 }
3484
3485 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3486 {
3487 int i, irq, relative_idx;
3488 struct hl_dec *dec;
3489
3490 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3491 irq = pci_irq_vector(hdev->pdev, i);
3492 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3493
3494 dec = hdev->dec + relative_idx / 2;
3495
3496 /* We pass different structures depending on the irq handler. For the abnormal
3497 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3498 * user_interrupt entry
3499 */
3500 free_irq(irq, ((relative_idx % 2) ?
3501 (void *) dec :
3502 (void *) &hdev->user_interrupt[dec->core_id]));
3503 }
3504 }
3505
3506 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3507 {
3508 int rc, i, irq_init_cnt, irq, relative_idx;
3509 irq_handler_t irq_handler;
3510 struct hl_dec *dec;
3511
3512 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3513 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3514 i++, irq_init_cnt++) {
3515
3516 irq = pci_irq_vector(hdev->pdev, i);
3517 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3518
3519 irq_handler = (relative_idx % 2) ?
3520 hl_irq_handler_dec_abnrm :
3521 hl_irq_handler_user_interrupt;
3522
3523 dec = hdev->dec + relative_idx / 2;
3524
3525 /* We pass different structures depending on the irq handler. For the abnormal
3526 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3527 * user_interrupt entry
3528 */
3529 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3530 ((relative_idx % 2) ?
3531 (void *) dec :
3532 (void *) &hdev->user_interrupt[dec->core_id]));
3533 if (rc) {
3534 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3535 goto free_dec_irqs;
3536 }
3537 }
3538
3539 return 0;
3540
3541 free_dec_irqs:
3542 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3543 return rc;
3544 }
3545
3546 static int gaudi2_enable_msix(struct hl_device *hdev)
3547 {
3548 struct asic_fixed_properties *prop = &hdev->asic_prop;
3549 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3550 int rc, irq, i, j, user_irq_init_cnt;
3551 irq_handler_t irq_handler;
3552 struct hl_cq *cq;
3553
3554 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3555 return 0;
3556
3557 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3558 PCI_IRQ_MSIX);
3559 if (rc < 0) {
3560 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3561 GAUDI2_MSIX_ENTRIES, rc);
3562 return rc;
3563 }
3564
3565 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3566 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3567 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3568 if (rc) {
3569 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3570 goto free_irq_vectors;
3571 }
3572
3573 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3574 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3575 &hdev->event_queue);
3576 if (rc) {
3577 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3578 goto free_completion_irq;
3579 }
3580
3581 rc = gaudi2_dec_enable_msix(hdev);
3582 if (rc) {
3583 dev_err(hdev->dev, "Failed to enable decoder IRQ");
3584 goto free_event_irq;
3585 }
3586
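	/*
	 * User interrupt vectors start at GAUDI2_IRQ_NUM_USER_FIRST, while their
	 * user_interrupt[] entries come right after the decoder entries, hence
	 * j starts at prop->user_dec_intr_count.
	 */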
3587 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3588 user_irq_init_cnt < prop->user_interrupt_count;
3589 i++, j++, user_irq_init_cnt++) {
3590
3591 irq = pci_irq_vector(hdev->pdev, i);
3592 irq_handler = hl_irq_handler_user_interrupt;
3593
3594 rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3595 if (rc) {
3596 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3597 goto free_user_irq;
3598 }
3599 }
3600
3601 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3602
3603 return 0;
3604
3605 free_user_irq:
3606 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3607 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3608
3609 irq = pci_irq_vector(hdev->pdev, i);
3610 free_irq(irq, &hdev->user_interrupt[j]);
3611 }
3612
3613 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3614
3615 free_event_irq:
3616 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3617 	free_irq(irq, &hdev->event_queue);
3618
3619 free_completion_irq:
3620 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3621 free_irq(irq, cq);
3622
3623 free_irq_vectors:
3624 pci_free_irq_vectors(hdev->pdev);
3625
3626 return rc;
3627 }
3628
3629 static void gaudi2_sync_irqs(struct hl_device *hdev)
3630 {
3631 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3632 int i, j;
3633 int irq;
3634
3635 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3636 return;
3637
3638 /* Wait for all pending IRQs to be finished */
3639 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3640
3641 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3642 irq = pci_irq_vector(hdev->pdev, i);
3643 synchronize_irq(irq);
3644 }
3645
3646 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3647 i++, j++) {
3648 irq = pci_irq_vector(hdev->pdev, i);
3649 synchronize_irq(irq);
3650 }
3651
3652 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3653 }
3654
3655 static void gaudi2_disable_msix(struct hl_device *hdev)
3656 {
3657 struct asic_fixed_properties *prop = &hdev->asic_prop;
3658 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3659 struct hl_cq *cq;
3660 int irq, i, j, k;
3661
3662 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3663 return;
3664
3665 gaudi2_sync_irqs(hdev);
3666
3667 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3668 free_irq(irq, &hdev->event_queue);
3669
3670 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3671
3672 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3673 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3674
3675 irq = pci_irq_vector(hdev->pdev, i);
3676 free_irq(irq, &hdev->user_interrupt[j]);
3677 }
3678
3679 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3680 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3681 free_irq(irq, cq);
3682
3683 pci_free_irq_vectors(hdev->pdev);
3684
3685 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3686 }
3687
3688 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3689 {
3690 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3691 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3692 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3693 int rc;
3694
3695 if (hdev->pldm)
3696 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3697 else
3698 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3699
3700 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3701 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3702 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3703 continue;
3704
3705 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3706
3707 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3708
3709 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3710
3711 /* Wait till all traffic from decoder stops
3712 		 * before applying core reset.
3713 */
3714 rc = hl_poll_timeout(
3715 hdev,
3716 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3717 graceful,
3718 (graceful & graceful_pend_mask),
3719 100,
3720 timeout_usec);
3721 if (rc)
3722 dev_err(hdev->dev,
3723 "Failed to stop traffic from DCORE%d Decoder %d\n",
3724 dcore_id, dec_id);
3725 }
3726 }
3727
3728 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3729 {
3730 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3731 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3732 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3733 int rc;
3734
3735 if (hdev->pldm)
3736 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3737 else
3738 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3739
3740 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3741 dec_bit = PCIE_DEC_SHIFT + dec_id;
3742 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3743 continue;
3744
3745 offset = dec_id * PCIE_VDEC_OFFSET;
3746
3747 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3748
3749 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3750
3751 /* Wait till all traffic from decoder stops
3752 		 * before applying core reset.
3753 */
3754 rc = hl_poll_timeout(
3755 hdev,
3756 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3757 graceful,
3758 (graceful & graceful_pend_mask),
3759 100,
3760 timeout_usec);
3761 if (rc)
3762 dev_err(hdev->dev,
3763 "Failed to stop traffic from PCIe Decoder %d\n",
3764 dec_id);
3765 }
3766 }
3767
3768 static void gaudi2_stop_dec(struct hl_device *hdev)
3769 {
3770 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3771 int dcore_id;
3772
3773 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3774 return;
3775
3776 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3777 gaudi2_stop_dcore_dec(hdev, dcore_id);
3778
3779 gaudi2_stop_pcie_dec(hdev);
3780 }
3781
3782 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3783 {
3784 u32 reg_base, reg_val;
3785
3786 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3787 if (run_mode == HL_ENGINE_CORE_RUN)
3788 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3789 else
3790 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3791
3792 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3793 }
3794
3795 static void gaudi2_halt_arcs(struct hl_device *hdev)
3796 {
3797 u16 arc_id;
3798
3799 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3800 if (gaudi2_is_arc_enabled(hdev, arc_id))
3801 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3802 }
3803 }
3804
3805 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3806 {
3807 int rc;
3808 u32 reg_base, val, ack_mask, timeout_usec = 100000;
3809
3810 if (hdev->pldm)
3811 timeout_usec *= 100;
3812
3813 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3814 if (run_mode == HL_ENGINE_CORE_RUN)
3815 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3816 else
3817 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3818
3819 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3820 val, ((val & ack_mask) == ack_mask),
3821 1000, timeout_usec);
3822
3823 if (!rc) {
3824 /* Clear */
3825 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3826 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3827 }
3828
3829 return rc;
3830 }
3831
3832 static void gaudi2_reset_arcs(struct hl_device *hdev)
3833 {
3834 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3835 u16 arc_id;
3836
3837 if (!gaudi2)
3838 return;
3839
3840 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3841 if (gaudi2_is_arc_enabled(hdev, arc_id))
3842 gaudi2_clr_arc_id_cap(hdev, arc_id);
3843 }
3844
3845 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3846 {
3847 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3848 u32 queue_id;
3849 int i;
3850
3851 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3852 return;
3853
3854 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3855
3856 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3857 if (!(hdev->nic_ports_mask & BIT(i)))
3858 continue;
3859
3860 gaudi2_qman_manual_flush_common(hdev, queue_id);
3861 }
3862 }
3863
3864 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3865 u32 num_cores, u32 core_command)
3866 {
3867 int i, rc;
3868
3869
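	/* Request the new run mode from all cores first, then verify that each core acknowledged it */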
3870 for (i = 0 ; i < num_cores ; i++) {
3871 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3872 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3873 }
3874
3875 for (i = 0 ; i < num_cores ; i++) {
3876 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3877 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3878
3879 if (rc) {
3880 dev_err(hdev->dev, "failed to %s arc: %d\n",
3881 (core_command == HL_ENGINE_CORE_HALT) ?
3882 "HALT" : "RUN", core_ids[i]);
3883 return -1;
3884 }
3885 }
3886 }
3887
3888 return 0;
3889 }
3890
3891 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3892 {
3893 u32 wait_timeout_ms;
3894
3895 if (hdev->pldm)
3896 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3897 else
3898 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3899
3900 if (fw_reset)
3901 goto skip_engines;
3902
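	/*
	 * Halt order: stop the QMANs, then stall the ARCs and the engine cores,
	 * then stop the decoders, and finally disable the QMANs and the timestamp.
	 */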
3903 gaudi2_stop_dma_qmans(hdev);
3904 gaudi2_stop_mme_qmans(hdev);
3905 gaudi2_stop_tpc_qmans(hdev);
3906 gaudi2_stop_rot_qmans(hdev);
3907 gaudi2_stop_nic_qmans(hdev);
3908 msleep(wait_timeout_ms);
3909
3910 gaudi2_halt_arcs(hdev);
3911 gaudi2_dma_stall(hdev);
3912 gaudi2_mme_stall(hdev);
3913 gaudi2_tpc_stall(hdev);
3914 gaudi2_rotator_stall(hdev);
3915
3916 msleep(wait_timeout_ms);
3917
3918 gaudi2_stop_dec(hdev);
3919
3920 /*
3921 	 * in case of soft reset do a manual flush for QMANs (currently done
3922 	 * only for NIC QMANs)
3923 */
3924 if (!hard_reset)
3925 gaudi2_nic_qmans_manual_flush(hdev);
3926
3927 gaudi2_disable_dma_qmans(hdev);
3928 gaudi2_disable_mme_qmans(hdev);
3929 gaudi2_disable_tpc_qmans(hdev);
3930 gaudi2_disable_rot_qmans(hdev);
3931 gaudi2_disable_nic_qmans(hdev);
3932 gaudi2_disable_timestamp(hdev);
3933
3934 skip_engines:
3935 if (hard_reset) {
3936 gaudi2_disable_msix(hdev);
3937 return;
3938 }
3939
3940 gaudi2_sync_irqs(hdev);
3941 }
3942
3943 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3944 {
3945 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3946
3947 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3948 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3949 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3950 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3951 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3952 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3953 }
3954
3955 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3956 {
3957 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3958 struct dynamic_fw_load_mgr *dynamic_loader;
3959 struct cpu_dyn_regs *dyn_regs;
3960
3961 /* fill common fields */
3962 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3963 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3964 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3965 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3966 fw_loader->skip_bmc = false;
3967 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3968 fw_loader->dram_bar_id = DRAM_BAR_ID;
3969
3970 if (hdev->asic_type == ASIC_GAUDI2 || hdev->asic_type == ASIC_GAUDI2_SEC)
3971 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3972 else /* ASIC_GAUDI2_FPGA */
3973 fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT;
3974
3975 	/* here we update initial values for a few specific dynamic regs (as
3976 	 * before reading the first descriptor from FW those values have to be
3977 	 * hard-coded). In later stages of the protocol those values will be
3978 	 * updated automatically by reading the FW descriptor so the data there
3979 	 * will always be up-to-date
3980 */
3981 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3982 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3983 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3984 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3985 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3986 }
3987
3988 static int gaudi2_init_cpu(struct hl_device *hdev)
3989 {
3990 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3991 int rc;
3992
3993 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3994 return 0;
3995
3996 if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
3997 return 0;
3998
3999 rc = hl_fw_init_cpu(hdev);
4000 if (rc)
4001 return rc;
4002
4003 gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4004
4005 return 0;
4006 }
4007
4008 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4009 {
4010 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4011 struct asic_fixed_properties *prop = &hdev->asic_prop;
4012 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4013 struct cpu_dyn_regs *dyn_regs;
4014 struct hl_eq *eq;
4015 u32 status;
4016 int err;
4017
4018 if (!hdev->cpu_queues_enable)
4019 return 0;
4020
4021 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4022 return 0;
4023
4024 eq = &hdev->event_queue;
4025
4026 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4027
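	/*
	 * Hand the PQ/EQ/CQ base addresses and sizes to the device CPU through
	 * the CPU_IF registers, signal readiness and then wait for the CPU to
	 * report PQ_INIT_STATUS_READY_FOR_HOST.
	 */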
4028 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4029 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4030
4031 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4032 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4033
4034 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4035 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4036
4037 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4038 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4039 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4040
4041 /* Used for EQ CI */
4042 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4043
4044 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4045
4046 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4047
4048 /* Let the ARC know we are ready as it is now handling those queues */
4049
4050 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4051 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4052
4053 err = hl_poll_timeout(
4054 hdev,
4055 mmCPU_IF_QUEUE_INIT,
4056 status,
4057 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4058 1000,
4059 cpu_timeout);
4060
4061 if (err) {
4062 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4063 return -EIO;
4064 }
4065
4066 /* update FW application security bits */
4067 if (prop->fw_cpu_boot_dev_sts0_valid)
4068 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4069
4070 if (prop->fw_cpu_boot_dev_sts1_valid)
4071 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4072
4073 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4074 return 0;
4075 }
4076
4077 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4078 u32 queue_id_base)
4079 {
4080 struct hl_hw_queue *q;
4081 u32 pq_id, pq_offset;
4082
4083 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4084 q = &hdev->kernel_queues[queue_id_base + pq_id];
4085 pq_offset = pq_id * 4;
4086
4087 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4088 lower_32_bits(q->bus_address));
4089 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4090 upper_32_bits(q->bus_address));
4091 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4092 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4093 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4094 }
4095 }
4096
4097 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4098 {
4099 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4100
4101 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4102 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4103 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4104 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4105
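	/*
	 * All CPs get the same message bases: MSG_BASE0 points to the sync
	 * manager monitor payload-address registers and MSG_BASE1 to the sync
	 * objects.
	 */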
4106 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4107 cp_offset = cp_id * 4;
4108
4109 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4110 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4111 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4112 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4113 }
4114
4115 /* allow QMANs to accept work from ARC CQF */
4116 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4117 }
4118
4119 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4120 u32 queue_id_base)
4121 {
4122 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4123 u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4124
4125 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4126 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4127
4128 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4129 pq_offset = pq_id * 4;
4130
4131 /* Configure QMAN HBW to scratchpad as it is not needed */
4132 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4133 lower_32_bits(gaudi2->scratchpad_bus_address));
4134 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4135 upper_32_bits(gaudi2->scratchpad_bus_address));
4136 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4137 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4138
4139 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4140 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4141 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4142 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4143 }
4144
4145 /* Enable QMAN H/W completion */
4146 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4147 }
4148
4149 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4150 {
4151 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4152 u32 sp_reg_addr;
4153
4154 switch (queue_id_base) {
4155 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4156 fallthrough;
4157 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4158 fallthrough;
4159 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4160 fallthrough;
4161 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4162 fallthrough;
4163 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4164 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4165 break;
4166 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4167 fallthrough;
4168 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4169 fallthrough;
4170 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4171 fallthrough;
4172 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4173 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4174 break;
4175 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4176 fallthrough;
4177 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4178 fallthrough;
4179 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4180 fallthrough;
4181 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4182 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4183 break;
4184 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4185 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4186 break;
4187 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4188 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4189 break;
4190 default:
4191 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4192 return 0;
4193 }
4194
4195 return sp_reg_addr;
4196 }
4197
4198 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4199 u32 queue_id_base)
4200 {
4201 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4202 int map_table_entry;
4203
4204 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4205
4206 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4207 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4208 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4209
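	/*
	 * On a QMAN error, the QMAN writes the engine's event ID (the WDATA set
	 * below) to the dynamic irq-control register resolved above.
	 */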
4210 map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4211 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4212 gaudi2_irq_map_table[map_table_entry].cpu_id);
4213
4214 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4215
4216 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4217 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4218 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4219
4220 /* Enable the QMAN channel.
4221 * PDMA QMAN configuration is different, as we do not allow user to
4222 * access some of the CPs.
4223 * PDMA0: CP2/3 are reserved for the ARC usage.
4224 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4225 */
4226 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4227 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4228 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4229 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4230 else
4231 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4232 }
4233
4234 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4235 u32 queue_id_base)
4236 {
4237 u32 pq_id;
4238
4239 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4240 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4241
4242 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4243 gaudi2_init_qman_cp(hdev, reg_base);
4244 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4245 gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4246 }
4247
4248 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4249 u32 dma_core_id, bool is_secure)
4250 {
4251 u32 prot, irq_handler_offset;
4252 struct cpu_dyn_regs *dyn_regs;
4253 int map_table_entry;
4254
4255 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4256 if (is_secure)
4257 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4258
4259 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4260
4261 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4262 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4263
4264 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4265 lower_32_bits(CFG_BASE + irq_handler_offset));
4266
4267 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4268 upper_32_bits(CFG_BASE + irq_handler_offset));
4269
4270 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4271 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4272 gaudi2_irq_map_table[map_table_entry].cpu_id);
4273
4274 /* Enable the DMA channel */
4275 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4276 }
4277
4278 static void gaudi2_init_kdma(struct hl_device *hdev)
4279 {
4280 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4281 u32 reg_base;
4282
4283 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4284 return;
4285
4286 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4287
4288 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4289
4290 gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4291 }
4292
4293 static void gaudi2_init_pdma(struct hl_device *hdev)
4294 {
4295 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4296 u32 reg_base;
4297
4298 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4299 return;
4300
4301 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4302 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4303
4304 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4305 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4306
4307 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4308 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4309
4310 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4311 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4312
4313 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4314 }
4315
4316 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4317 {
4318 u32 reg_base, base_edma_core_id, base_edma_qman_id;
4319
4320 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4321 base_edma_qman_id = edma_stream_base[seq];
4322
4323 reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4324 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4325
4326 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4327 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4328 }
4329
4330 static void gaudi2_init_edma(struct hl_device *hdev)
4331 {
4332 struct asic_fixed_properties *prop = &hdev->asic_prop;
4333 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4334 int dcore, inst;
4335
4336 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4337 return;
4338
4339 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4340 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4341 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4342
4343 if (!(prop->edma_enabled_mask & BIT(seq)))
4344 continue;
4345
4346 gaudi2_init_edma_instance(hdev, seq);
4347
4348 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4349 }
4350 }
4351 }
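
/*
 * Note (illustration added for clarity, not from the original source): the EDMA instance
 * index is linearized as seq = dcore * NUM_OF_EDMA_PER_DCORE + inst, and the same seq is
 * used both for the edma_enabled_mask bit and for the HW_CAP_EDMA_SHIFT + seq capability
 * bit. For example, assuming NUM_OF_EDMA_PER_DCORE is 2 (a hypothetical value for this
 * sketch), dcore 2 / instance 1 maps to seq 5.
 */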
4352
4353 /*
4354 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4355 * @hdev: pointer to habanalabs device structure.
4356 * @sob_id: sync object ID.
4357 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4358 * @interrupt_id: interrupt ID.
4359 *
4360 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4361 * write directly to the HBW host memory of the virtual MSI-X doorbell.
4362 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4363 *
4364 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4365 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
4366 * completion, by decrementing the sync object value and re-arming the monitor.
4367 */
4368 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4369 u32 first_mon_id, u32 interrupt_id)
4370 {
4371 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4372 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4373 u64 addr;
4374 u8 mask;
4375
4376 /* Reset the SOB value */
4377 sob_offset = sob_id * sizeof(u32);
4378 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4379
4380 /* Configure 3 monitors:
4381 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4382 * 2. Decrement SOB value by 1.
4383 * 3. Re-arm the master monitor.
4384 */
4385
4386 first_mon_offset = first_mon_id * sizeof(u32);
4387
4388 /* 2nd monitor: Decrement SOB value by 1 */
4389 mon_offset = first_mon_offset + sizeof(u32);
4390
4391 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4392 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4393 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4394
4395 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4396 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4397 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4398 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4399
4400 /* 3rd monitor: Re-arm the master monitor */
4401 mon_offset = first_mon_offset + 2 * sizeof(u32);
4402
4403 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4404 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4405 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4406
4407 sob_group = sob_id / 8;
4408 mask = ~BIT(sob_id & 0x7);
4409 mode = 0; /* comparison mode is "greater than or equal to" */
4410 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4411 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4412 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4413 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4414
4415 payload = arm;
4416 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4417
4418 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4419 mon_offset = first_mon_offset;
4420
4421 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4422 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4423
4424 addr = gaudi2->virt_msix_db_dma_addr;
4425 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4426 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4427
4428 payload = interrupt_id;
4429 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4430
4431 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4432 }
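
/*
 * Illustrative summary (added for clarity, derived from the code above): once armed, an
 * initiator's LBW write that raises the SOB value triggers the master monitor, which fires
 * its 3 configured messages: an HBW write of the interrupt ID to the virtual MSI-X
 * doorbell, a signed "-1" increment that restores the SOB value, and a write to MON_ARM
 * that re-arms the master monitor for the next completion.
 */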
4433
4434 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4435 {
4436 u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4437 struct asic_fixed_properties *prop = &hdev->asic_prop;
4438
4439 /* Decoder normal/abnormal interrupts */
4440 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4441 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4442 continue;
4443
4444 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4445 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4446 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4447 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4448
4449 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4450 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4451 interrupt_id += 1;
4452 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4453 }
4454 }
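
/*
 * Note (added for clarity): each enabled decoder consumes two SOB/monitor-triplet pairs,
 * one for the normal and one for the abnormal interrupt, which is why the monitor base
 * advances by 3 per decoder while the interrupt ID advances by 2 (NRM at 2 * decoder_id,
 * ABNRM right after it).
 */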
4455
4456 static void gaudi2_init_sm(struct hl_device *hdev)
4457 {
4458 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4459 u64 cq_address;
4460 u32 reg_val;
4461 int i;
4462
4463 /* Enable HBW/LBW CQ for completion monitors */
4464 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4465 reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4466
4467 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4468 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4469
4470 /* Enable only HBW CQ for KDMA completion monitor */
4471 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4472 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4473
4474 /* Init CQ0 DB */
4475 /* Configure the monitor to trigger MSI-X interrupt */
4476 /* TODO:
4477 * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022)
4478 * and in F/W (SW-93024).
4479 */
4480 if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
4481 u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
4482
4483 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
4484 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
4485 } else {
4486 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0,
4487 lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4488 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0,
4489 upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4490 }
4491 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4492
4493 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4494 cq_address =
4495 hdev->completion_queue[i].bus_address;
4496
4497 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4498 lower_32_bits(cq_address));
4499 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4500 upper_32_bits(cq_address));
4501 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4502 ilog2(HL_CQ_SIZE_IN_BYTES));
4503 }
4504
4505 	/* Configure kernel ASID and MMU BP */
4506 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4507 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4508
4509 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4510 gaudi2_prepare_sm_for_virt_msix_db(hdev);
4511 }
4512
4513 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4514 {
4515 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4516 u32 reg_val;
4517 int i;
4518
4519 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4520 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4521 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4522 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4523 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4524 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4525
4526 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4527 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4528
4529 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4530 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4531 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4532 }
4533 }
4534
4535 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4536 bool config_qman_only)
4537 {
4538 u32 queue_id_base, reg_base, clk_en_addr = 0;
4539
4540 switch (dcore_id) {
4541 case 0:
4542 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4543 break;
4544 case 1:
4545 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4546 clk_en_addr = mmDCORE1_MME_CTRL_LO_QM_SLV_CLK_EN;
4547 break;
4548 case 2:
4549 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4550 break;
4551 case 3:
4552 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4553 clk_en_addr = mmDCORE3_MME_CTRL_LO_QM_SLV_CLK_EN;
4554 break;
4555 default:
4556 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4557 return;
4558 }
4559
4560 if (clk_en_addr && !(hdev->fw_components & FW_TYPE_BOOT_CPU))
4561 WREG32(clk_en_addr, 0x1);
4562
4563 if (!config_qman_only) {
4564 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4565 gaudi2_init_mme_acc(hdev, reg_base);
4566 }
4567
4568 reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4569 gaudi2_init_qman(hdev, reg_base, queue_id_base);
4570 }
4571
4572 static void gaudi2_init_mme(struct hl_device *hdev)
4573 {
4574 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4575 int i;
4576
4577 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4578 return;
4579
4580 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4581 gaudi2_init_dcore_mme(hdev, i, false);
4582
4583 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4584 }
4585 }
4586
4587 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4588 {
4589 /* Mask arithmetic and QM interrupts in TPC */
4590 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4591
4592 /* Set 16 cache lines */
4593 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4594 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4595 }
4596
4597 struct gaudi2_tpc_init_cfg_data {
4598 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4599 };
4600
4601 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4602 u32 offset, struct iterate_module_ctx *ctx)
4603 {
4604 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4605 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4606 u32 queue_id_base;
4607 u8 seq;
4608
4609 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4610
4611 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4612 /* gets last sequence number */
4613 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4614 else
4615 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4616
4617 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4618 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4619
4620 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4621 }
4622
4623 static void gaudi2_init_tpc(struct hl_device *hdev)
4624 {
4625 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4626 struct gaudi2_tpc_init_cfg_data init_cfg_data;
4627 struct iterate_module_ctx tpc_iter;
4628
4629 if (!hdev->asic_prop.tpc_enabled_mask)
4630 return;
4631
4632 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4633 return;
4634
4635 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4636 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4637 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4638 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4639 tpc_iter.fn = &gaudi2_init_tpc_config;
4640 tpc_iter.data = &init_cfg_data;
4641 gaudi2_iterate_tpcs(hdev, &tpc_iter);
4642 }
4643
4644 static void gaudi2_init_rotator(struct hl_device *hdev)
4645 {
4646 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4647 u32 i, reg_base, queue_id;
4648
4649 queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4650
4651 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4652 reg_base = gaudi2_qm_blocks_bases[queue_id];
4653 gaudi2_init_qman(hdev, reg_base, queue_id);
4654
4655 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4656 }
4657 }
4658
4659 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4660 {
4661 u32 sob_id;
4662
4663 /* TODO:
4664 * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W
4665 * (SW-93024).
4666 */
4667 if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
4668 u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4669
4670 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
4671 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id);
4672 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
4673 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1);
4674 return;
4675 }
4676
4677 /* VCMD normal interrupt */
4678 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4679 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4680 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4681 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4682
4683 /* VCMD abnormal interrupt */
4684 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4685 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4686 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4687 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4688 }
4689
4690 static void gaudi2_init_dec(struct hl_device *hdev)
4691 {
4692 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4693 u32 dcore_id, dec_id, dec_bit;
4694 u64 base_addr;
4695
4696 if (!hdev->asic_prop.decoder_enabled_mask)
4697 return;
4698
4699 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4700 return;
4701
4702 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4703 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4704 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4705
4706 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4707 continue;
4708
4709 base_addr = mmDCORE0_DEC0_CMD_BASE +
4710 BRDG_CTRL_BLOCK_OFFSET +
4711 dcore_id * DCORE_OFFSET +
4712 dec_id * DCORE_VDEC_OFFSET;
4713
4714 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4715
4716 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4717 }
4718
4719 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4720 dec_bit = PCIE_DEC_SHIFT + dec_id;
4721 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4722 continue;
4723
4724 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4725 dec_id * DCORE_VDEC_OFFSET;
4726
4727 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4728
4729 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4730 }
4731 }
4732
4733 static void gaudi2_init_msix_gw_table(struct hl_device *hdev)
4734 {
4735 u32 first_reg_offset, last_reg_offset, msix_gw_table_base;
4736 u8 first_bit, last_bit;
4737 int i;
4738
4739 msix_gw_table_base = mmPCIE_WRAP_MSIX_GW_TABLE_0;
4740 first_reg_offset = (GAUDI2_IRQ_NUM_USER_FIRST >> 5) << 2;
4741 first_bit = GAUDI2_IRQ_NUM_USER_FIRST % 32;
4742 last_reg_offset = (GAUDI2_IRQ_NUM_USER_LAST >> 5) << 2;
4743 last_bit = GAUDI2_IRQ_NUM_USER_LAST % 32;
4744
4745 if (first_reg_offset == last_reg_offset) {
4746 WREG32(msix_gw_table_base + first_reg_offset, GENMASK(last_bit, first_bit));
4747 return;
4748 }
4749
4750 WREG32(msix_gw_table_base + first_reg_offset, GENMASK(31, first_bit));
4751 WREG32(msix_gw_table_base + last_reg_offset, GENMASK(last_bit, 0));
4752
4753 for (i = first_reg_offset + 4; i < last_reg_offset ; i += 4)
4754 WREG32(msix_gw_table_base + i, 0xFFFFFFFF);
4755 }
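
/*
 * Worked example (hypothetical IRQ numbers, for illustration only): the gateway table is an
 * array of 32-bit registers, one bit per interrupt, so (irq >> 5) << 2 is the byte offset of
 * the register holding that bit and irq % 32 is the bit position. If GAUDI2_IRQ_NUM_USER_FIRST
 * were 40 and GAUDI2_IRQ_NUM_USER_LAST were 100, the code would write GENMASK(31, 8) at
 * offset 4, GENMASK(4, 0) at offset 12, and 0xFFFFFFFF to the register in between (offset 8).
 */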
4756
4757 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4758 u32 stlb_base, u32 asid, u64 phys_addr)
4759 {
4760 u32 status, timeout_usec;
4761 int rc;
4762
4763 if (hdev->pldm || !hdev->pdev)
4764 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4765 else
4766 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4767
4768 WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4769 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4770 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4771 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4772
4773 rc = hl_poll_timeout(
4774 hdev,
4775 stlb_base + STLB_BUSY_OFFSET,
4776 status,
4777 !(status & 0x80000000),
4778 1000,
4779 timeout_usec);
4780
4781 if (rc) {
4782 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4783 return rc;
4784 }
4785
4786 return 0;
4787 }
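
/*
 * Note (added for clarity): the hop0 physical address is split across two registers, bits
 * 43:12 going to STLB_HOP0_PA43_12 and bits 63:44 to STLB_HOP0_PA63_44, as implied by the
 * register and shift names above; writing STLB_BUSY with bit 31 set kicks off the update,
 * and the same bit is polled until the MMU clears it.
 */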
4788
4789 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4790 u32 start_offset, u32 inv_start_val,
4791 u32 flags)
4792 {
4793 /* clear PMMU mem line cache (only needed in mmu range invalidation) */
4794 if (flags & MMU_OP_CLEAR_MEMCACHE)
4795 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4796
4797 if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4798 return;
4799
4800 WREG32(stlb_base + start_offset, inv_start_val);
4801 }
4802
4803 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4804 struct gaudi2_cache_invld_params *inv_params)
4805 {
4806 u32 status, timeout_usec, start_offset;
4807 int rc;
4808
4809 timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4810 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4811
4812 /* poll PMMU mem line cache (only needed in mmu range invalidation) */
4813 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4814 rc = hl_poll_timeout(
4815 hdev,
4816 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4817 status,
4818 status & 0x1,
4819 1000,
4820 timeout_usec);
4821
4822 if (rc)
4823 return rc;
4824
4825 /* Need to manually reset the status to 0 */
4826 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4827 }
4828
4829 /* Lower cache does not work with cache lines, hence we can skip its
4830 * invalidation upon map and invalidate only upon unmap
4831 */
4832 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4833 return 0;
4834
4835 start_offset = inv_params->range_invalidation ?
4836 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4837
4838 rc = hl_poll_timeout(
4839 hdev,
4840 stlb_base + start_offset,
4841 status,
4842 !(status & 0x1),
4843 1000,
4844 timeout_usec);
4845
4846 return rc;
4847 }
4848
4849 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4850 {
4851 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4852 u32 hw_cap;
4853
4854 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4855
4856 if (gaudi2->hw_cap_initialized & hw_cap)
4857 return true;
4858
4859 return false;
4860 }
4861
4862 /* This function shall be called only for HMMUs for which the capability bit is set */
4863 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4864 {
4865 u32 offset;
4866
4867 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4868 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4869 }
4870
4871 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4872 struct gaudi2_cache_invld_params *inv_params)
4873 {
4874 u32 start_offset;
4875
4876 if (inv_params->range_invalidation) {
4877 		/* Set the address range.
4878 		 * Note that the start address we set in the register is not included in
4879 		 * the invalidation range, by design.
4880 		 * That's why we need to set an address lower than the one we actually
4881 		 * want to be included in the range invalidation.
4882 		 */
4883 u64 start = inv_params->start_va - 1;
4884
4885 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4886
4887 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4888 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4889
4890 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4891 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4892
4893 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4894 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4895
4896 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4897 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4898 } else {
4899 start_offset = STLB_INV_ALL_START_OFFSET;
4900 }
4901
4902 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4903 inv_params->inv_start_val, inv_params->flags);
4904 }
4905
4906 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4907 int dcore_id, int hmmu_id,
4908 struct gaudi2_cache_invld_params *inv_params)
4909 {
4910 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4911
4912 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4913 }
4914
4915 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4916 int dcore_id, int hmmu_id,
4917 struct gaudi2_cache_invld_params *inv_params)
4918 {
4919 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4920
4921 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4922 }
4923
4924 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4925 struct gaudi2_cache_invld_params *inv_params)
4926 {
4927 int dcore_id, hmmu_id;
4928
4929 /* first send all invalidation commands */
4930 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4931 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4932 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4933 continue;
4934
4935 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4936 }
4937 }
4938
4939 /* next, poll all invalidations status */
4940 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4941 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4942 int rc;
4943
4944 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4945 continue;
4946
4947 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4948 inv_params);
4949 if (rc)
4950 return rc;
4951 }
4952 }
4953
4954 return 0;
4955 }
4956
4957 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4958 {
4959 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4960 struct gaudi2_cache_invld_params invld_params;
4961 int rc = 0;
4962
4963 if (hdev->reset_info.hard_reset_pending)
4964 return rc;
4965
4966 invld_params.range_invalidation = false;
4967 invld_params.inv_start_val = 1;
4968
4969 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4970 invld_params.flags = flags;
4971 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4972 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4973 &invld_params);
4974 } else if (flags & MMU_OP_PHYS_PACK) {
4975 invld_params.flags = 0;
4976 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4977 }
4978
4979 return rc;
4980 }
4981
4982 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4983 u32 flags, u32 asid, u64 va, u64 size)
4984 {
4985 struct gaudi2_cache_invld_params invld_params = {0};
4986 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4987 u64 start_va, end_va;
4988 u32 inv_start_val;
4989 int rc = 0;
4990
4991 if (hdev->reset_info.hard_reset_pending)
4992 return 0;
4993
4994 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4995 1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4996 asid << MMU_RANGE_INV_ASID_SHIFT);
4997 start_va = va;
4998 end_va = start_va + size;
4999
5000 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5001 		/* As range invalidation does not support a zero address, we will
5002 		 * do a full invalidation in this case
5003 		 */
5004 if (start_va) {
5005 invld_params.range_invalidation = true;
5006 invld_params.start_va = start_va;
5007 invld_params.end_va = end_va;
5008 invld_params.inv_start_val = inv_start_val;
5009 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5010 } else {
5011 invld_params.range_invalidation = false;
5012 invld_params.inv_start_val = 1;
5013 invld_params.flags = flags;
5014 }
5015
5016
5017 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5018 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5019 &invld_params);
5020 if (rc)
5021 return rc;
5022
5023 } else if (flags & MMU_OP_PHYS_PACK) {
5024 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5025 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5026 invld_params.inv_start_val = inv_start_val;
5027 invld_params.flags = flags;
5028 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5029 }
5030
5031 return rc;
5032 }
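
/*
 * Note (added for clarity): for userptr mappings the PMMU is range-invalidated with the
 * original virtual addresses (falling back to a full invalidation when the range starts at
 * VA 0, which the range registers cannot express), while for PHYS_PACK mappings the HMMUs
 * are invalidated with the scrambled addresses returned by gaudi2_mmu_scramble_addr().
 */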
5033
5034 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5035 {
5036 struct asic_fixed_properties *prop = &hdev->asic_prop;
5037 u64 hop0_addr;
5038 u32 asid, max_asid = prop->max_asid;
5039 int rc;
5040
5041 /* it takes too much time to init all of the ASIDs on palladium */
5042 if (hdev->pldm)
5043 max_asid = min((u32) 8, max_asid);
5044
5045 for (asid = 0 ; asid < max_asid ; asid++) {
5046 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5047 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5048 if (rc) {
5049 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5050 return rc;
5051 }
5052 }
5053
5054 return 0;
5055 }
5056
5057 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5058 {
5059 u32 status, timeout_usec;
5060 int rc;
5061
5062 if (hdev->pldm || !hdev->pdev)
5063 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5064 else
5065 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5066
5067 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5068
5069 rc = hl_poll_timeout(
5070 hdev,
5071 stlb_base + STLB_SRAM_INIT_OFFSET,
5072 status,
5073 !status,
5074 1000,
5075 timeout_usec);
5076
5077 if (rc)
5078 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5079
5080 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5081 if (rc)
5082 return rc;
5083
5084 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5085
5086 rc = hl_poll_timeout(
5087 hdev,
5088 stlb_base + STLB_INV_ALL_START_OFFSET,
5089 status,
5090 !status,
5091 1000,
5092 timeout_usec);
5093
5094 if (rc)
5095 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5096
5097 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5098
5099 return rc;
5100 }
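
/*
 * Note (added for clarity): the common MMU bring-up sequence above is: start an
 * invalidate-all, wait for the STLB SRAM init to finish, program the per-ASID hop0
 * addresses, clear the MMU bypass, wait for the invalidate-all to complete, and only
 * then enable the MMU.
 */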
5101
5102 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5103 {
5104 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5105 u32 mmu_base, stlb_base;
5106 int rc;
5107
5108 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5109 return 0;
5110
5111 mmu_base = mmPMMU_HBW_MMU_BASE;
5112 stlb_base = mmPMMU_HBW_STLB_BASE;
5113
5114 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5115 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5116 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5117 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5118 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5119 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5120 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5121 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5122 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5123 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5124 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5125
5126 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5127
5128 if (PAGE_SIZE == SZ_64K) {
5129 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5130 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5131 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5132 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5133 FIELD_PREP(
5134 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5135 1),
5136 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5137 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5138 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5139 }
5140
5141 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5142
5143 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5144 if (rc)
5145 return rc;
5146
5147 gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5148
5149 return 0;
5150 }
5151
5152 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5153 int hmmu_id)
5154 {
5155 struct asic_fixed_properties *prop = &hdev->asic_prop;
5156 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5157 u32 offset, mmu_base, stlb_base, hw_cap;
5158 u8 dmmu_seq;
5159 int rc;
5160
5161 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5162 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5163
5164 /*
5165 * return if DMMU is already initialized or if it's not out of
5166 * isolation (due to cluster binning)
5167 */
5168 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5169 return 0;
5170
5171 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5172 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5173 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5174
5175 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5176 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5177
5178 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5179 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5180 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5181 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5182 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5183 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5184 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5185 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5186 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5187 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5188 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5189
5190 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5191 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5192
5193 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5194
5195 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5196 if (rc)
5197 return rc;
5198
5199 gaudi2->hw_cap_initialized |= hw_cap;
5200
5201 return 0;
5202 }
5203
5204 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5205 {
5206 int rc, dcore_id, hmmu_id;
5207
5208 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5209 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5210 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5211 if (rc)
5212 return rc;
5213 }
5214
5215 return 0;
5216 }
5217
5218 static int gaudi2_mmu_init(struct hl_device *hdev)
5219 {
5220 int rc;
5221
5222 rc = gaudi2_pci_mmu_init(hdev);
5223 if (rc)
5224 return rc;
5225
5226 rc = gaudi2_hbm_mmu_init(hdev);
5227 if (rc)
5228 return rc;
5229
5230 return 0;
5231 }
5232
5233 static int gaudi2_hw_init(struct hl_device *hdev)
5234 {
5235 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5236 int rc;
5237
5238 /* Let's mark in the H/W that we have reached this point. We check
5239 * this value in the reset_before_init function to understand whether
5240 * we need to reset the chip before doing H/W init. This register is
5241 * cleared by the H/W upon H/W reset
5242 */
5243 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5244
5245 /* Perform read from the device to make sure device is up */
5246 RREG32(mmHW_STATE);
5247
5248 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5249 * So we set it here and if anyone tries to move it later to
5250 * a different address, there will be an error
5251 */
5252 if (hdev->asic_prop.iatu_done_by_fw)
5253 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5254
5255 /*
5256 * Before pushing u-boot/linux to device, need to set the hbm bar to
5257 * base address of dram
5258 */
5259 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5260 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5261 return -EIO;
5262 }
5263
5264 rc = gaudi2_init_cpu(hdev);
5265 if (rc) {
5266 dev_err(hdev->dev, "failed to initialize CPU\n");
5267 return rc;
5268 }
5269
5270 gaudi2_init_msix_gw_table(hdev);
5271
5272 gaudi2_init_scrambler_hbm(hdev);
5273 gaudi2_init_kdma(hdev);
5274
5275 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5276 if (rc) {
5277 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5278 return rc;
5279 }
5280
5281 rc = gaudi2->cpucp_info_get(hdev);
5282 if (rc) {
5283 dev_err(hdev->dev, "Failed to get cpucp info\n");
5284 return rc;
5285 }
5286
5287 rc = gaudi2_mmu_init(hdev);
5288 if (rc)
5289 return rc;
5290
5291 gaudi2_init_pdma(hdev);
5292 gaudi2_init_edma(hdev);
5293 gaudi2_init_sm(hdev);
5294 gaudi2_init_tpc(hdev);
5295 gaudi2_init_mme(hdev);
5296 gaudi2_init_rotator(hdev);
5297 gaudi2_init_dec(hdev);
5298 gaudi2_enable_timestamp(hdev);
5299
5300 rc = gaudi2_coresight_init(hdev);
5301 if (rc)
5302 goto disable_queues;
5303
5304 rc = gaudi2_enable_msix(hdev);
5305 if (rc)
5306 goto disable_queues;
5307
5308 /* Perform read from the device to flush all configuration */
5309 RREG32(mmHW_STATE);
5310
5311 return 0;
5312
5313 disable_queues:
5314 gaudi2_disable_dma_qmans(hdev);
5315 gaudi2_disable_mme_qmans(hdev);
5316 gaudi2_disable_tpc_qmans(hdev);
5317 gaudi2_disable_rot_qmans(hdev);
5318 gaudi2_disable_nic_qmans(hdev);
5319
5320 gaudi2_disable_timestamp(hdev);
5321
5322 return rc;
5323 }
5324
5325 /**
5326 * gaudi2_send_hard_reset_cmd - common function to handle reset
5327 *
5328 * @hdev: pointer to the habanalabs device structure
5329 *
5330 * This function handles the various possible scenarios for reset.
5331  * It considers whether the reset is handled by the driver or the FW, and which FW components are loaded
5332 */
5333 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5334 {
5335 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5336 bool heartbeat_reset, preboot_only, cpu_initialized = false;
5337 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5338 u32 cpu_boot_status;
5339
5340 preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5341 heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5342
5343 /*
5344 	 * Handle the corner case where the failure occurred while loading the cpu management app,
5345 	 * and the driver didn't detect any failure while loading the FW.
5346 	 * In such a scenario the driver will send only HALT_MACHINE,
5347 	 * and no one will respond to this request since the FW is already back in preboot
5348 	 * and cannot handle such a cmd.
5349 	 * In this case, the next time the management app loads it will check the events register,
5350 	 * which will still have the halt indication, and will reboot the device.
5351 	 * The solution is to let preboot clear all relevant registers before the next boot,
5352 	 * once the driver sends COMMS_RST_DEV.
5353 */
5354 cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5355
5356 if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5357 (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5358 cpu_initialized = true;
5359
5360 /*
5361 	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
5362 * 1. FW reset: FW initiate the reset sequence
5363 * 2. driver reset: FW will start HALT sequence (the preparations for the
5364 * reset but not the reset itself as it is not implemented
5365 * on their part) and LKD will wait to let FW complete the
5366 * sequence before issuing the reset
5367 */
5368 if (!preboot_only && cpu_initialized) {
5369 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5370 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5371
5372 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5373 }
5374
5375 /*
5376 * When working with preboot (without Linux/Boot fit) we can
5377 * communicate only using the COMMS commands to issue halt/reset.
5378 *
5379 * For the case in which we are working with Linux/Bootfit this is a hail-mary
5380 * attempt to revive the card in the small chance that the f/w has
5381 * experienced a watchdog event, which caused it to return back to preboot.
5382 * In that case, triggering reset through GIC won't help. We need to
5383 * trigger the reset as if Linux wasn't loaded.
5384 *
5385 * We do it only if the reset cause was HB, because that would be the
5386 * indication of such an event.
5387 *
5388 	 * In case the watchdog hasn't expired but we still got a HB, this won't
5389 	 * do any damage.
5390 */
5391
5392 if (heartbeat_reset || preboot_only || !cpu_initialized) {
5393 if (hdev->asic_prop.hard_reset_done_by_fw)
5394 hl_fw_ask_hard_reset_without_linux(hdev);
5395 else
5396 hl_fw_ask_halt_machine_without_linux(hdev);
5397 }
5398 }
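
/*
 * Note (added for clarity): in short, when a boot-fit/Linux FW is up (cpu_initialized and not
 * preboot-only) the driver signals GAUDI2_EVENT_CPU_HALT_MACHINE through the GIC and waits;
 * additionally, on a heartbeat-triggered reset, with preboot only, or when the CPU never
 * reached SRAM_AVAIL, it falls back to the COMMS path, asking the FW for a hard reset or for
 * a machine halt depending on hard_reset_done_by_fw.
 */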
5399
5400 /**
5401 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5402 *
5403 * @hdev: pointer to the habanalabs device structure
5404 * @reset_sleep_ms: sleep time in msec after reset
5405 *
5406  * This function executes a hard reset, based on whether the driver or the FW should perform it
5407 */
5408 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5409 {
5410 if (hdev->asic_prop.hard_reset_done_by_fw) {
5411 gaudi2_send_hard_reset_cmd(hdev);
5412 return;
5413 }
5414
5415 /* Set device to handle FLR by H/W as we will put the device
5416 * CPU to halt mode
5417 */
5418 WREG32(mmPCIE_AUX_FLR_CTRL,
5419 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5420
5421 gaudi2_send_hard_reset_cmd(hdev);
5422
5423 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5424 }
5425
5426 /**
5427 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5428 *
5429 * @hdev: pointer to the habanalabs device structure
5430 * @reset_sleep_ms: sleep time in msec after reset
5431 * @driver_performs_reset: true if driver should perform reset instead of f/w.
5432 *
5433  * This function executes a soft reset, based on whether the driver or the FW should perform it
5434 */
5435 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5436 bool driver_performs_reset)
5437 {
5438 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5439
5440 if (!driver_performs_reset) {
5441 /* set SP to indicate reset request sent to FW */
5442 if (dyn_regs->cpu_rst_status)
5443 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5444 else
5445 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5446
5447 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5448 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5449 return;
5450 }
5451
5452 /* Block access to engines, QMANs and SM during reset, these
5453 * RRs will be reconfigured after soft reset.
5454 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
5455 */
5456 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5457 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5458
5459 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5460 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5461 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5462
5463 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5464 }
5465
5466 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5467 u32 poll_timeout_us)
5468 {
5469 int i, rc = 0;
5470 u32 reg_val;
5471
5472 /* without this sleep reset will not work */
5473 msleep(reset_sleep_ms);
5474
5475 /* We poll the BTM done indication multiple times after reset due to
5476 	/* We poll the BTM done indication multiple times after reset due to
5477 	 * a HW erratum 'GAUDI2_0300'
5478 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5479 rc = hl_poll_timeout(
5480 hdev,
5481 mmPSOC_GLOBAL_CONF_BTM_FSM,
5482 reg_val,
5483 reg_val == 0,
5484 1000,
5485 poll_timeout_us);
5486
5487 if (rc)
5488 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5489 }
5490
5491 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5492 {
5493 int i, rc = 0;
5494 u32 reg_val;
5495
5496 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5497 rc = hl_poll_timeout(
5498 hdev,
5499 mmCPU_RST_STATUS_TO_HOST,
5500 reg_val,
5501 reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5502 1000,
5503 poll_timeout_us);
5504
5505 if (rc)
5506 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5507 reg_val);
5508 }
5509
5510 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5511 {
5512 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5513 u32 poll_timeout_us, reset_sleep_ms;
5514 bool driver_performs_reset = false;
5515
5516 if (hdev->pldm) {
5517 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5518 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5519 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5520 } else {
5521 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5522 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5523 }
5524
5525 if (fw_reset)
5526 goto skip_reset;
5527
5528 gaudi2_reset_arcs(hdev);
5529
5530 if (hard_reset) {
5531 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5532 gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5533 } else {
5534 /*
5535 		 * As we also have to support working with preboot only (which does not support
5536 		 * soft reset), we have to make sure that security is disabled before letting the
5537 		 * driver do the reset. The user shall control the BFE flags to avoid asking for a
5538 		 * soft reset on a secured device with preboot only.
5539 */
5540 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5541 !hdev->asic_prop.fw_security_enabled);
5542 gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5543 }
5544
5545 skip_reset:
5546 if (driver_performs_reset || hard_reset)
5547 gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5548 else
5549 gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5550
5551 if (!gaudi2)
5552 return;
5553
5554 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5555 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5556
5557 /*
5558 * Clear NIC capability mask in order for driver to re-configure
5559 * NIC QMANs. NIC ports will not be re-configured during soft
5560 * reset as we call gaudi2_nic_init only during hard reset
5561 */
5562 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5563
5564 if (hard_reset) {
5565 gaudi2->hw_cap_initialized &=
5566 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5567 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5568 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5569 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5570 HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5571
5572 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5573 } else {
5574 gaudi2->hw_cap_initialized &=
5575 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5576 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5577 HW_CAP_ROT_MASK);
5578 }
5579 }
5580
5581 static int gaudi2_suspend(struct hl_device *hdev)
5582 {
5583 int rc;
5584
5585 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5586 if (rc)
5587 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5588
5589 return rc;
5590 }
5591
5592 static int gaudi2_resume(struct hl_device *hdev)
5593 {
5594 return gaudi2_init_iatu(hdev);
5595 }
5596
5597 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5598 void *cpu_addr, dma_addr_t dma_addr, size_t size)
5599 {
5600 int rc;
5601
5602 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5603 VM_DONTCOPY | VM_NORESERVE;
5604
5605 #ifdef _HAS_DMA_MMAP_COHERENT
5606
5607 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5608 if (rc)
5609 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5610
5611 #else
5612
5613 rc = remap_pfn_range(vma, vma->vm_start,
5614 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5615 size, vma->vm_page_prot);
5616 if (rc)
5617 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5618
5619 #endif
5620
5621 return rc;
5622 }
5623
5624 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5625 {
5626 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5627 u64 hw_cap_mask = 0;
5628 u64 hw_tpc_cap_bit = 0;
5629 u64 hw_nic_cap_bit = 0;
5630 u64 hw_test_cap_bit = 0;
5631
5632 switch (hw_queue_id) {
5633 case GAUDI2_QUEUE_ID_PDMA_0_0:
5634 case GAUDI2_QUEUE_ID_PDMA_0_1:
5635 case GAUDI2_QUEUE_ID_PDMA_1_0:
5636 hw_cap_mask = HW_CAP_PDMA_MASK;
5637 break;
5638 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5639 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5640 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5641 break;
5642 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5643 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5644 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5645 break;
5646 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5647 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5648 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5649 break;
5650 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5651 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5652 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5653 break;
5654
5655 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5656 hw_test_cap_bit = HW_CAP_MME_SHIFT;
5657 break;
5658
5659 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5660 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5661 break;
5662
5663 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5664 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5665 break;
5666
5667 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5668 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5669 break;
5670
5671 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5672 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5673 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5674
5675 /* special case where cap bit refers to the first queue id */
5676 if (!hw_tpc_cap_bit)
5677 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5678 break;
5679
5680 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5681 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5682 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5683 break;
5684
5685 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5686 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5687 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5688 break;
5689
5690 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5691 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5692 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5693 break;
5694
5695 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5696 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5697 break;
5698
5699 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5700 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5701 break;
5702
5703 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5704 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5705
5706 /* special case where cap bit refers to the first queue id */
5707 if (!hw_nic_cap_bit)
5708 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5709 break;
5710
5711 case GAUDI2_QUEUE_ID_CPU_PQ:
5712 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5713
5714 default:
5715 return false;
5716 }
5717
5718 if (hw_tpc_cap_bit)
5719 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5720
5721 if (hw_nic_cap_bit)
5722 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5723
5724 if (hw_test_cap_bit)
5725 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5726
5727 return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5728 }
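
/*
 * Worked example (for illustration): each engine exposes 4 PQs, so
 * (hw_queue_id - <base queue id>) >> 2 selects the engine index within its block; e.g. the
 * second ROT engine's queues map to capability bit HW_CAP_ROT_SHIFT + 1, which is exactly
 * the bit set by gaudi2_init_rotator() above.
 */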
5729
5730 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5731 {
5732 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5733
5734 switch (arc_id) {
5735 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5736 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5737 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5738
5739 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5740 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5741
5742 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5743 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5744
5745 default:
5746 return false;
5747 }
5748 }
5749
5750 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5751 {
5752 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5753
5754 switch (arc_id) {
5755 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5756 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5757 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5758 break;
5759
5760 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5761 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5762 break;
5763
5764 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5765 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5766 break;
5767
5768 default:
5769 return;
5770 }
5771 }
5772
5773 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5774 {
5775 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5776
5777 switch (arc_id) {
5778 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5779 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5780 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5781 break;
5782
5783 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5784 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5785 break;
5786
5787 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5788 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5789 break;
5790
5791 default:
5792 return;
5793 }
5794 }
5795
5796 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5797 {
5798 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5799 u32 pq_offset, reg_base, db_reg_offset, db_value;
5800
5801 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5802 /*
5803 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5804 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5805 * number.
5806 */
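		/* e.g. a hw_queue_id whose two LSBs are 0x1 selects internal PQ 1, i.e. PQ_PI_1 (byte offset 4) */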
5807 pq_offset = (hw_queue_id & 0x3) * 4;
5808 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5809 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5810 } else {
5811 db_reg_offset = mmCPU_IF_PF_PQ_PI;
5812 }
5813
5814 db_value = pi;
5815
5816 /* ring the doorbell */
5817 WREG32(db_reg_offset, db_value);
5818
5819 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5820 /* make sure device CPU will read latest data from host */
5821 mb();
5822 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5823 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5824 }
5825 }
5826
5827 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5828 {
5829 __le64 *pbd = (__le64 *) bd;
5830
5831 	/* The QMANs are on the host memory so a simple copy suffices */
5832 pqe[0] = pbd[0];
5833 pqe[1] = pbd[1];
5834 }
5835
5836 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5837 dma_addr_t *dma_handle, gfp_t flags)
5838 {
5839 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5840 }
5841
5842 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5843 void *cpu_addr, dma_addr_t dma_handle)
5844 {
5845 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5846 }
5847
5848 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5849 u32 timeout, u64 *result)
5850 {
5851 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5852
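	/* Without an initialized CPU queue there is no F/W to message; report success with a zeroed result */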
5853 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5854 if (result)
5855 *result = 0;
5856 return 0;
5857 }
5858
5859 if (!timeout)
5860 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5861
5862 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5863 }
5864
5865 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5866 gfp_t mem_flags, dma_addr_t *dma_handle)
5867 {
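	/* The pool serves fixed-size blocks only, so reject any request that does not fit in one block */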
5868 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5869 return NULL;
5870
5871 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5872 }
5873
5874 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5875 {
5876 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5877 }
5878
5879 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5880 dma_addr_t *dma_handle)
5881 {
5882 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5883 }
5884
5885 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5886 {
5887 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5888 }
5889
5890 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5891 enum dma_data_direction dir)
5892 {
5893 dma_addr_t dma_addr;
5894
5895 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5896 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5897 return 0;
5898
5899 return dma_addr;
5900 }
5901
5902 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5903 enum dma_data_direction dir)
5904 {
5905 dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5906 }
5907
5908 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5909 {
5910 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5911 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5912
5913 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5914 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5915 return -EINVAL;
5916 }
5917
5918 /* Just check if CB address is valid */
5919
5920 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5921 parser->user_cb_size,
5922 asic_prop->sram_user_base_address,
5923 asic_prop->sram_end_address))
5924 return 0;
5925
5926 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5927 parser->user_cb_size,
5928 asic_prop->dram_user_base_address,
5929 asic_prop->dram_end_address))
5930 return 0;
5931
5932 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5933 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5934 parser->user_cb_size,
5935 asic_prop->dmmu.start_addr,
5936 asic_prop->dmmu.end_addr))
5937 return 0;
5938
5939 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5940 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5941 parser->user_cb_size,
5942 asic_prop->pmmu.start_addr,
5943 asic_prop->pmmu.end_addr) ||
5944 hl_mem_area_inside_range(
5945 (u64) (uintptr_t) parser->user_cb,
5946 parser->user_cb_size,
5947 asic_prop->pmmu_huge.start_addr,
5948 asic_prop->pmmu_huge.end_addr))
5949 return 0;
5950
5951 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
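		/* Without the PMMU, a host-resident CB is accessed by its physical address, which is only
		 * safe when the device's DMA is not translated by an IOMMU
		 */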
5952 if (!hdev->pdev)
5953 return 0;
5954
5955 if (!device_iommu_mapped(&hdev->pdev->dev))
5956 return 0;
5957 }
5958
5959 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5960 parser->user_cb, parser->user_cb_size);
5961
5962 return -EFAULT;
5963 }
5964
5965 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5966 {
5967 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5968
5969 if (!parser->is_kernel_allocated_cb)
5970 return gaudi2_validate_cb_address(hdev, parser);
5971
5972 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5973 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5974 return -EINVAL;
5975 }
5976
5977 return 0;
5978 }
5979
5980 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5981 {
5982 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5983
5984 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5985 return 0;
5986
5987 return hl_fw_send_heartbeat(hdev);
5988 }
5989
5990 /* This is an internal helper function, used to update the KDMA mmu props.
5991 * Should be called with a proper kdma lock.
5992 */
5993 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5994 bool mmu_bypass, u32 asid)
5995 {
5996 u32 rw_asid, rw_mmu_bp;
5997
5998 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5999 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6000
6001 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6002 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6003
6004 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6005 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6006 }
6007
6008 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6009 u32 mon_payload, u32 sync_value)
6010 {
6011 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6012 u8 mask;
6013
6014 sob_offset = sob_id * 4;
6015 mon_offset = mon_id * 4;
6016
6017 /* Reset the SOB value */
6018 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6019
6020 	/* Because CQ_EN is set, the payload address register holds the CQ ID rather than an address */
6021 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6022
6023 /* Configure this address with CS index because CQ_EN is set */
6024 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6025
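	/* Build an (active-low) mask so that only the selected SOB within its group of 8 is monitored */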
6026 sync_group_id = sob_id / 8;
6027 mask = ~(1 << (sob_id & 0x7));
6028 mode = 1; /* comparison mode is "equal to" */
6029
6030 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6031 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6032 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6033 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6034 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6035 }
6036
6037 /* Internal function that submits a single transfer (or memset) to the KDMA engine and busy-waits for its completion */
6038 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6039 u64 src_addr, u64 dst_addr,
6040 u32 size, bool is_memset)
6041 {
6042 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6043 struct hl_cq_entry *cq_base;
6044 struct hl_cq *cq;
6045 u64 comp_addr;
6046 int rc;
6047
6048 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6049 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6050 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6051
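	/* Completion flow: the KDMA write-completion increments the reserved SOB, the armed monitor then
	 * writes its payload (1) into the reserved CQ entry, and that entry is polled below
	 */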
6052 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6053 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6054
6055 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6056 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6057
6058 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6059 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6060 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6061 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6062 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6063 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6064 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6065 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6066
6067 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6068 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6069
6070 if (is_memset)
6071 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6072
6073 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6074
6075 /* Wait for completion */
6076 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6077 cq_base = cq->kernel_address;
6078 polling_addr = (u32 *)&cq_base[cq->ci];
6079
6080 if (hdev->pldm)
6081 		/* add 20 seconds of timeout for each 1MB of transfer size */
6082 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6083 else
6084 timeout = KDMA_TIMEOUT_USEC;
6085
6086 /* Polling */
6087 rc = hl_poll_timeout_memory(
6088 hdev,
6089 polling_addr,
6090 status,
6091 (status == 1),
6092 1000,
6093 timeout,
6094 true);
6095
6096 *polling_addr = 0;
6097
6098 if (rc) {
6099 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6100 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6101 return rc;
6102 }
6103
6104 cq->ci = hl_cq_inc_ptr(cq->ci);
6105
6106 return 0;
6107 }
6108
6109 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6110 {
6111 u32 i;
6112
6113 for (i = 0 ; i < size ; i += sizeof(u32))
6114 WREG32(addr + i, val);
6115 }
6116
6117 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6118 {
6119 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6120
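	/* Test mode switches the QMAN protection to the trusted test setting and disables PQC so the
	 * driver can push a raw test packet; leaving test mode restores the regular trusted protection
	 * and re-enables PQC
	 */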
6121 if (enable) {
6122 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6123 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6124 } else {
6125 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6126 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6127 }
6128 }
6129
6130 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6131 {
6132 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6133 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6134 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6135 struct packet_msg_short *msg_short_pkt;
6136 dma_addr_t pkt_dma_addr;
6137 size_t pkt_size;
6138 int rc;
6139
6140 if (hdev->pldm)
6141 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6142 else
6143 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6144
6145 pkt_size = sizeof(*msg_short_pkt);
6146 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6147 if (!msg_short_pkt) {
6148 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6149 hw_queue_id);
6150 return -ENOMEM;
6151 }
6152
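	/* Build a MSG_SHORT packet that writes sob_val to the chosen sync object; the test below then
	 * polls the SOB until that value shows up
	 */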
6153 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6154 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6155 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6156 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6157 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6158
6159 msg_short_pkt->value = cpu_to_le32(sob_val);
6160 msg_short_pkt->ctl = cpu_to_le32(tmp);
6161
6162 /* Reset the SOB value */
6163 WREG32(sob_addr, 0);
6164
6165 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6166 if (rc) {
6167 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6168 hw_queue_id);
6169 goto free_pkt;
6170 }
6171
6172 rc = hl_poll_timeout(
6173 hdev,
6174 sob_addr,
6175 tmp,
6176 (tmp == sob_val),
6177 1000,
6178 timeout_usec);
6179
6180 if (rc == -ETIMEDOUT) {
6181 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6182 hw_queue_id, tmp);
6183 rc = -EIO;
6184 }
6185
6186 /* Reset the SOB value */
6187 WREG32(sob_addr, 0);
6188
6189 free_pkt:
6190 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6191 return rc;
6192 }
6193
6194 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6195 {
6196 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6197
6198 /*
6199 	 * check the capability here, as send_cpu_message() will not update the result
6200 	 * value when the CPU queue capability is missing
6201 */
6202 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6203 return 0;
6204
6205 return hl_fw_test_cpu_queue(hdev);
6206 }
6207
6208 static int gaudi2_test_queues(struct hl_device *hdev)
6209 {
6210 int i, rc, ret_val = 0;
6211
6212 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6213 if (!gaudi2_is_queue_enabled(hdev, i))
6214 continue;
6215
6216 gaudi2_qman_set_test_mode(hdev, i, true);
6217 rc = gaudi2_test_queue(hdev, i);
6218 gaudi2_qman_set_test_mode(hdev, i, false);
6219
6220 if (rc) {
6221 ret_val = -EINVAL;
6222 goto done;
6223 }
6224 }
6225
6226 rc = gaudi2_test_cpu_queue(hdev);
6227 if (rc) {
6228 ret_val = -EINVAL;
6229 goto done;
6230 }
6231
6232 done:
6233 return ret_val;
6234 }
6235
6236 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6237 {
6238 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6239 size_t irq_arr_size;
6240
6241 /* TODO: missing gaudi2_nic_resume.
6242 	 * Until it is implemented, nic_hw_cap_initialized will remain zeroed
6243 */
6244 gaudi2_init_arcs(hdev);
6245 gaudi2_scrub_arcs_dccm(hdev);
6246 gaudi2_init_security(hdev);
6247
6248 /* Unmask all IRQs since some could have been received during the soft reset */
6249 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6250 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6251 }
6252
6253 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6254 struct iterate_module_ctx *ctx)
6255 {
6256 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6257 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6258 bool is_eng_idle;
6259 int engine_idx;
6260
6261 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6262 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6263 else
6264 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6265 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6266
6267 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6268 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6269 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6270 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6271
6272 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6273 IS_TPC_IDLE(tpc_cfg_sts);
6274 *(idle_data->is_idle) &= is_eng_idle;
6275
6276 if (idle_data->mask && !is_eng_idle)
6277 set_bit(engine_idx, idle_data->mask);
6278
6279 if (idle_data->e)
6280 hl_engine_data_sprintf(idle_data->e,
6281 idle_data->tpc_fmt, dcore, inst,
6282 is_eng_idle ? "Y" : "N",
6283 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6284 }
6285
6286 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6287 struct engines_data *e)
6288 {
6289 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6290 mme_arch_sts, dec_swreg15, dec_enabled_bit;
6291 struct asic_fixed_properties *prop = &hdev->asic_prop;
6292 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6293 unsigned long *mask = (unsigned long *) mask_arr;
6294 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6295 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6296 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6297 const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6298 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6299 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6300 bool is_idle = true, is_eng_idle;
6301 u64 offset;
6302
6303 struct gaudi2_tpc_idle_data tpc_idle_data = {
6304 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6305 .e = e,
6306 .mask = mask,
6307 .is_idle = &is_idle,
6308 };
6309 struct iterate_module_ctx tpc_iter = {
6310 .fn = &gaudi2_is_tpc_engine_idle,
6311 .data = &tpc_idle_data,
6312 };
6313
6314 int engine_idx, i, j;
6315
6316 /* EDMA, Two engines per Dcore */
6317 if (e)
6318 hl_engine_data_sprintf(e,
6319 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n"
6320 "---- ---- ------- ------------ ----------------------\n");
6321
6322 for (i = 0; i < NUM_OF_DCORES; i++) {
6323 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6324 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6325
6326 if (!(prop->edma_enabled_mask & BIT(seq)))
6327 continue;
6328
6329 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6330 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6331 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6332
6333 dma_core_idle_ind_mask =
6334 RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6335
6336 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6337 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6338 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6339
6340 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6341 IS_DMA_IDLE(dma_core_idle_ind_mask);
6342 is_idle &= is_eng_idle;
6343
6344 if (mask && !is_eng_idle)
6345 set_bit(engine_idx, mask);
6346
6347 if (e)
6348 hl_engine_data_sprintf(e, edma_fmt, i, j,
6349 is_eng_idle ? "Y" : "N",
6350 qm_glbl_sts0,
6351 dma_core_idle_ind_mask);
6352 }
6353 }
6354
6355 /* PDMA, Two engines in Full chip */
6356 if (e)
6357 hl_engine_data_sprintf(e,
6358 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n"
6359 "---- ------- ------------ ----------------------\n");
6360
6361 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6362 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6363 offset = i * PDMA_OFFSET;
6364 dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6365
6366 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6367 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6368 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6369
6370 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6371 IS_DMA_IDLE(dma_core_idle_ind_mask);
6372 is_idle &= is_eng_idle;
6373
6374 if (mask && !is_eng_idle)
6375 set_bit(engine_idx, mask);
6376
6377 if (e)
6378 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6379 qm_glbl_sts0, dma_core_idle_ind_mask);
6380 }
6381
6382 /* NIC, twelve macros in Full chip */
6383 if (e && hdev->nic_ports_mask)
6384 hl_engine_data_sprintf(e,
6385 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
6386 "--- ------- ------------ ----------\n");
6387
6388 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
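		/* Two engines share each NIC macro: an even engine starts a new macro base offset,
		 * an odd engine adds the second-QMAN offset on top of it
		 */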
6389 if (!(i & 1))
6390 offset = i / 2 * NIC_OFFSET;
6391 else
6392 offset += NIC_QM_OFFSET;
6393
6394 if (!(hdev->nic_ports_mask & BIT(i)))
6395 continue;
6396
6397 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6398
6399
6400 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6401 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6402 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6403
6404 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6405 is_idle &= is_eng_idle;
6406
6407 if (mask && !is_eng_idle)
6408 set_bit(engine_idx, mask);
6409
6410 if (e)
6411 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6412 qm_glbl_sts0, qm_cgm_sts);
6413 }
6414
6415 if (e)
6416 hl_engine_data_sprintf(e,
6417 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
6418 "--- ---- ------- ------------ ---------------\n");
6419 /* MME, one per Dcore */
6420 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6421 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6422 offset = i * DCORE_OFFSET;
6423
6424 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6425 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6426 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6427
6428 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6429 is_idle &= is_eng_idle;
6430
6431 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6432 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6433 is_idle &= is_eng_idle;
6434
6435 if (e)
6436 hl_engine_data_sprintf(e, mme_fmt, i, "N",
6437 is_eng_idle ? "Y" : "N",
6438 qm_glbl_sts0,
6439 mme_arch_sts);
6440
6441 if (mask && !is_eng_idle)
6442 set_bit(engine_idx, mask);
6443 }
6444
6445 /*
6446 * TPC
6447 */
6448 if (e && prop->tpc_enabled_mask)
6449 hl_engine_data_sprintf(e,
6450 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n"
6451 "---- --- -------- ------------ ---------- ----------------------\n");
6452
6453 gaudi2_iterate_tpcs(hdev, &tpc_iter);
6454
6455 	/* Decoders, two per Dcore plus two shared PCIe decoders */
6456 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6457 hl_engine_data_sprintf(e,
6458 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
6459 "---- --- ------- ---------------\n");
6460
6461 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6462 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6463 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6464 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6465 continue;
6466
6467 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6468 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6469 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6470
6471 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6472 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6473 is_idle &= is_eng_idle;
6474
6475 if (mask && !is_eng_idle)
6476 set_bit(engine_idx, mask);
6477
6478 if (e)
6479 hl_engine_data_sprintf(e, dec_fmt, i, j,
6480 is_eng_idle ? "Y" : "N", dec_swreg15);
6481 }
6482 }
6483
6484 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6485 hl_engine_data_sprintf(e,
6486 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
6487 "-------- ------- ---------------\n");
6488
6489 	/* Check shared (PCIe) decoders */
6490 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6491 dec_enabled_bit = PCIE_DEC_SHIFT + i;
6492 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6493 continue;
6494
6495 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6496 offset = i * DCORE_DEC_OFFSET;
6497 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6498 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6499 is_idle &= is_eng_idle;
6500
6501 if (mask && !is_eng_idle)
6502 set_bit(engine_idx, mask);
6503
6504 if (e)
6505 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6506 is_eng_idle ? "Y" : "N", dec_swreg15);
6507 }
6508
6509 if (e)
6510 hl_engine_data_sprintf(e,
6511 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6512 "---- ---- ------- ------------ ---------- -------------\n");
6513
6514 for (i = 0 ; i < NUM_OF_ROT ; i++) {
6515 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6516
6517 offset = i * ROT_OFFSET;
6518
6519 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6520 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6521 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6522
6523 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6524 is_idle &= is_eng_idle;
6525
6526 if (mask && !is_eng_idle)
6527 set_bit(engine_idx, mask);
6528
6529 if (e)
6530 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6531 qm_glbl_sts0, qm_cgm_sts, "-");
6532 }
6533
6534 return is_idle;
6535 }
6536
6537 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6538 __acquires(&gaudi2->hw_queues_lock)
6539 {
6540 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6541
6542 spin_lock(&gaudi2->hw_queues_lock);
6543 }
6544
6545 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6546 __releases(&gaudi2->hw_queues_lock)
6547 {
6548 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6549
6550 spin_unlock(&gaudi2->hw_queues_lock);
6551 }
6552
6553 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6554 {
6555 return hdev->pdev->device;
6556 }
6557
6558 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6559 {
6560 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6561
6562 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6563 return 0;
6564
6565 return hl_fw_get_eeprom_data(hdev, data, max_size);
6566 }
6567
6568 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6569 {
6570 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6571 }
6572
6573 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6574 {
6575 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6576
6577 if (aggregate) {
6578 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
6579 return gaudi2->events_stat_aggregate;
6580 }
6581
6582 *size = (u32) sizeof(gaudi2->events_stat);
6583 return gaudi2->events_stat;
6584 }
6585
6586 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6587 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6588 {
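	/* Offset of the requested VDEC bridge control block: the stride between consecutive VDEC
	 * blocks times the VDEC index, plus the dcore offset
	 */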
6589 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6590 dcore_vdec_id + DCORE_OFFSET * dcore_id;
6591
6592 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6593 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6594
6595 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6596 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6597
6598 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6599 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6600
6601 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6602 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6603
6604 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6605 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6606 }
6607
6608 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6609 {
6610 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6611 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6612 struct asic_fixed_properties *prop = &hdev->asic_prop;
6613 u32 dcore_offset = dcore_id * DCORE_OFFSET;
6614 u32 vdec_id, i, ports_offset, reg_val;
6615 u8 edma_seq_base;
6616
6617 /* EDMA */
6618 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6619 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6620 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6621 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6622 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6623 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6624 }
6625
6626 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6627 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6628 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6629 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6630 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6631 }
6632
6633 /* Sync Mngr */
6634 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6635 /*
6636 	 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID must be used
6637 	 * for any access type
6638 */
6639 if (dcore_id > 0) {
6640 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6641 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6642 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6643 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6644 }
6645
6646 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6647 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6648
6649 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6650 ports_offset = i * DCORE_MME_SBTE_OFFSET;
6651 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6652 dcore_offset + ports_offset, 0);
6653 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6654 dcore_offset + ports_offset, rw_asid);
6655 }
6656
6657 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6658 ports_offset = i * DCORE_MME_WB_OFFSET;
6659 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6660 dcore_offset + ports_offset, 0);
6661 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6662 dcore_offset + ports_offset, rw_asid);
6663 }
6664
6665 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6666 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6667
6668 /*
6669 * Decoders
6670 */
6671 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6672 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6673 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6674 }
6675 }
6676
6677 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6678 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6679 {
6680 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6681
6682 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6683 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6684
6685 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6686 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6687
6688 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6689 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6690
6691 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6692 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6693
6694 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6695 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6696 }
6697
6698 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6699 u32 rw_asid, u32 rw_mmu_bp)
6700 {
6701 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6702
6703 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6704 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6705 }
6706
6707 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6708 {
6709 u32 reg_base, reg_offset, reg_val = 0;
6710
6711 reg_base = gaudi2_arc_blocks_bases[cpu_id];
6712
6713 /* Enable MMU and configure asid for all relevant ARC regions */
6714 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6715 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6716
6717 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6718 WREG32(reg_base + reg_offset, reg_val);
6719
6720 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6721 WREG32(reg_base + reg_offset, reg_val);
6722
6723 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6724 WREG32(reg_base + reg_offset, reg_val);
6725
6726 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6727 WREG32(reg_base + reg_offset, reg_val);
6728
6729 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6730 WREG32(reg_base + reg_offset, reg_val);
6731
6732 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6733 WREG32(reg_base + reg_offset, reg_val);
6734
6735 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6736 WREG32(reg_base + reg_offset, reg_val);
6737
6738 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6739 WREG32(reg_base + reg_offset, reg_val);
6740
6741 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6742 WREG32(reg_base + reg_offset, reg_val);
6743
6744 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6745 WREG32(reg_base + reg_offset, reg_val);
6746
6747 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6748 WREG32(reg_base + reg_offset, reg_val);
6749 }
6750
6751 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6752 {
6753 int i;
6754
6755 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6756 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6757
6758 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6759 gaudi2_arc_mmu_prepare(hdev, i, asid);
6760
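	/* Each QMAN exposes 4 streams that share a single ARC, so stepping over queue IDs in jumps
	 * of 4 visits every ARC exactly once
	 */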
6761 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6762 if (!gaudi2_is_queue_enabled(hdev, i))
6763 continue;
6764
6765 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6766 }
6767
6768 return 0;
6769 }
6770
6771 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6772 {
6773 struct asic_fixed_properties *prop = &hdev->asic_prop;
6774 u32 rw_asid, offset;
6775 int rc, i;
6776
6777 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6778 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6779
6780 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6781 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6782 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6783 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6784
6785 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6786 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6787 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6788 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6789
6790 /* ROT */
6791 for (i = 0 ; i < NUM_OF_ROT ; i++) {
6792 offset = i * ROT_OFFSET;
6793 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6794 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6795 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6796 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6797 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6798 }
6799
6800 /* Shared Decoders are the last bits in the decoders mask */
6801 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6802 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6803
6804 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6805 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6806
6807 /* arc farm arc dup eng */
6808 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6809 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6810
6811 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6812 if (rc)
6813 return rc;
6814
6815 return 0;
6816 }
6817
6818 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
6819 struct iterate_module_ctx *ctx)
6820 {
6821 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6822
6823 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6824 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6825 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6826 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6827 }
6828
6829 /* zero the MMUBP and set the ASID */
6830 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6831 {
6832 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6833 struct gaudi2_tpc_mmu_data tpc_mmu_data;
6834 struct iterate_module_ctx tpc_iter = {
6835 .fn = &gaudi2_tpc_mmu_prepare,
6836 .data = &tpc_mmu_data,
6837 };
6838 int rc, i;
6839
6840 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6841 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6842 return -EINVAL;
6843 }
6844
6845 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6846 return 0;
6847
6848 rc = gaudi2_mmu_shared_prepare(hdev, asid);
6849 if (rc)
6850 return rc;
6851
6852 /* configure DCORE MMUs */
6853 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6854 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6855 gaudi2_iterate_tpcs(hdev, &tpc_iter);
6856 for (i = 0 ; i < NUM_OF_DCORES ; i++)
6857 gaudi2_mmu_dcore_prepare(hdev, i, asid);
6858
6859 return 0;
6860 }
6861
6862 static inline bool is_info_event(u32 event)
6863 {
6864 switch (event) {
6865 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6866 return true;
6867 default:
6868 return false;
6869 }
6870 }
6871
6872 static void gaudi2_print_irq_info(struct hl_device *hdev, u16 event_type)
6873 {
6874 char desc[64] = "";
6875 bool event_valid = false;
6876
6877 	/* Return in case of a NIC status event - these events are received periodically and are not
6878 	 * an indication of an error, thus they are not printed.
6879 */
6880 if (event_type >= GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 &&
6881 event_type <= GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1)
6882 return;
6883
6884 if (gaudi2_irq_map_table[event_type].valid) {
6885 snprintf(desc, sizeof(desc), gaudi2_irq_map_table[event_type].name);
6886 event_valid = true;
6887 }
6888
6889 if (!event_valid)
6890 snprintf(desc, sizeof(desc), "N/A");
6891
6892 if (is_info_event(event_type))
6893 dev_info_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6894 event_type, desc);
6895 else
6896 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6897 event_type, desc);
6898 }
6899
6900 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6901 struct hl_eq_ecc_data *ecc_data)
6902 {
6903 u64 ecc_address = 0, ecc_syndrom = 0;
6904 u8 memory_wrapper_idx = 0;
6905
6906 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6907 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6908 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6909
6910 dev_err(hdev->dev,
6911 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
6912 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6913
6914 return !!ecc_data->is_critical;
6915 }
6916
6917 /*
6918 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6919 *
6920 * @idx: the current pi/ci value
6921 * @q_len: the queue length (power of 2)
6922 *
6923 * @return the cyclically decremented index
6924 */
6925 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6926 {
6927 u32 mask = q_len - 1;
6928
6929 /*
6930 	 * modular decrement is equivalent to adding (q_len - 1);
6931 	 * taking the LSBs afterwards keeps the value in the
6932 	 * range [0, q_len - 1]
6933 */
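	/* e.g. for q_len = 8: idx 3 -> 2 and idx 0 -> 7 */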
6934 return (idx + q_len - 1) & mask;
6935 }
6936
6937 /**
6938 * gaudi2_print_sw_config_stream_data - print SW config stream data
6939 *
6940 * @hdev: pointer to the habanalabs device structure
6941 * @stream: the QMAN's stream
6942 * @qman_base: base address of QMAN registers block
6943 */
6944 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6945 u32 stream, u64 qman_base)
6946 {
6947 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6948 u32 cq_ptr_lo_off, size;
6949
6950 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6951
6952 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6953 stream * cq_ptr_lo_off;
6954
6955 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6956
6957 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6958
6959 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6960 size = RREG32(cq_tsize);
6961 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6962 stream, cq_ptr, size);
6963 }
6964
6965 /**
6966 * gaudi2_print_last_pqes_on_err - print last PQEs on error
6967 *
6968 * @hdev: pointer to the habanalabs device structure
6969 * @qid_base: first QID of the QMAN (out of 4 streams)
6970 * @stream: the QMAN's stream
6971 * @qman_base: base address of QMAN registers block
6972 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6973 */
6974 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6975 u64 qman_base, bool pr_sw_conf)
6976 {
6977 u32 ci, qm_ci_stream_off;
6978 struct hl_hw_queue *q;
6979 u64 pq_ci;
6980 int i;
6981
6982 q = &hdev->kernel_queues[qid_base + stream];
6983
6984 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6985 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6986 stream * qm_ci_stream_off;
6987
6988 hdev->asic_funcs->hw_queues_lock(hdev);
6989
6990 if (pr_sw_conf)
6991 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6992
6993 ci = RREG32(pq_ci);
6994
6995 	/* we should start printing from ci - 1 */
6996 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6997
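	/* Walk backwards from the last consumed entry and dump up to PQ_FETCHER_CACHE_SIZE BDs,
	 * stopping at the first uninitialized one
	 */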
6998 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6999 struct hl_bd *bd;
7000 u64 addr;
7001 u32 len;
7002
7003 bd = q->kernel_address;
7004 bd += ci;
7005
7006 len = le32_to_cpu(bd->len);
7007 		/* len 0 means an uninitialized entry - break */
7008 if (!len)
7009 break;
7010
7011 addr = le64_to_cpu(bd->ptr);
7012
7013 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7014 stream, ci, addr, len);
7015
7016 /* get previous ci, wrap if needed */
7017 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7018 }
7019
7020 hdev->asic_funcs->hw_queues_unlock(hdev);
7021 }
7022
7023 /**
7024 * print_qman_data_on_err - extract QMAN data on error
7025 *
7026 * @hdev: pointer to the habanalabs device structure
7027 * @qid_base: first QID of the QMAN (out of 4 streams)
7028 * @stream: the QMAN's stream
7029 * @qman_base: base address of QMAN registers block
7030 *
7031  * This function attempts to extract as much data as possible on a QMAN error.
7032  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7033  * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7034 */
7035 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7036 {
7037 u32 i;
7038
7039 if (stream != QMAN_STREAMS) {
7040 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7041 return;
7042 }
7043
7044 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7045
7046 for (i = 0 ; i < QMAN_STREAMS ; i++)
7047 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7048 }
7049
7050 static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *qm_name,
7051 u64 qman_base, u32 qid_base)
7052 {
7053 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes;
7054 u64 glbl_sts_addr, arb_err_addr;
7055 char reg_desc[32];
7056
7057 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7058 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7059
7060 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7061 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7062 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7063
7064 if (!glbl_sts_val)
7065 continue;
7066
7067 if (i == QMAN_STREAMS) {
7068 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7069 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7070 } else {
7071 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7072 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7073 }
7074
7075 for (j = 0 ; j < num_error_causes ; j++)
7076 if (glbl_sts_val & BIT(j))
7077 dev_err_ratelimited(hdev->dev, "%s %s. err cause: %s\n",
7078 qm_name, reg_desc,
7079 i == QMAN_STREAMS ?
7080 gaudi2_qman_lower_cp_error_cause[j] :
7081 gaudi2_qman_error_cause[j]);
7082
7083 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7084 }
7085
7086 arb_err_val = RREG32(arb_err_addr);
7087
7088 if (!arb_err_val)
7089 return;
7090
7091 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7092 if (arb_err_val & BIT(j))
7093 dev_err_ratelimited(hdev->dev, "%s ARB_ERR. err cause: %s\n",
7094 qm_name, gaudi2_qman_arb_error_cause[j]);
7095 }
7096 }
7097
7098 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7099 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7100 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
7101 {
7102 u32 razwi_hi, razwi_lo, razwi_xy;
7103
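	/* The captured HBW RAZWI address arrives as HI/LO halves (from the capture registers or the
	 * F/W event payload) and is recombined into a 64-bit address for the print below
	 */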
7104 if (is_write) {
7105 if (read_razwi_regs) {
7106 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7107 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7108 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7109 } else {
7110 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7111 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7112 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7113 }
7114 } else {
7115 if (read_razwi_regs) {
7116 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7117 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7118 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7119 } else {
7120 razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7121 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7122 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7123 }
7124 }
7125
7126 dev_err_ratelimited(hdev->dev,
7127 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7128 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7129 }
7130
7131 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7132 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7133 bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
7134 {
7135 u32 razwi_addr, razwi_xy;
7136
7137 if (is_write) {
7138 if (read_razwi_regs) {
7139 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7140 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7141 } else {
7142 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7143 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7144 }
7145
7146 dev_err_ratelimited(hdev->dev,
7147 "%s-RAZWI SHARED RR LBW WR error, mstr_if 0x%llx, captured address 0x%x, Initiator coordinates 0x%x\n",
7148 name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
7149 } else {
7150 if (read_razwi_regs) {
7151 razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7152 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7153 } else {
7154 razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7155 razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7156 }
7157
7158 dev_err_ratelimited(hdev->dev,
7159 "%s-RAZWI SHARED RR LBW AR error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7160 name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
7161 }
7162 }
7163
7164 /*
7165  * This function handles RR (range register) hit events
7166  * raised by initiators, not PSOC RAZWI.
7167 */
7168 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7169 enum razwi_event_sources module, u8 module_idx,
7170 u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info)
7171 {
7172 bool via_sft = false, read_razwi_regs = false;
7173 u32 rtr_id, dcore_id, dcore_rtr_id, sft_id;
7174 u64 rtr_mstr_if_base_addr;
7175 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7176 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7177 char initiator_name[64];
7178
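	/* On PLDM, when running without a full (Linux) F/W, or when the F/W supplied no RAZWI info,
	 * read the RAZWI capture registers directly
	 */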
7179 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7180 read_razwi_regs = true;
7181
7182 switch (module) {
7183 case RAZWI_TPC:
7184 rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7185 sprintf(initiator_name, "TPC_%u", module_idx);
7186 break;
7187 case RAZWI_MME:
7188 sprintf(initiator_name, "MME_%u", module_idx);
7189 switch (module_sub_idx) {
7190 case MME_WAP0:
7191 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7192 break;
7193 case MME_WAP1:
7194 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7195 break;
7196 case MME_WRITE:
7197 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7198 break;
7199 case MME_READ:
7200 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7201 break;
7202 case MME_SBTE0:
7203 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7204 break;
7205 case MME_SBTE1:
7206 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7207 break;
7208 case MME_SBTE2:
7209 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7210 break;
7211 case MME_SBTE3:
7212 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7213 break;
7214 case MME_SBTE4:
7215 rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7216 break;
7217 default:
7218 return;
7219 }
7220 break;
7221 case RAZWI_EDMA:
7222 sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7223 dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7224 via_sft = true;
7225 sprintf(initiator_name, "EDMA_%u", module_idx);
7226 break;
7227 case RAZWI_PDMA:
7228 rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7229 sprintf(initiator_name, "PDMA_%u", module_idx);
7230 break;
7231 case RAZWI_NIC:
7232 rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7233 sprintf(initiator_name, "NIC_%u", module_idx);
7234 break;
7235 case RAZWI_DEC:
7236 rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7237 sprintf(initiator_name, "DEC_%u", module_idx);
7238 break;
7239 case RAZWI_ROT:
7240 rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7241 sprintf(initiator_name, "ROT_%u", module_idx);
7242 break;
7243 default:
7244 return;
7245 }
7246
7247 if (!read_razwi_regs) {
7248 if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7249 hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7250 RAZWI_HAPPENED_AW;
7251 hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7252 RAZWI_HAPPENED_AR;
7253 } else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7254 lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7255 RAZWI_HAPPENED_AW;
7256 lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7257 RAZWI_HAPPENED_AR;
7258 }
7259 rtr_mstr_if_base_addr = 0;
7260
7261 goto dump_info;
7262 }
7263
7264 /* Find router mstr_if register base */
7265 if (via_sft) {
7266 rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7267 dcore_id * SFT_DCORE_OFFSET +
7268 sft_id * SFT_IF_OFFSET +
7269 RTR_MSTR_IF_OFFSET;
7270 } else {
7271 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7272 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7273 rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7274 dcore_id * DCORE_OFFSET +
7275 dcore_rtr_id * DCORE_RTR_OFFSET +
7276 RTR_MSTR_IF_OFFSET;
7277 }
7278
7279 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
7280 hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7281
7282 hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7283
7284 if (via_sft) {
7285 		/* SFT has a separate MSTR_IF for LBW; only there can we
7286 		 * read the LBW RAZWI related registers
7287 */
7288 u64 base;
7289
7290 base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7291 RTR_LBW_MSTR_IF_OFFSET;
7292
7293 lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7294
7295 lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7296 } else {
7297 lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7298
7299 lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7300 }
7301
7302 dump_info:
7303 /* check if there is no RR razwi indication at all */
7304 if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7305 return;
7306
7307 if (hbw_shrd_aw) {
7308 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7309 initiator_name, read_razwi_regs, razwi_info);
7310
7311 /* Clear event indication */
7312 if (read_razwi_regs)
7313 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7314 }
7315
7316 if (hbw_shrd_ar) {
7317 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7318 initiator_name, read_razwi_regs, razwi_info);
7319
7320 /* Clear event indication */
7321 if (read_razwi_regs)
7322 WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7323 }
7324
7325 if (lbw_shrd_aw) {
7326 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7327 initiator_name, read_razwi_regs, razwi_info);
7328
7329 /* Clear event indication */
7330 if (read_razwi_regs)
7331 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7332 }
7333
7334 if (lbw_shrd_ar) {
7335 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7336 initiator_name, read_razwi_regs, razwi_info);
7337
7338 /* Clear event indication */
7339 if (read_razwi_regs)
7340 WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7341 }
7342 }
7343
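/*
 * Sweep all possible RAZWI initiators (TPCs, MMEs, EDMAs, PDMAs, NICs, DECs
 * and ROTs) and ack any pending RAZWI indication found for an enabled module.
 */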
7344 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7345 {
7346 struct asic_fixed_properties *prop = &hdev->asic_prop;
7347 u8 mod_idx, sub_mod;
7348
7349 /* check all TPCs */
7350 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7351 if (prop->tpc_enabled_mask & BIT(mod_idx))
7352 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
7353 }
7354
7355 /* check all MMEs */
7356 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7357 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7358 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7359 sub_mod, NULL);
7360
7361 /* check all EDMAs */
7362 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7363 if (prop->edma_enabled_mask & BIT(mod_idx))
7364 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
7365
7366 /* check all PDMAs */
7367 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7368 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
7369
7370 /* check all NICs */
7371 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7372 if (hdev->nic_ports_mask & BIT(mod_idx))
7373 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7374 NULL);
7375
7376 /* check all DECs */
7377 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7378 if (prop->decoder_enabled_mask & BIT(mod_idx))
7379 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
7380
7381 /* check all ROTs */
7382 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7383 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
7384 }
7385
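/* Translate a router ID to a printable list of the initiators behind it */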
7386 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7387 {
7388 switch (rtr_id) {
7389 case DCORE0_RTR0:
7390 return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7391 case DCORE0_RTR1:
7392 return "TPC0/1";
7393 case DCORE0_RTR2:
7394 return "TPC2/3";
7395 case DCORE0_RTR3:
7396 return "TPC4/5";
7397 case DCORE0_RTR4:
7398 return "MME0_SBTE0/1";
7399 case DCORE0_RTR5:
7400 return "MME0_WAP0/SBTE2";
7401 case DCORE0_RTR6:
7402 return "MME0_CTRL_WR/SBTE3";
7403 case DCORE0_RTR7:
7404 return "MME0_WAP1/CTRL_RD/SBTE4";
7405 case DCORE1_RTR0:
7406 return "MME1_WAP1/CTRL_RD/SBTE4";
7407 case DCORE1_RTR1:
7408 return "MME1_CTRL_WR/SBTE3";
7409 case DCORE1_RTR2:
7410 return "MME1_WAP0/SBTE2";
7411 case DCORE1_RTR3:
7412 return "MME1_SBTE0/1";
7413 case DCORE1_RTR4:
7414 return "TPC10/11";
7415 case DCORE1_RTR5:
7416 return "TPC8/9";
7417 case DCORE1_RTR6:
7418 return "TPC6/7";
7419 case DCORE1_RTR7:
7420 return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7421 case DCORE2_RTR0:
7422 return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7423 case DCORE2_RTR1:
7424 return "TPC16/17";
7425 case DCORE2_RTR2:
7426 return "TPC14/15";
7427 case DCORE2_RTR3:
7428 return "TPC12/13";
7429 case DCORE2_RTR4:
7430 return "MME2_SBTE0/1";
7431 case DCORE2_RTR5:
7432 return "MME2_WAP0/SBTE2";
7433 case DCORE2_RTR6:
7434 return "MME2_CTRL_WR/SBTE3";
7435 case DCORE2_RTR7:
7436 return "MME2_WAP1/CTRL_RD/SBTE4";
7437 case DCORE3_RTR0:
7438 return "MME3_WAP1/CTRL_RD/SBTE4";
7439 case DCORE3_RTR1:
7440 return "MME3_CTRL_WR/SBTE3";
7441 case DCORE3_RTR2:
7442 return "MME3_WAP0/SBTE2";
7443 case DCORE3_RTR3:
7444 return "MME3_SBTE0/1";
7445 case DCORE3_RTR4:
7446 return "TPC18/19";
7447 case DCORE3_RTR5:
7448 return "TPC20/21";
7449 case DCORE3_RTR6:
7450 return "TPC22/23";
7451 case DCORE3_RTR7:
7452 return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7453 default:
7454 return "N/A";
7455 }
7456 }
7457
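/*
 * Print the HBW address captured for a PSOC RAZWI on an unmapped address and
 * clear the corresponding "set" indication in the router control block.
 */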
7458 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7459 u64 rtr_ctrl_base_addr, bool is_write)
7460 {
7461 u32 razwi_hi, razwi_lo;
7462
7463 if (is_write) {
7464 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7465 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7466
7467 /* Clear set indication */
7468 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7469 } else {
7470 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7471 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7472
7473 /* Clear set indication */
7474 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7475 }
7476
7477 dev_err_ratelimited(hdev->dev,
7478 "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7479 is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7480
7481 dev_err_ratelimited(hdev->dev,
7482 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7483 }
7484
7485 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7486 u64 rtr_ctrl_base_addr, bool is_write)
7487 {
7488 u32 razwi_addr;
7489
7490 if (is_write) {
7491 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7492
7493 /* Clear set indication */
7494 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7495 } else {
7496 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7497
7498 /* Clear set indication */
7499 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7500 }
7501
7502 dev_err_ratelimited(hdev->dev,
7503 "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7504 is_write ? "WR" : "RD", rtr_id, razwi_addr);
7505
7506 dev_err_ratelimited(hdev->dev,
7507 "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7508 }
7509
7510 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7511 static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
7512 {
7513 u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7514 razwi_mask_info, razwi_intr = 0;
7515 int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7516 u64 rtr_ctrl_base_addr;
7517
7518 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7519 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7520 if (!razwi_intr)
7521 return;
7522 }
7523
7524 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7525 xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7526
7527 dev_err_ratelimited(hdev->dev,
7528 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7529 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7530 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7531 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7532 xy,
7533 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7534
7535 if (xy == 0) {
7536 dev_err_ratelimited(hdev->dev,
7537 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7538 goto clear;
7539 }
7540
7541 /* Find router id by router coordinates */
7542 for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7543 if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7544 break;
7545
7546 if (rtr_id == rtr_map_arr_len) {
7547 dev_err_ratelimited(hdev->dev,
7548 "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7549 goto clear;
7550 }
7551
7552 /* Find router mstr_if register base */
7553 dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7554 dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7555 rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7556 dcore_rtr_id * DCORE_RTR_OFFSET;
7557
7558 hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7559 hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7560 lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7561 lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7562
7563 if (hbw_aw_set)
7564 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7565 rtr_ctrl_base_addr, true);
7566
7567 if (hbw_ar_set)
7568 gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7569 rtr_ctrl_base_addr, false);
7570
7571 if (lbw_aw_set)
7572 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7573 rtr_ctrl_base_addr, true);
7574
7575 if (lbw_ar_set)
7576 gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7577 rtr_ctrl_base_addr, false);
7578
7579 clear:
7580 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7581 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7582 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7583 }
7584
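/* Print the QMAN SEI error causes and clear the reported status bits */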
7585 static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
7586 {
7587 u32 i, sts_val, sts_clr_val = 0;
7588
7589 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7590
7591 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7592 if (sts_val & BIT(i)) {
7593 dev_err_ratelimited(hdev->dev, "QM SEI. err cause: %s\n",
7594 gaudi2_qm_sei_error_cause[i]);
7595 sts_clr_val |= BIT(i);
7596 }
7597 }
7598
7599 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7600 }
7601
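/*
 * Resolve the QMAN base address from the AXI error response event type and run
 * the common QM SEI handler on it.
 */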
7602 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7603 struct hl_eq_razwi_info *razwi_info)
7604 {
7605 u64 qman_base;
7606 u8 index;
7607
7608 switch (event_type) {
7609 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7610 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7611 qman_base = mmDCORE0_TPC0_QM_BASE +
7612 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7613 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7614 break;
7615 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7616 qman_base = mmDCORE0_TPC6_QM_BASE;
7617 break;
7618 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7619 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7620 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7621 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7622 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7623 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7624 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7625 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7626 break;
7627 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7628 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7629 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7630 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7631 break;
7632 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7633 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7634 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7635 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7636 break;
7637 default:
7638 return;
7639 }
7640
7641 _gaudi2_handle_qm_sei_err(hdev, qman_base);
7642
7643 /* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7644 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7645 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7646 _gaudi2_handle_qm_sei_err(hdev, qman_base + NIC_QM_OFFSET);
7647
7648 /* check if RAZWI happened */
7649 if (razwi_info)
7650 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, 0, 0, razwi_info);
7651 }
7652
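/*
 * Map a QMAN error event to its queue-ID base, QMAN base address and name,
 * then run the generic QMAN error handler.
 */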
7653 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7654 {
7655 u32 qid_base;
7656 u64 qman_base;
7657 char desc[32];
7658 u8 index;
7659
7660 switch (event_type) {
7661 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7662 index = event_type - GAUDI2_EVENT_TPC0_QM;
7663 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7664 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7665 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC%d_QM", index);
7666 break;
7667 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7668 index = event_type - GAUDI2_EVENT_TPC6_QM;
7669 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7670 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7671 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_TPC%d_QM", index);
7672 break;
7673 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7674 index = event_type - GAUDI2_EVENT_TPC12_QM;
7675 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7676 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7677 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_TPC%d_QM", index);
7678 break;
7679 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7680 index = event_type - GAUDI2_EVENT_TPC18_QM;
7681 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7682 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7683 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_TPC%d_QM", index);
7684 break;
7685 case GAUDI2_EVENT_TPC24_QM:
7686 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7687 qman_base = mmDCORE0_TPC6_QM_BASE;
7688 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC6_QM");
7689 break;
7690 case GAUDI2_EVENT_MME0_QM:
7691 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7692 qman_base = mmDCORE0_MME_QM_BASE;
7693 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_MME_QM");
7694 break;
7695 case GAUDI2_EVENT_MME1_QM:
7696 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7697 qman_base = mmDCORE1_MME_QM_BASE;
7698 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_MME_QM");
7699 break;
7700 case GAUDI2_EVENT_MME2_QM:
7701 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7702 qman_base = mmDCORE2_MME_QM_BASE;
7703 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_MME_QM");
7704 break;
7705 case GAUDI2_EVENT_MME3_QM:
7706 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7707 qman_base = mmDCORE3_MME_QM_BASE;
7708 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_MME_QM");
7709 break;
7710 case GAUDI2_EVENT_HDMA0_QM:
7711 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7712 qman_base = mmDCORE0_EDMA0_QM_BASE;
7713 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA0_QM");
7714 break;
7715 case GAUDI2_EVENT_HDMA1_QM:
7716 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7717 qman_base = mmDCORE0_EDMA1_QM_BASE;
7718 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA1_QM");
7719 break;
7720 case GAUDI2_EVENT_HDMA2_QM:
7721 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7722 qman_base = mmDCORE1_EDMA0_QM_BASE;
7723 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA0_QM");
7724 break;
7725 case GAUDI2_EVENT_HDMA3_QM:
7726 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7727 qman_base = mmDCORE1_EDMA1_QM_BASE;
7728 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA1_QM");
7729 break;
7730 case GAUDI2_EVENT_HDMA4_QM:
7731 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7732 qman_base = mmDCORE2_EDMA0_QM_BASE;
7733 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA0_QM");
7734 break;
7735 case GAUDI2_EVENT_HDMA5_QM:
7736 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7737 qman_base = mmDCORE2_EDMA1_QM_BASE;
7738 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA1_QM");
7739 break;
7740 case GAUDI2_EVENT_HDMA6_QM:
7741 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7742 qman_base = mmDCORE3_EDMA0_QM_BASE;
7743 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA0_QM");
7744 break;
7745 case GAUDI2_EVENT_HDMA7_QM:
7746 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7747 qman_base = mmDCORE3_EDMA1_QM_BASE;
7748 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA1_QM");
7749 break;
7750 case GAUDI2_EVENT_PDMA0_QM:
7751 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7752 qman_base = mmPDMA0_QM_BASE;
7753 snprintf(desc, ARRAY_SIZE(desc), "PDMA0_QM");
7754 break;
7755 case GAUDI2_EVENT_PDMA1_QM:
7756 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7757 qman_base = mmPDMA1_QM_BASE;
7758 snprintf(desc, ARRAY_SIZE(desc), "PDMA1_QM");
7759 break;
7760 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7761 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7762 qman_base = mmROT0_QM_BASE;
7763 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR0_QM");
7764 break;
7765 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7766 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7767 qman_base = mmROT1_QM_BASE;
7768 snprintf(desc, ARRAY_SIZE(desc), "ROTATOR1_QM");
7769 break;
7770 default:
7771 return;
7772 }
7773
7774 gaudi2_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7775
7776 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7777 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7778 _gaudi2_handle_qm_sei_err(hdev, qman_base);
7779 }
7780
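/* Print the ARC farm SEI interrupt causes and clear the reported bits */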
7781 static void gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev)
7782 {
7783 u32 i, sts_val, sts_clr_val = 0;
7784
7785 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7786
7787 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7788 if (sts_val & BIT(i)) {
7789 dev_err_ratelimited(hdev->dev, "ARC SEI. err cause: %s\n",
7790 gaudi2_arc_sei_error_cause[i]);
7791 sts_clr_val |= BIT(i);
7792 }
7793 }
7794
7795 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7796 }
7797
7798 static void gaudi2_handle_cpu_sei_err(struct hl_device *hdev)
7799 {
7800 u32 i, sts_val, sts_clr_val = 0;
7801
7802 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
7803
7804 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
7805 if (sts_val & BIT(i)) {
7806 dev_err_ratelimited(hdev->dev, "CPU SEI. err cause: %s\n",
7807 gaudi2_cpu_sei_error_cause[i]);
7808 sts_clr_val |= BIT(i);
7809 }
7810 }
7811
7812 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
7813 }
7814
7815 static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
7816 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
7817 {
7818 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7819 int i;
7820
7821 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
7822 if (intr_cause_data & BIT(i))
7823 dev_err_ratelimited(hdev->dev, "ROT%u. err cause: %s\n",
7824 rot_index, guadi2_rot_error_cause[i]);
7825
7826 /* check if RAZWI happened */
7827 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
7828 &razwi_with_intr_cause->razwi_info);
7829 }
7830
7831 static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name,
7832 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
7833 {
7834 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7835 int i;
7836
7837 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
7838 if (intr_cause_data & BIT(i))
7839 dev_err_ratelimited(hdev->dev, "TPC%d_%s interrupt cause: %s\n",
7840 tpc_index, interrupt_name, gaudi2_tpc_interrupts_cause[i]);
7841
7842 /* check if RAZWI happened */
7843 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
7844 &razwi_with_intr_cause->razwi_info);
7845 }
7846
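/*
 * Decode the decoder (DCORE or PCIE VDEC) interrupt cause register, check for
 * a related RAZWI and write 1 to clear the reported causes.
 */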
7847 static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name,
7848 struct hl_eq_razwi_info *razwi_info)
7849 {
7850 u32 sts_addr, sts_val, sts_clr_val = 0;
7851 int i;
7852
7853 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
7854 /* DCORE DEC */
7855 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
7856 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
7857 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
7858 else
7859 /* PCIE DEC */
7860 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
7861 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
7862
7863 sts_val = RREG32(sts_addr);
7864
7865 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
7866 if (sts_val & BIT(i)) {
7867 dev_err_ratelimited(hdev->dev, "DEC%u_%s err cause: %s\n",
7868 dec_index, interrupt_name, gaudi2_dec_error_cause[i]);
7869 sts_clr_val |= BIT(i);
7870 }
7871 }
7872
7873 /* check if RAZWI happened */
7874 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info);
7875
7876 /* Write 1 to clear errors */
7877 WREG32(sts_addr, sts_clr_val);
7878 }
7879
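/*
 * Print the MME control interrupt causes, check all MME initiators for RAZWI
 * and clear the reported causes.
 */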
7880 static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name,
7881 struct hl_eq_razwi_info *razwi_info)
7882 {
7883 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
7884 int i;
7885
7886 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
7887 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
7888
7889 sts_val = RREG32(sts_addr);
7890
7891 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
7892 if (sts_val & BIT(i)) {
7893 dev_err_ratelimited(hdev->dev, "MME%u_%s err cause: %s\n",
7894 mme_index, interrupt_name, guadi2_mme_error_cause[i]);
7895 sts_clr_val |= BIT(i);
7896 }
7897 }
7898
7899 /* check if RAZWI happened */
7900 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
7901 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info);
7902
7903 WREG32(sts_clr_addr, sts_clr_val);
7904 }
7905
7906 static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8 sbte_index,
7907 u64 intr_cause_data)
7908 {
7909 int i;
7910
7911 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
7912 if (intr_cause_data & BIT(i))
7913 dev_err_ratelimited(hdev->dev, "MME%uSBTE%u_AXI_ERR_RSP err cause: %s\n",
7914 mme_index, sbte_index, guadi2_mme_sbte_error_cause[i]);
7915 }
7916
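/* Print the MME ACC (WAP) interrupt causes, check WAP0/1 for RAZWI and clear */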
7917 static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
7918 struct hl_eq_razwi_info *razwi_info)
7919 {
7920 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
7921 int i;
7922
7923 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
7924 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
7925
7926 sts_val = RREG32(sts_addr);
7927
7928 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
7929 if (sts_val & BIT(i)) {
7930 dev_err_ratelimited(hdev->dev,
7931 "MME%u_WAP_SOURCE_RESULT_INVALID err cause: %s\n",
7932 mme_index, guadi2_mme_wap_error_cause[i]);
7933 sts_clr_val |= BIT(i);
7934 }
7935 }
7936
7937 /* check if RAZWI happened on WAP0/1 */
7938 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info);
7939 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info);
7940
7941 WREG32(sts_clr_addr, sts_clr_val);
7942 }
7943
7944 static void gaudi2_handle_kdma_core_event(struct hl_device *hdev, u64 intr_cause_data)
7945 {
7946 int i;
7947
7948 /* If an AXI read or write error is received, an error is reported and an
7949 * interrupt message is sent. Due to a HW erratum, when reading the cause
7950 * register of the KDMA engine, the reported error is always HBW, even if
7951 * the actual error was caused by an LBW KDMA transaction.
7952 */
7953 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
7954 if (intr_cause_data & BIT(i))
7955 dev_err_ratelimited(hdev->dev, "kdma core err cause: %s\n",
7956 gaudi2_kdma_core_interrupts_cause[i]);
7957 }
7958
7959 static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_data)
7960 {
7961 int i;
7962
7963 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
7964 if (intr_cause_data & BIT(i))
7965 dev_err_ratelimited(hdev->dev, "dma core err cause: %s\n",
7966 gaudi2_dma_core_interrupts_cause[i]);
7967 }
7968
7969 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
7970 {
7971 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
7972
7973 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
7974 if (RREG32(razwi_happened_addr)) {
7975 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
7976 NULL);
7977 WREG32(razwi_happened_addr, 0x1);
7978 }
7979
7980 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
7981 if (RREG32(razwi_happened_addr)) {
7982 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
7983 NULL);
7984 WREG32(razwi_happened_addr, 0x1);
7985 }
7986
7987 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
7988 if (RREG32(razwi_happened_addr)) {
7989 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
7990 NULL);
7991 WREG32(razwi_happened_addr, 0x1);
7992 }
7993
7994 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
7995 if (RREG32(razwi_happened_addr)) {
7996 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
7997 NULL);
7998 WREG32(razwi_happened_addr, 0x1);
7999 }
8000 }
8001
8002 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data)
8003 {
8004 int i;
8005
8006 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8007 if (!(intr_cause_data & BIT_ULL(i)))
8008 continue;
8009
8010 dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
8011 gaudi2_pcie_addr_dec_error_cause[i]);
8012
8013 switch (intr_cause_data & BIT_ULL(i)) {
8014 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8015 break;
8016 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8017 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev);
8018 break;
8019 }
8020 }
8021 }
8022
8023 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
8024
8025 {
8026 int i;
8027
8028 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8029 if (intr_cause_data & BIT_ULL(i))
8030 dev_err_ratelimited(hdev->dev, "PMMU PIF err cause: %s\n",
8031 gaudi2_pmmu_fatal_interrupts_cause[i]);
8032 }
8033 }
8034
8035 static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8036 {
8037 u32 dcore_id, hif_id;
8038 int i;
8039
8040 dcore_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) / 4;
8041 hif_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) % 4;
8042
8043 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8044 if (intr_cause_data & BIT_ULL(i))
8045 dev_err_ratelimited(hdev->dev, "DCORE%u_HIF%u: %s\n", dcore_id, hif_id,
8046 gaudi2_hif_fatal_interrupts_cause[i]);
8047 }
8048 }
8049
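/*
 * If a valid page-fault capture exists, reconstruct the faulting VA from the
 * capture registers (bits 63:32 and 31:0 are latched separately), print it
 * and clear the capture.
 */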
8050 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8051 {
8052 u32 valid, val;
8053 u64 addr;
8054
8055 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8056
8057 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8058 return;
8059
8060 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8061 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8062 addr <<= 32;
8063 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8064
8065 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8066 is_pmmu ? "PMMU" : "HMMU", addr);
8067
8068 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8069 }
8070
8071 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8072 {
8073 u32 valid, val;
8074 u64 addr;
8075
8076 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8077
8078 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8079 return;
8080
8081 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8082 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8083 addr <<= 32;
8084 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8085
8086 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8087 is_pmmu ? "PMMU" : "HMMU", addr);
8088 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8089 }
8090
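/*
 * Common HMMU/PMMU SPI/SEI handler: print the causes, dump the page-fault or
 * access-error capture for the first two causes, then clear the cause and
 * interrupt registers.
 */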
8091 static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name,
8092 u64 mmu_base, bool is_pmmu)
8093 {
8094 u32 spi_sei_cause, interrupt_clr = 0x0;
8095 int i;
8096
8097 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8098
8099 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8100 if (spi_sei_cause & BIT(i)) {
8101 dev_err_ratelimited(hdev->dev, "%s SPI_SEI ERR. err cause: %s\n",
8102 mmu_name, gaudi2_mmu_spi_sei[i].cause);
8103
8104 if (i == 0)
8105 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
8106 else if (i == 1)
8107 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8108
8109 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8110 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8111 }
8112 }
8113
8114 /* Clear cause */
8115 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8116
8117 /* Clear interrupt */
8118 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8119 }
8120
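/*
 * Handle a sync manager error. Returns true if a reset is required; BRESP
 * errors (cause bit 2) do not trigger a reset due to a potential H/W issue.
 */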
8121 static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
8122 {
8123 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log;
8124 u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index;
8125 bool reset = true;
8126 int i;
8127
8128 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8129 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8130
8131 sei_cause_val = RREG32(sei_cause_addr);
8132 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8133 cq_intr_val = RREG32(cq_intr_addr);
8134
8135 /* SEI interrupt */
8136 if (sei_cause_cause) {
8137 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8138 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8139 sei_cause_val);
8140
8141 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8142 if (!(sei_cause_cause & BIT(i)))
8143 continue;
8144
8145 dev_err_ratelimited(hdev->dev, "SM%u SEI ERR. err cause: %s. %s: 0x%X\n",
8146 sm_index,
8147 gaudi2_sm_sei_cause[i].cause_name,
8148 gaudi2_sm_sei_cause[i].log_name,
8149 sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
8150
8151 /* Due to a potential H/W issue, do not reset upon BRESP errors */
8152 if (i == 2)
8153 reset = false;
8154 break;
8155 }
8156
8157 /* Clear SM_SEI_CAUSE */
8158 WREG32(sei_cause_addr, 0);
8159 }
8160
8161 /* CQ interrupt */
8162 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8163 cq_intr_queue_index =
8164 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8165 cq_intr_val);
8166
8167 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8168 sm_index, cq_intr_queue_index);
8169
8170 /* Clear CQ_INTR */
8171 WREG32(cq_intr_addr, 0);
8172 }
8173
8174 return reset;
8175 }
8176
8177 static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type)
8178 {
8179 bool is_pmmu = false;
8180 char desc[32];
8181 u64 mmu_base;
8182 u8 index;
8183
8184 switch (event_type) {
8185 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8186 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8187 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8188 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8189 break;
8190 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8191 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8192 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8193 snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8194 break;
8195 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8196 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8197 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8198 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8199 break;
8200 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8201 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8202 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8203 snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8204 break;
8205 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8206 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8207 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8208 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8209 break;
8210 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8211 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8212 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8213 snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8214 break;
8215 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8216 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8217 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8218 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8219 break;
8220 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8221 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8222 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8223 snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8224 break;
8225 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8226 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8227 is_pmmu = true;
8228 mmu_base = mmPMMU_HBW_MMU_BASE;
8229 snprintf(desc, ARRAY_SIZE(desc), "PMMU");
8230 break;
8231 default:
8232 return;
8233 }
8234
8235 gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu);
8236 }
8237
8238
8239 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8240 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8241 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8242 {
8243 u32 addr, beat, beat_shift;
8244 bool rc = false;
8245
8246 dev_err_ratelimited(hdev->dev,
8247 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8248 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8249 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8250 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8251
8252 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8253 dev_err_ratelimited(hdev->dev,
8254 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8255 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8256 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8257 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8258 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8259 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8260
8261 /* For each beat (RDQS edge), look for possible errors and print relevant info */
8262 for (beat = 0 ; beat < 4 ; beat++) {
8263 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8264 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8265 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8266 beat,
8267 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8268 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8269
8270 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8271 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8272 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8273 beat,
8274 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8275 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8276 rc |= true;
8277 }
8278
8279 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8280 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8281 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8282 dev_err_ratelimited(hdev->dev,
8283 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8284 beat,
8285 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8286 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8287 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8288 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8289 rc |= true;
8290 }
8291
8292 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8293 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8294 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8295 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8296 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8297 }
8298
8299 return rc;
8300 }
8301
8302 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8303 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8304 {
8305 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8306 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8307
8308 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8309
8310 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8311 derr & 0x3, derr & 0xc);
8312
8313 /* JIRA H6-3286 - the following prints may not be valid */
8314 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8315 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8316 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8317 dev_err_ratelimited(hdev->dev,
8318 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8319 i,
8320 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8321 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8322 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8323 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8324 }
8325 }
8326
8327 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8328 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8329 {
8330 __le32 *col_cmd = ca_par_err_data->dbg_col;
8331 __le16 *row_cmd = ca_par_err_data->dbg_row;
8332 u32 i;
8333
8334 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8335
8336 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8337 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8338 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8339 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8340 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8341 }
8342
8343 /* Returns true if hard reset is needed or false otherwise */
8344 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8345 struct hl_eq_hbm_sei_data *sei_data)
8346 {
8347 bool require_hard_reset = false;
8348 u32 hbm_id, mc_id, cause_idx;
8349
8350 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8351 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8352
8353 cause_idx = sei_data->hdr.sei_cause;
8354 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8355 dev_err_ratelimited(hdev->dev, "Invalid HBM SEI event cause (%d) provided by FW\n",
8356 cause_idx);
8357 return true;
8358 }
8359
8360 if (sei_data->hdr.is_critical)
8361 dev_err(hdev->dev,
8362 "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8363 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8364 hbm_mc_sei_cause[cause_idx]);
8365
8366 else
8367 dev_err_ratelimited(hdev->dev,
8368 "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8369 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8370 hbm_mc_sei_cause[cause_idx]);
8371
8372 /* Print error-specific info */
8373 switch (cause_idx) {
8374 case HBM_SEI_CATTRIP:
8375 require_hard_reset = true;
8376 break;
8377
8378 case HBM_SEI_CMD_PARITY_EVEN:
8379 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8380 le32_to_cpu(sei_data->hdr.cnt));
8381 require_hard_reset = true;
8382 break;
8383
8384 case HBM_SEI_CMD_PARITY_ODD:
8385 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8386 le32_to_cpu(sei_data->hdr.cnt));
8387 require_hard_reset = true;
8388 break;
8389
8390 case HBM_SEI_WRITE_DATA_PARITY_ERR:
8391 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8392 le32_to_cpu(sei_data->hdr.cnt));
8393 require_hard_reset = true;
8394 break;
8395
8396 case HBM_SEI_READ_ERR:
8397 /* Unlike other SEI events, read error requires further processing of the
8398 * raw data in order to determine the root cause.
8399 */
8400 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8401 &sei_data->read_err_info,
8402 le32_to_cpu(sei_data->hdr.cnt));
8403 break;
8404
8405 default:
8406 break;
8407 }
8408
8409 require_hard_reset |= !!sei_data->hdr.is_critical;
8410
8411 return require_hard_reset;
8412 }
8413
8414 static void gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u64 intr_cause_data)
8415 {
8416 dev_err(hdev->dev,
8417 "HBM catastrophic temperature error (CATTRIP) cause %#llx\n",
8418 intr_cause_data);
8419 }
8420
8421 static void gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8422 {
8423 u32 i;
8424
8425 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8426 if (intr_cause_data & hbm_mc_spi[i].mask)
8427 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8428 hbm_mc_spi[i].cause);
8429 }
8430
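/*
 * Track clock throttling state: record the throttling reason and the
 * start/end timestamps under the clk_throttling lock.
 */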
8431 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
8432 {
8433 ktime_t zero_time = ktime_set(0, 0);
8434
8435 mutex_lock(&hdev->clk_throttling.lock);
8436
8437 switch (event_type) {
8438 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8439 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8440 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8441 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8442 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8443 dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8444 break;
8445
8446 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8447 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8448 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8449 dev_info_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
8450 break;
8451
8452 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8453 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8454 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8455 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8456 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8457 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8458 break;
8459
8460 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8461 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8462 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8463 dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
8464 break;
8465
8466 default:
8467 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8468 break;
8469 }
8470
8471 mutex_unlock(&hdev->clk_throttling.lock);
8472 }
8473
8474 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev,
8475 struct cpucp_pkt_sync_err *sync_err)
8476 {
8477 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8478
8479 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
8480 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
8481 }
8482
8483 static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev)
8484 {
8485 u32 p2p_intr, msix_gw_intr;
8486
8487 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8488 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8489
8490 if (p2p_intr) {
8491 dev_err_ratelimited(hdev->dev,
8492 "pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8493 RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8494
8495 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8496 }
8497
8498 if (msix_gw_intr) {
8499 dev_err_ratelimited(hdev->dev,
8500 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8501 RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8502
8503 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8504 }
8505 }
8506
8507 static void gaudi2_handle_pcie_drain(struct hl_device *hdev,
8508 struct hl_eq_pcie_drain_ind_data *drain_data)
8509 {
8510 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause;
8511
8512 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8513 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8514 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8515 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8516 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8517
8518 if (cause & BIT_ULL(0))
8519 dev_err_ratelimited(hdev->dev,
8520 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8521 !!lbw_rd, !!lbw_wr);
8522
8523 if (cause & BIT_ULL(1))
8524 dev_err_ratelimited(hdev->dev,
8525 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8526 hbw_rd, hbw_wr);
8527 }
8528
8529 static void gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8530 {
8531 int i;
8532
8533 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8534 if (intr_cause_data & BIT_ULL(i))
8535 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8536 gaudi2_psoc_axi_drain_interrupts_cause[i]);
8537 }
8538 }
8539
8540 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev,
8541 struct cpucp_pkt_sync_err *sync_err)
8542 {
8543 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8544
8545 dev_warn(hdev->dev,
8546 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
8547 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
8548 }
8549
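/* Handle an engine ARC interrupt; only the DCCM queue-full type is decoded */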
8550 static void hl_arc_event_handle(struct hl_device *hdev,
8551 struct hl_eq_engine_arc_intr_data *data)
8552 {
8553 struct hl_engine_arc_dccm_queue_full_irq *q;
8554 u32 intr_type, engine_id;
8555 u64 payload;
8556
8557 intr_type = le32_to_cpu(data->intr_type);
8558 engine_id = le32_to_cpu(data->engine_id);
8559 payload = le64_to_cpu(data->payload);
8560
8561 switch (intr_type) {
8562 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8563 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8564
8565 dev_err_ratelimited(hdev->dev,
8566 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8567 engine_id, intr_type, q->queue_index);
8568 break;
8569 default:
8570 dev_err_ratelimited(hdev->dev, "Unknown ARC event type\n");
8571 }
8572 }
8573
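/* Main EQ dispatcher: decode the event type and route it to the relevant handler */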
8574 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8575 {
8576 u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
8577 struct gaudi2_device *gaudi2 = hdev->asic_specific;
8578 bool reset_required = false, skip_reset = false;
8579 int index, sbte_index;
8580 u64 event_mask = 0;
8581 u16 event_type;
8582
8583 ctl = le32_to_cpu(eq_entry->hdr.ctl);
8584 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8585
8586 if (event_type >= GAUDI2_EVENT_SIZE) {
8587 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
8588 event_type, GAUDI2_EVENT_SIZE - 1);
8589 return;
8590 }
8591
8592 gaudi2->events_stat[event_type]++;
8593 gaudi2->events_stat_aggregate[event_type]++;
8594
8595 gaudi2_print_irq_info(hdev, event_type);
8596
8597 switch (event_type) {
8598 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8599 fallthrough;
8600 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8601 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8602 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8603 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8604 break;
8605
8606 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8607 fallthrough;
8608 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8609 fallthrough;
8610 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8611 gaudi2_handle_qman_err(hdev, event_type);
8612 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8613 break;
8614
8615 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8616 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8617 gaudi2_handle_arc_farm_sei_err(hdev);
8618 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8619 break;
8620
8621 case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8622 gaudi2_handle_cpu_sei_err(hdev);
8623 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8624 break;
8625
8626 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8627 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8628 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8629 gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info);
8630 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8631 break;
8632
8633 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8634 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8635 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8636 gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause);
8637 gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8638 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8639 break;
8640
8641 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8642 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8643 gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
8644 &eq_entry->razwi_with_intr_cause);
8645 gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8646 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8647 break;
8648
8649 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8650 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8651 gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info);
8652 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8653 break;
8654
8655 case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8656 case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8657 case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8658 case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8659 case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8660 case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8661 case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8662 case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8663 case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8664 case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8665 case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8666 case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8667 case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8668 case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8669 case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8670 case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8671 case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8672 case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8673 case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8674 case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8675 case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8676 case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8677 case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8678 case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8679 case GAUDI2_EVENT_TPC24_KERNEL_ERR:
8680 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8681 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8682 gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause);
8683 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8684 break;
8685
8686 case GAUDI2_EVENT_DEC0_SPI:
8687 case GAUDI2_EVENT_DEC1_SPI:
8688 case GAUDI2_EVENT_DEC2_SPI:
8689 case GAUDI2_EVENT_DEC3_SPI:
8690 case GAUDI2_EVENT_DEC4_SPI:
8691 case GAUDI2_EVENT_DEC5_SPI:
8692 case GAUDI2_EVENT_DEC6_SPI:
8693 case GAUDI2_EVENT_DEC7_SPI:
8694 case GAUDI2_EVENT_DEC8_SPI:
8695 case GAUDI2_EVENT_DEC9_SPI:
8696 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8697 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8698 gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info);
8699 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8700 break;
8701
8702 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8703 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8704 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8705 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8706 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8707 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8708 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8709 gaudi2_handle_mme_err(hdev, index,
8710 "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info);
8711 gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8712 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8713 break;
8714
8715 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
8716 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
8717 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
8718 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
8719 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
8720 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
8721 GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
8722 gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info);
8723 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8724 break;
8725
8726 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
8727 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
8728 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
8729 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
8730 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
8731 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
8732 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
8733 gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info);
8734 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8735 break;
8736
8737 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
8738 case GAUDI2_EVENT_KDMA0_CORE:
8739 gaudi2_handle_kdma_core_event(hdev,
8740 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8741 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8742 break;
8743
8744 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
8745 gaudi2_handle_dma_core_event(hdev,
8746 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8747 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8748 break;
8749
8750 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
8751 gaudi2_print_pcie_addr_dec_info(hdev,
8752 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8753 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8754 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8755 break;
8756
8757 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8758 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8759 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8760 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8761 gaudi2_handle_mmu_spi_sei_err(hdev, event_type);
8762 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8763 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8764 break;
8765
8766 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
8767 gaudi2_handle_hif_fatal(hdev, event_type,
8768 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8769 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8770 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8771 break;
8772
8773 case GAUDI2_EVENT_PMMU_FATAL_0:
8774 gaudi2_handle_pif_fatal(hdev,
8775 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8776 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8777 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8778 break;
8779
8780 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
8781 gaudi2_ack_psoc_razwi_event_handler(hdev);
8782 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8783 break;
8784
8785 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
8786 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8787 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
8788 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8789 reset_required = true;
8790 }
8791 break;
8792
8793 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
8794 gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8795 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8796 break;
8797
8798 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
8799 gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8800 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8801 break;
8802
8803 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
8804 gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
8805 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8806 break;
8807
8808 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
8809 gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8810 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8811 break;
8812
8813 case GAUDI2_EVENT_CPU_AXI_ECC:
8814 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8815 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8816 break;
8817 case GAUDI2_EVENT_CPU_L2_RAM_ECC:
8818 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8819 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8820 break;
8821 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
8822 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
8823 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
8824 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
8825 index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) /
8826 (GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
8827 GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
8828 sbte_index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) %
8829 (GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
8830 GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
8831 gaudi2_handle_mme_sbte_err(hdev, index, sbte_index,
8832 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8833 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8834 break;
8835 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
8836 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8837 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8838 break;
8839 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
8840 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8841 break;
8842 case GAUDI2_EVENT_PSOC_PRSTN_FALL:
8843 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8844 break;
8845 case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
8846 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8847 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8848 break;
8849 case GAUDI2_EVENT_PCIE_FATAL_ERR:
8850 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8851 break;
8852 case GAUDI2_EVENT_TPC0_BMON_SPMU:
8853 case GAUDI2_EVENT_TPC1_BMON_SPMU:
8854 case GAUDI2_EVENT_TPC2_BMON_SPMU:
8855 case GAUDI2_EVENT_TPC3_BMON_SPMU:
8856 case GAUDI2_EVENT_TPC4_BMON_SPMU:
8857 case GAUDI2_EVENT_TPC5_BMON_SPMU:
8858 case GAUDI2_EVENT_TPC6_BMON_SPMU:
8859 case GAUDI2_EVENT_TPC7_BMON_SPMU:
8860 case GAUDI2_EVENT_TPC8_BMON_SPMU:
8861 case GAUDI2_EVENT_TPC9_BMON_SPMU:
8862 case GAUDI2_EVENT_TPC10_BMON_SPMU:
8863 case GAUDI2_EVENT_TPC11_BMON_SPMU:
8864 case GAUDI2_EVENT_TPC12_BMON_SPMU:
8865 case GAUDI2_EVENT_TPC13_BMON_SPMU:
8866 case GAUDI2_EVENT_TPC14_BMON_SPMU:
8867 case GAUDI2_EVENT_TPC15_BMON_SPMU:
8868 case GAUDI2_EVENT_TPC16_BMON_SPMU:
8869 case GAUDI2_EVENT_TPC17_BMON_SPMU:
8870 case GAUDI2_EVENT_TPC18_BMON_SPMU:
8871 case GAUDI2_EVENT_TPC19_BMON_SPMU:
8872 case GAUDI2_EVENT_TPC20_BMON_SPMU:
8873 case GAUDI2_EVENT_TPC21_BMON_SPMU:
8874 case GAUDI2_EVENT_TPC22_BMON_SPMU:
8875 case GAUDI2_EVENT_TPC23_BMON_SPMU:
8876 case GAUDI2_EVENT_TPC24_BMON_SPMU:
8877 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
8878 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
8879 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
8880 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
8881 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
8882 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
8883 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
8884 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
8885 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
8886 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
8887 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
8888 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
8889 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
8890 fallthrough;
8891 case GAUDI2_EVENT_DEC0_BMON_SPMU:
8892 case GAUDI2_EVENT_DEC1_BMON_SPMU:
8893 case GAUDI2_EVENT_DEC2_BMON_SPMU:
8894 case GAUDI2_EVENT_DEC3_BMON_SPMU:
8895 case GAUDI2_EVENT_DEC4_BMON_SPMU:
8896 case GAUDI2_EVENT_DEC5_BMON_SPMU:
8897 case GAUDI2_EVENT_DEC6_BMON_SPMU:
8898 case GAUDI2_EVENT_DEC7_BMON_SPMU:
8899 case GAUDI2_EVENT_DEC8_BMON_SPMU:
8900 case GAUDI2_EVENT_DEC9_BMON_SPMU:
8901 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
8902 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8903 break;
8904
8905 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8906 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8907 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8908 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8909 gaudi2_print_clk_change_info(hdev, event_type);
8910 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8911 break;
8912
8913 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
8914 gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8915 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8916 break;
8917
8918 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
8919 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8920 		/* Do nothing - FW will handle it */
8921 break;
8922
8923 case GAUDI2_EVENT_PCIE_P2P_MSIX:
8924 gaudi2_handle_pcie_p2p_msix(hdev);
8925 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8926 break;
8927
8928 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
8929 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
8930 skip_reset = !gaudi2_handle_sm_err(hdev, index);
8931 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8932 break;
8933
8934 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
8935 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8936 break;
8937
8938 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
8939 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
8940 le64_to_cpu(eq_entry->data[0]));
8941 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8942 break;
8943 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
8944 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
8945 le64_to_cpu(eq_entry->data[0]));
8946 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8947 break;
8948
8949 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
8950 gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err);
8951 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8952 break;
8953
8954 case GAUDI2_EVENT_ARC_DCCM_FULL:
8955 hl_arc_event_handle(hdev, &eq_entry->arc_data);
8956 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8957 break;
8958
8959 default:
8960 if (gaudi2_irq_map_table[event_type].valid)
8961 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
8962 event_type);
8963 }
8964
8965 if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
8966 goto reset_device;
8967
8968 /* Send unmask irq only for interrupts not classified as MSG */
8969 if (!gaudi2_irq_map_table[event_type].msg)
8970 hl_fw_unmask_irq(hdev, event_type);
8971
8972 if (event_mask)
8973 hl_notifier_event_send_all(hdev, event_mask);
8974
8975 return;
8976
8977 reset_device:
8978 if (hdev->hard_reset_on_fw_events) {
8979 hl_device_reset(hdev, reset_flags);
8980 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
8981 } else {
8982 if (!gaudi2_irq_map_table[event_type].msg)
8983 hl_fw_unmask_irq(hdev, event_type);
8984 }
8985
8986 if (event_mask)
8987 hl_notifier_event_send_all(hdev, event_mask);
8988 }
8989
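/*
 * gaudi2_memset_device_memory() - fill a device memory range with a 64-bit value
 * using the enabled EDMA engines.
 *
 * Each enabled EDMA is programmed in memset mode (MEM_SET commit): the fill value
 * sits in the SRC base registers, the destination is the current chunk (up to 2GB
 * per engine) and write-completion increments a reserved sync object. The SOB is
 * polled until it equals the number of engines that were kicked, and the loop
 * repeats until the whole range is covered. MMU bypass is enabled on the EDMAs
 * for the duration of the scrub and restored afterwards.
 */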
8990 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
8991 {
8992 struct asic_fixed_properties *prop = &hdev->asic_prop;
8993 u64 comp_addr, cur_addr = addr, end_addr = addr + size;
8994 u32 chunk_size, busy, dcore, edma_idx, sob_offset, sob_addr, comp_val, edma_commit;
8995 u32 old_mmubp, mmubp;
8996 int rc = 0;
8997
8998 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
8999 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9000 comp_addr = CFG_BASE + sob_addr;
9001 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9002 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9003
9004 edma_commit = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
9005 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1) |
9006 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
9007 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9008 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9009
9010 if (prop->edma_enabled_mask == 0) {
9011 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
9012 return -EIO;
9013 }
9014
9015 /*
9016 	 * set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
9017 * only the first one to restore later
9018 */
9019 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9020 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9021 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9022 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9023 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9024
9025 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9026 continue;
9027
9028 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9029 edma_offset, mmubp);
9030 }
9031 }
9032
9033 while (cur_addr < end_addr) {
9034 int dma_num = 0;
9035
9036 WREG32(sob_addr, 0);
9037 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9038 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9039 u32 edma_offset = dcore * DCORE_OFFSET +
9040 edma_idx * DCORE_EDMA_OFFSET;
9041 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9042
9043 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9044 continue;
9045
9046 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9047
9048 WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_LO + edma_offset,
9049 lower_32_bits(val));
9050 WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_HI + edma_offset,
9051 upper_32_bits(val));
9052
9053 WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_LO + edma_offset,
9054 lower_32_bits(cur_addr));
9055 WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_HI + edma_offset,
9056 upper_32_bits(cur_addr));
9057
9058 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9059 lower_32_bits(comp_addr));
9060 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9061 upper_32_bits(comp_addr));
9062 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9063 comp_val);
9064
9065 WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_TSIZE_0 + edma_offset,
9066 chunk_size);
9067 WREG32(mmDCORE0_EDMA0_CORE_CTX_COMMIT + edma_offset, edma_commit);
9068
9069 dma_num++;
9070
9071 cur_addr += chunk_size;
9072
9073 if (cur_addr == end_addr)
9074 goto poll;
9075 }
9076 }
9077 poll:
9078 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9079 if (rc) {
9080 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9081 goto end;
9082 }
9083 }
9084 end:
9085 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9086 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9087 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9088 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9089
9090 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9091 continue;
9092
9093 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9094 }
9095 }
9096
9097 WREG32(sob_addr, 0);
9098 return rc;
9099 }
9100
9101 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9102 {
9103 int rc;
9104 struct asic_fixed_properties *prop = &hdev->asic_prop;
9105 u64 size = prop->dram_end_address - prop->dram_user_base_address;
9106
9107 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9108
9109 if (rc)
9110 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
9111 prop->dram_user_base_address, size);
9112 return rc;
9113 }
9114
9115 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9116 {
9117 int rc;
9118 struct asic_fixed_properties *prop = &hdev->asic_prop;
9119 u64 val = hdev->memory_scrub_val;
9120 u64 addr, size;
9121
9122 if (!hdev->memory_scrub)
9123 return 0;
9124
9125 /* scrub SRAM */
9126 addr = prop->sram_user_base_address;
9127 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9128 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9129 addr, addr + size, val);
9130 rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9131 if (rc) {
9132 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9133 return rc;
9134 }
9135
9136 /* scrub DRAM */
9137 rc = gaudi2_scrub_device_dram(hdev, val);
9138 if (rc) {
9139 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9140 return rc;
9141 }
9142 return 0;
9143 }
9144
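/*
 * Clear the user-available sync manager resources in all dcores: CQ registers,
 * monitors (MON_STATUS keeps only the protection bit, MON_CONFIG is zeroed) and
 * sync objects. In dcore0 only the range starting at the first user-available
 * index is cleared; the other dcores are cleared from the start of the block.
 */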
9145 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9146 {
9147 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9148 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9149 u32 val, size, offset;
9150 int dcore_id;
9151
9152 offset = hdev->asic_prop.first_available_cq[0] * 4;
9153 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9154 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9155 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9156 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9157 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9158 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9159 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9160 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9161
9162 /* memset dcore0 CQ registers */
9163 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9164 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9165 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9166 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9167 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9168
9169 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9170 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9171 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9172 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9173 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9174 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9175 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9176
9177 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9178 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9179 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9180 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9181 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9182 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9183 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9184
9185 cq_lbw_l_addr += DCORE_OFFSET;
9186 cq_lbw_h_addr += DCORE_OFFSET;
9187 cq_lbw_data_addr += DCORE_OFFSET;
9188 cq_base_l_addr += DCORE_OFFSET;
9189 cq_base_h_addr += DCORE_OFFSET;
9190 cq_size_addr += DCORE_OFFSET;
9191 }
9192
9193 offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9194 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9195 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9196 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9197
9198 /* memset dcore0 monitors */
9199 gaudi2_memset_device_lbw(hdev, addr, size, val);
9200
9201 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9202 gaudi2_memset_device_lbw(hdev, addr, size, 0);
9203
9204 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9205 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9206 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9207
9208 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9209 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9210 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9211 mon_sts_addr += DCORE_OFFSET;
9212 mon_cfg_addr += DCORE_OFFSET;
9213 }
9214
9215 offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9216 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9217 val = 0;
9218 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9219 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9220
9221 /* memset dcore0 sobs */
9222 gaudi2_memset_device_lbw(hdev, addr, size, val);
9223
9224 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9225 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9226
9227 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9228 gaudi2_memset_device_lbw(hdev, addr, size, val);
9229 addr += DCORE_OFFSET;
9230 }
9231
9232 /* Flush all WREG to prevent race */
9233 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9234 }
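/*
 * Clear the fence counters and the arbitration config of every enabled user
 * queue, from the PDMA queues up to the rotators. The NIC queues are handled
 * separately by gaudi2_restore_nic_qm_registers().
 */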
9235
9236 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9237 {
9238 u32 reg_base, hw_queue_id;
9239
9240 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9241 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9242 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9243 continue;
9244
9245 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9246
9247 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9248 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9249 }
9250
9251 /* Flush all WREG to prevent race */
9252 RREG32(mmPDMA0_QM_ARB_CFG_0);
9253 }
9254
9255 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9256 {
9257 u32 reg_base, hw_queue_id;
9258
9259 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9260 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9261 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9262 continue;
9263
9264 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9265
9266 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9267 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9268 }
9269
9270 /* Flush all WREG to prevent race */
9271 RREG32(mmPDMA0_QM_ARB_CFG_0);
9272 }
9273
9274 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9275 {
9276 return 0;
9277 }
9278
9279 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9280 {
9281 }
9282
9283 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9284 struct dup_block_ctx *cfg_ctx)
9285 {
9286 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9287 u8 seq;
9288 int i;
9289
9290 for (i = 0 ; i < cfg_ctx->instances ; i++) {
9291 seq = block_idx * cfg_ctx->instances + i;
9292
9293 /* skip disabled instance */
9294 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9295 continue;
9296
9297 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9298 cfg_ctx->data);
9299 }
9300 }
9301
9302 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9303 u64 mask)
9304 {
9305 int i;
9306
9307 cfg_ctx->enabled_mask = mask;
9308
9309 for (i = 0 ; i < cfg_ctx->blocks ; i++)
9310 gaudi2_init_block_instances(hdev, i, cfg_ctx);
9311 }
9312
9313 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9314 {
9315 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9316 }
9317
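/*
 * Read device memory into a host buffer using the KDMA engine: allocate a 2MB
 * coherent host buffer, reserve and map a host VA block in the compute
 * context's MMU, switch KDMA to that context's ASID and copy the device memory
 * in chunks of up to 2MB, memcpy'ing each chunk into blob_addr. The mappings
 * and the KDMA ASID are restored before returning.
 */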
9318 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9319 {
9320 void *host_mem_virtual_addr;
9321 dma_addr_t host_mem_dma_addr;
9322 u64 reserved_va_base;
9323 u32 pos, size_left, size_to_dma;
9324 struct hl_ctx *ctx;
9325 int rc = 0;
9326
9327 /* Fetch the ctx */
9328 ctx = hl_get_compute_ctx(hdev);
9329 if (!ctx) {
9330 dev_err(hdev->dev, "No ctx available\n");
9331 return -EINVAL;
9332 }
9333
9334 /* Allocate buffers for read and for poll */
9335 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9336 GFP_KERNEL | __GFP_ZERO);
9337 if (host_mem_virtual_addr == NULL) {
9338 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9339 rc = -ENOMEM;
9340 goto put_ctx;
9341 }
9342
9343 /* Reserve VM region on asic side */
9344 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9345 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9346 if (!reserved_va_base) {
9347 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9348 rc = -ENOMEM;
9349 goto free_data_buffer;
9350 }
9351
9352 /* Create mapping on asic side */
9353 mutex_lock(&hdev->mmu_lock);
9354 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9355 hl_mmu_invalidate_cache_range(hdev, false,
9356 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9357 ctx->asid, reserved_va_base, SZ_2M);
9358 mutex_unlock(&hdev->mmu_lock);
9359 if (rc) {
9360 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9361 goto unreserve_va;
9362 }
9363
9364 /* Enable MMU on KDMA */
9365 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9366
9367 pos = 0;
9368 size_left = size;
9369 size_to_dma = SZ_2M;
9370
9371 while (size_left > 0) {
9372 if (size_left < SZ_2M)
9373 size_to_dma = size_left;
9374
9375 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9376 if (rc)
9377 break;
9378
9379 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9380
9381 if (size_left <= SZ_2M)
9382 break;
9383
9384 pos += SZ_2M;
9385 addr += SZ_2M;
9386 size_left -= SZ_2M;
9387 }
9388
9389 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9390
9391 mutex_lock(&hdev->mmu_lock);
9392 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9393 hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9394 ctx->asid, reserved_va_base, SZ_2M);
9395 mutex_unlock(&hdev->mmu_lock);
9396 unreserve_va:
9397 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9398 free_data_buffer:
9399 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9400 put_ctx:
9401 hl_ctx_put(ctx);
9402
9403 return rc;
9404 }
9405
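/*
 * Set up the internal CB pool used for signal/wait CBs: allocate a host
 * coherent buffer, expose it through a gen_pool whose allocation order fits
 * the smaller of the signal/wait CB sizes, then reserve a host VA block and
 * map the buffer through the PMMU for this context. Skipped when the PMMU is
 * not initialized.
 */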
9406 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9407 {
9408 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9409 int min_alloc_order, rc;
9410
9411 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9412 return 0;
9413
9414 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9415 HOST_SPACE_INTERNAL_CB_SZ,
9416 &hdev->internal_cb_pool_dma_addr,
9417 GFP_KERNEL | __GFP_ZERO);
9418
9419 if (!hdev->internal_cb_pool_virt_addr)
9420 return -ENOMEM;
9421
9422 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9423 gaudi2_get_wait_cb_size(hdev)));
9424
9425 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9426 if (!hdev->internal_cb_pool) {
9427 dev_err(hdev->dev, "Failed to create internal CB pool\n");
9428 rc = -ENOMEM;
9429 goto free_internal_cb_pool;
9430 }
9431
9432 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9433 HOST_SPACE_INTERNAL_CB_SZ, -1);
9434 if (rc) {
9435 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9436 rc = -EFAULT;
9437 goto destroy_internal_cb_pool;
9438 }
9439
9440 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9441 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9442
9443 if (!hdev->internal_cb_va_base) {
9444 rc = -ENOMEM;
9445 goto destroy_internal_cb_pool;
9446 }
9447
9448 mutex_lock(&hdev->mmu_lock);
9449 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9450 HOST_SPACE_INTERNAL_CB_SZ);
9451 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9452 mutex_unlock(&hdev->mmu_lock);
9453
9454 if (rc)
9455 goto unreserve_internal_cb_pool;
9456
9457 return 0;
9458
9459 unreserve_internal_cb_pool:
9460 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9461 destroy_internal_cb_pool:
9462 gen_pool_destroy(hdev->internal_cb_pool);
9463 free_internal_cb_pool:
9464 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9465 hdev->internal_cb_pool_dma_addr);
9466
9467 return rc;
9468 }
9469
9470 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9471 {
9472 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9473
9474 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9475 return;
9476
9477 mutex_lock(&hdev->mmu_lock);
9478 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9479 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9480 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9481 mutex_unlock(&hdev->mmu_lock);
9482
9483 gen_pool_destroy(hdev->internal_cb_pool);
9484
9485 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9486 hdev->internal_cb_pool_dma_addr);
9487 }
9488
9489 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9490 {
9491 gaudi2_restore_user_sm_registers(hdev);
9492 gaudi2_restore_user_qm_registers(hdev);
9493 }
9494
9495 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9496 {
9497 struct hl_device *hdev = ctx->hdev;
9498 struct asic_fixed_properties *prop = &hdev->asic_prop;
9499 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9500 int rc;
9501
9502 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9503 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9504 if (rc)
9505 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9506 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9507
9508 return rc;
9509 }
9510
9511 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9512 {
9513 struct hl_device *hdev = ctx->hdev;
9514 struct asic_fixed_properties *prop = &hdev->asic_prop;
9515 int rc;
9516
9517 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9518 prop->pmmu.page_size, true);
9519 if (rc)
9520 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9521 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9522 }
9523
9524 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9525 {
9526 int rc;
9527
9528 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9529 if (rc)
9530 return rc;
9531
9532 	/* No need to clear the user registers if the device has just
9533 	 * performed a reset; restore only the NIC QM registers
9534 	 */
9535 if (ctx->hdev->reset_upon_device_release)
9536 gaudi2_restore_nic_qm_registers(ctx->hdev);
9537 else
9538 gaudi2_restore_user_registers(ctx->hdev);
9539
9540 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9541 if (rc)
9542 return rc;
9543
9544 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9545 if (rc)
9546 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9547
9548 return rc;
9549 }
9550
9551 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9552 {
9553 if (ctx->asid == HL_KERNEL_ASID_ID)
9554 return;
9555
9556 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9557
9558 gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9559 }
9560
9561 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9562 {
9563 struct hl_device *hdev = cs->ctx->hdev;
9564 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
9565 u32 mon_payload, sob_id, mon_id;
9566
9567 if (!cs_needs_completion(cs))
9568 return 0;
9569
9570 /*
9571 	 * The first 64 SOB/MON pairs are reserved for the driver's QMAN auto-completion
9572 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
9573 	 * cyclic index. The SOB value is increased when each of the CS jobs is
9574 	 * completed. When the SOB reaches the number of CS jobs, the monitor
9575 	 * generates an MSI-X interrupt.
9576 */
9577
9578 sob_id = mon_id = index;
9579 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9580 (1 << CQ_ENTRY_READY_SHIFT) | index;
9581
9582 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9583 cs->jobs_cnt);
9584
9585 return 0;
9586 }
9587
9588 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9589 {
9590 return HL_INVALID_QUEUE;
9591 }
9592
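/*
 * Append a single MSG_SHORT packet at offset 'size' of the CB that increments
 * sync object 'sob_id' by 1 (SOB register file, ADD mode) and return the
 * updated CB size.
 */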
9593 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9594 {
9595 struct hl_cb *cb = data;
9596 struct packet_msg_short *pkt;
9597 u32 value, ctl, pkt_size = sizeof(*pkt);
9598
9599 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9600 memset(pkt, 0, pkt_size);
9601
9602 /* Inc by 1, Mode ADD */
9603 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9604 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9605
9606 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9607 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9608 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9609 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9610 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9611
9612 pkt->value = cpu_to_le32(value);
9613 pkt->ctl = cpu_to_le32(ctl);
9614
9615 return size + pkt_size;
9616 }
9617
9618 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9619 {
9620 u32 ctl, pkt_size = sizeof(*pkt);
9621
9622 memset(pkt, 0, pkt_size);
9623
9624 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9625 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
9626 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9627 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9628 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9629
9630 pkt->value = cpu_to_le32(value);
9631 pkt->ctl = cpu_to_le32(ctl);
9632
9633 return pkt_size;
9634 }
9635
9636 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9637 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
9638 {
9639 u32 ctl, value, pkt_size = sizeof(*pkt);
9640 u8 mask;
9641
9642 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
9643 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
9644 return 0;
9645 }
9646
9647 memset(pkt, 0, pkt_size);
9648
9649 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
9650 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
9651 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
9652 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
9653
9654 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9655 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
9656 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9657 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9658 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9659
9660 pkt->value = cpu_to_le32(value);
9661 pkt->ctl = cpu_to_le32(ctl);
9662
9663 return pkt_size;
9664 }
9665
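/*
 * Append a FENCE packet that stalls the stream on fence counter ID 2 until it
 * reaches a target value of 1, then decrements it by 1. In gaudi2_gen_wait_cb()
 * this is the value the armed monitor writes to the queue's FENCE2 register
 * once the SOB condition is met.
 */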
9666 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
9667 {
9668 u32 ctl, cfg, pkt_size = sizeof(*pkt);
9669
9670 memset(pkt, 0, pkt_size);
9671
9672 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
9673 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
9674 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
9675
9676 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
9677 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9678 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9679
9680 pkt->cfg = cpu_to_le32(cfg);
9681 pkt->ctl = cpu_to_le32(ctl);
9682
9683 return pkt_size;
9684 }
9685
9686 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
9687 {
9688 struct hl_cb *cb = prop->data;
9689 void *buf = (void *) (uintptr_t) (cb->kernel_address);
9690
9691 u64 monitor_base, fence_addr = 0;
9692 u32 stream_index, size = prop->size;
9693 u16 msg_addr_offset;
9694
9695 stream_index = prop->q_idx % 4;
9696 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
9697 QM_FENCE2_OFFSET + stream_index * 4;
9698
9699 /*
9700 * monitor_base should be the content of the base0 address registers,
9701 * so it will be added to the msg short offsets
9702 */
9703 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9704
9705 /* First monitor config packet: low address of the sync */
9706 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
9707 monitor_base;
9708
9709 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
9710
9711 /* Second monitor config packet: high address of the sync */
9712 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
9713 monitor_base;
9714
9715 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
9716
9717 /*
9718 * Third monitor config packet: the payload, i.e. what to write when the
9719 * sync triggers
9720 */
9721 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
9722 monitor_base;
9723
9724 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9725
9726 /* Fourth monitor config packet: bind the monitor to a sync object */
9727 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
9728
9729 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
9730 prop->sob_val, msg_addr_offset);
9731
9732 /* Fence packet */
9733 size += gaudi2_add_fence_pkt(buf + size);
9734
9735 return size;
9736 }
9737
9738 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
9739 {
9740 struct hl_hw_sob *hw_sob = data;
9741
9742 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
9743
9744 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
9745
9746 kref_init(&hw_sob->kref);
9747 }
9748
9749 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
9750 {
9751 }
9752
9753 static u64 gaudi2_get_device_time(struct hl_device *hdev)
9754 {
9755 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9756
9757 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9758 }
9759
9760 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
9761 {
9762 return 0;
9763 }
9764
9765 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
9766 struct hl_cs *cs, u32 wait_queue_id,
9767 u32 collective_engine_id, u32 encaps_signal_offset)
9768 {
9769 return -EINVAL;
9770 }
9771
9772 /*
9773  * gaudi2_mmu_scramble_addr - converts a DRAM (non-power-of-2) page-size aligned address
9774  *                            to a DMMU page-size (64MB) address before mapping it in
9775  *                            the MMU.
9776  * The operation is performed on both the virtual and physical addresses.
9777  * For a device with 6 HBMs the scramble is:
9778 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
9779 *
9780 * Example:
9781 * =============================================================================
9782 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA
9783 * Phys address in MMU last
9784 * HOP
9785 * =============================================================================
9786 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1
9787 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3
9788 * =============================================================================
9789 */
9790 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
9791 {
9792 struct asic_fixed_properties *prop = &hdev->asic_prop;
9793 u32 divisor, mod_va;
9794 u64 div_va;
9795
9796 /* accept any address in the DRAM address space */
9797 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
9798 VA_HBM_SPACE_END)) {
9799
9800 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
9801 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
9802 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
9803 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
9804 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
9805 }
9806
9807 return raw_addr;
9808 }
9809
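/*
 * gaudi2_mmu_descramble_addr - the inverse of gaudi2_mmu_scramble_addr():
 * split the scrambled address into its 64MB page index and intra-page offset
 * and recombine them using the functional-HBM based divisor to recover the
 * original DRAM address.
 */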
9810 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
9811 {
9812 struct asic_fixed_properties *prop = &hdev->asic_prop;
9813 u32 divisor, mod_va;
9814 u64 div_va;
9815
9816 /* accept any address in the DRAM address space */
9817 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
9818 VA_HBM_SPACE_END)) {
9819
9820 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
9821 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
9822 PAGE_SIZE_64MB, &mod_va);
9823
9824 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
9825 (div_va * divisor + mod_va));
9826 }
9827
9828 return scrambled_addr;
9829 }
9830
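/*
 * Decoder cores 0-7 belong to the dcores (NUM_OF_DEC_PER_DCORE decoders per
 * dcore); higher core IDs map to the PCIe shared decoders.
 */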
9831 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
9832 {
9833 u32 base = 0, dcore_id, dec_id;
9834
9835 if (core_id >= NUMBER_OF_DEC) {
9836 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
9837 goto out;
9838 }
9839
9840 if (core_id < 8) {
9841 dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
9842 dec_id = core_id % NUM_OF_DEC_PER_DCORE;
9843
9844 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
9845 dec_id * DCORE_VDEC_OFFSET;
9846 } else {
9847 /* PCIe Shared Decoder */
9848 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
9849 }
9850 out:
9851 return base;
9852 }
9853
9854 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9855 u32 *block_size, u32 *block_id)
9856 {
9857 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9858 int i;
9859
9860 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
9861 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
9862 *block_id = i;
9863 if (block_size)
9864 *block_size = gaudi2->mapped_blocks[i].size;
9865 return 0;
9866 }
9867 }
9868
9869 dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
9870
9871 return -EINVAL;
9872 }
9873
9874 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
9875 u32 block_id, u32 block_size)
9876 {
9877 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9878 u64 offset_in_bar;
9879 u64 address;
9880 int rc;
9881
9882 if (block_id >= NUM_USER_MAPPED_BLOCKS) {
9883 dev_err(hdev->dev, "Invalid block id %u", block_id);
9884 return -EINVAL;
9885 }
9886
9887 /* we allow mapping only an entire block */
9888 if (block_size != gaudi2->mapped_blocks[block_id].size) {
9889 dev_err(hdev->dev, "Invalid block size %u", block_size);
9890 return -EINVAL;
9891 }
9892
9893 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
9894
9895 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
9896
9897 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
9898 VM_DONTCOPY | VM_NORESERVE;
9899
9900 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
9901 block_size, vma->vm_page_prot);
9902 if (rc)
9903 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
9904
9905 return rc;
9906 }
9907
9908 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
9909 {
9910 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9911
9912 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9913 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
9914
9915 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
9916 WREG32(irq_handler_offset,
9917 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
9918 }
9919
9920 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
9921 {
9922 switch (mmu_id) {
9923 case HW_CAP_DCORE0_DMMU0:
9924 *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
9925 break;
9926 case HW_CAP_DCORE0_DMMU1:
9927 *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
9928 break;
9929 case HW_CAP_DCORE0_DMMU2:
9930 *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
9931 break;
9932 case HW_CAP_DCORE0_DMMU3:
9933 *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
9934 break;
9935 case HW_CAP_DCORE1_DMMU0:
9936 *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
9937 break;
9938 case HW_CAP_DCORE1_DMMU1:
9939 *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
9940 break;
9941 case HW_CAP_DCORE1_DMMU2:
9942 *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
9943 break;
9944 case HW_CAP_DCORE1_DMMU3:
9945 *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
9946 break;
9947 case HW_CAP_DCORE2_DMMU0:
9948 *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
9949 break;
9950 case HW_CAP_DCORE2_DMMU1:
9951 *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
9952 break;
9953 case HW_CAP_DCORE2_DMMU2:
9954 *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
9955 break;
9956 case HW_CAP_DCORE2_DMMU3:
9957 *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
9958 break;
9959 case HW_CAP_DCORE3_DMMU0:
9960 *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
9961 break;
9962 case HW_CAP_DCORE3_DMMU1:
9963 *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
9964 break;
9965 case HW_CAP_DCORE3_DMMU2:
9966 *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
9967 break;
9968 case HW_CAP_DCORE3_DMMU3:
9969 *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
9970 break;
9971 case HW_CAP_PMMU:
9972 *mmu_base = mmPMMU_HBW_MMU_BASE;
9973 break;
9974 default:
9975 return -EINVAL;
9976 }
9977
9978 return 0;
9979 }
9980
9981 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
9982 {
9983 bool is_pmmu = (mmu_id == HW_CAP_PMMU);
9984 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9985 u32 mmu_base;
9986
9987 if (!(gaudi2->hw_cap_initialized & mmu_id))
9988 return;
9989
9990 if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
9991 return;
9992
9993 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
9994 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
9995 }
9996
9997 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
9998 {
9999 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10000
10001 /* check all HMMUs */
10002 for (i = 0 ; i < num_of_hmmus ; i++) {
10003 mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10004
10005 if (mmu_cap_mask & mmu_id)
10006 gaudi2_ack_mmu_error(hdev, mmu_id);
10007 }
10008
10009 /* check PMMU */
10010 if (mmu_cap_mask & HW_CAP_PMMU)
10011 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10012
10013 return 0;
10014 }
10015
10016 static void gaudi2_get_msi_info(__le32 *table)
10017 {
10018 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10019 }
10020
10021 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10022 {
10023 switch (pll_idx) {
10024 case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10025 case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10026 case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10027 case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10028 case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10029 case HL_GAUDI2_MME_PLL: return MME_PLL;
10030 case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10031 case HL_GAUDI2_IF_PLL: return IF_PLL;
10032 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10033 case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10034 case HL_GAUDI2_VID_PLL: return VID_PLL;
10035 case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10036 default: return -EINVAL;
10037 }
10038 }
10039
10040 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10041 {
10042 /* Not implemented */
10043 return 0;
10044 }
10045
10046 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10047 {
10048 /* Not implemented */
10049 return 0;
10050 }
10051
10052 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10053 struct hl_device *hdev, struct hl_mon_state_dump *mon)
10054 {
10055 /* Not implemented */
10056 return 0;
10057 }
10058
10059
10060 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10061 u64 status_base_offset, enum hl_sync_engine_type engine_type,
10062 u32 engine_id, char **buf, size_t *size, size_t *offset)
10063 {
10064 /* Not implemented */
10065 return 0;
10066 }
10067
10068
10069 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10070 .monitor_valid = gaudi2_monitor_valid,
10071 .print_single_monitor = gaudi2_print_single_monitor,
10072 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10073 .print_fences_single_engine = gaudi2_print_fences_single_engine,
10074 };
10075
10076 static void gaudi2_state_dump_init(struct hl_device *hdev)
10077 {
10078 /* Not implemented */
10079 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10080 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10081 }
10082
10083 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10084 {
10085 return 0;
10086 }
10087
10088 static u32 *gaudi2_get_stream_master_qid_arr(void)
10089 {
10090 return NULL;
10091 }
10092
10093 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10094 struct attribute_group *dev_vrm_attr_grp)
10095 {
10096 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10097 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10098 }
10099
10100 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10101 u32 page_size, u32 *real_page_size, bool is_dram_addr)
10102 {
10103 struct asic_fixed_properties *prop = &hdev->asic_prop;
10104
10105 	/* for host pages the page size must be a multiple of the MMU page size */
10106 if (!is_dram_addr) {
10107 if (page_size % mmu_prop->page_size)
10108 goto page_size_err;
10109
10110 *real_page_size = mmu_prop->page_size;
10111 return 0;
10112 }
10113
10114 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10115 goto page_size_err;
10116
10117 /*
10118 	 * MMU page size is different from DRAM page size (more precisely, the DMMU page is
10119 	 * greater than the DRAM page size).
10120 	 * For this reason, work with the DRAM page size and let the MMU scrambling routine
10121 	 * handle this mismatch when calculating the address to place in the MMU page table
10122 	 * (in that case also make sure that the dram_page_size is not greater than the
10123 	 * MMU page size).
10124 */
10125 *real_page_size = prop->dram_page_size;
10126
10127 return 0;
10128
10129 page_size_err:
10130 dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10131 page_size, mmu_prop->page_size >> 10);
10132 return -EFAULT;
10133 }
10134
10135 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10136 {
10137 return -EOPNOTSUPP;
10138 }
10139
10140 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10141 {
10142 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10143
10144 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
10145 return 0;
10146
10147 /* TODO: add check for FW version using minor ver once it's known */
10148 return hl_fw_send_device_activity(hdev, open);
10149 }
10150
10151 static const struct hl_asic_funcs gaudi2_funcs = {
10152 .early_init = gaudi2_early_init,
10153 .early_fini = gaudi2_early_fini,
10154 .late_init = gaudi2_late_init,
10155 .late_fini = gaudi2_late_fini,
10156 .sw_init = gaudi2_sw_init,
10157 .sw_fini = gaudi2_sw_fini,
10158 .hw_init = gaudi2_hw_init,
10159 .hw_fini = gaudi2_hw_fini,
10160 .halt_engines = gaudi2_halt_engines,
10161 .suspend = gaudi2_suspend,
10162 .resume = gaudi2_resume,
10163 .mmap = gaudi2_mmap,
10164 .ring_doorbell = gaudi2_ring_doorbell,
10165 .pqe_write = gaudi2_pqe_write,
10166 .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10167 .asic_dma_free_coherent = gaudi2_dma_free_coherent,
10168 .scrub_device_mem = gaudi2_scrub_device_mem,
10169 .scrub_device_dram = gaudi2_scrub_device_dram,
10170 .get_int_queue_base = NULL,
10171 .test_queues = gaudi2_test_queues,
10172 .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10173 .asic_dma_pool_free = gaudi2_dma_pool_free,
10174 .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10175 .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10176 .asic_dma_unmap_single = gaudi2_dma_unmap_single,
10177 .asic_dma_map_single = gaudi2_dma_map_single,
10178 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10179 .cs_parser = gaudi2_cs_parser,
10180 .asic_dma_map_sgtable = hl_dma_map_sgtable,
10181 .add_end_of_cb_packets = NULL,
10182 .update_eq_ci = gaudi2_update_eq_ci,
10183 .context_switch = gaudi2_context_switch,
10184 .restore_phase_topology = gaudi2_restore_phase_topology,
10185 .debugfs_read_dma = gaudi2_debugfs_read_dma,
10186 .add_device_attr = gaudi2_add_device_attr,
10187 .handle_eqe = gaudi2_handle_eqe,
10188 .get_events_stat = gaudi2_get_events_stat,
10189 .read_pte = NULL,
10190 .write_pte = NULL,
10191 .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10192 .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10193 .mmu_prefetch_cache_range = NULL,
10194 .send_heartbeat = gaudi2_send_heartbeat,
10195 .debug_coresight = gaudi2_debug_coresight,
10196 .is_device_idle = gaudi2_is_device_idle,
10197 .compute_reset_late_init = gaudi2_compute_reset_late_init,
10198 .hw_queues_lock = gaudi2_hw_queues_lock,
10199 .hw_queues_unlock = gaudi2_hw_queues_unlock,
10200 .get_pci_id = gaudi2_get_pci_id,
10201 .get_eeprom_data = gaudi2_get_eeprom_data,
10202 .get_monitor_dump = gaudi2_get_monitor_dump,
10203 .send_cpu_message = gaudi2_send_cpu_message,
10204 .pci_bars_map = gaudi2_pci_bars_map,
10205 .init_iatu = gaudi2_init_iatu,
10206 .rreg = hl_rreg,
10207 .wreg = hl_wreg,
10208 .halt_coresight = gaudi2_halt_coresight,
10209 .ctx_init = gaudi2_ctx_init,
10210 .ctx_fini = gaudi2_ctx_fini,
10211 .pre_schedule_cs = gaudi2_pre_schedule_cs,
10212 .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10213 .load_firmware_to_device = NULL,
10214 .load_boot_fit_to_device = NULL,
10215 .get_signal_cb_size = gaudi2_get_signal_cb_size,
10216 .get_wait_cb_size = gaudi2_get_wait_cb_size,
10217 .gen_signal_cb = gaudi2_gen_signal_cb,
10218 .gen_wait_cb = gaudi2_gen_wait_cb,
10219 .reset_sob = gaudi2_reset_sob,
10220 .reset_sob_group = gaudi2_reset_sob_group,
10221 .get_device_time = gaudi2_get_device_time,
10222 .pb_print_security_errors = gaudi2_pb_print_security_errors,
10223 .collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10224 .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10225 .get_dec_base_addr = gaudi2_get_dec_base_addr,
10226 .scramble_addr = gaudi2_mmu_scramble_addr,
10227 .descramble_addr = gaudi2_mmu_descramble_addr,
10228 .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10229 .get_hw_block_id = gaudi2_get_hw_block_id,
10230 .hw_block_mmap = gaudi2_block_mmap,
10231 .enable_events_from_fw = gaudi2_enable_events_from_fw,
10232 .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10233 .get_msi_info = gaudi2_get_msi_info,
10234 .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10235 .init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10236 .init_firmware_loader = gaudi2_init_firmware_loader,
10237 .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10238 .state_dump_init = gaudi2_state_dump_init,
10239 .get_sob_addr = &gaudi2_get_sob_addr,
10240 .set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10241 .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10242 .check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10243 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10244 .access_dev_mem = hl_access_dev_mem,
10245 .set_dram_bar_base = gaudi2_set_hbm_bar_base,
10246 .set_engine_cores = gaudi2_set_engine_cores,
10247 .send_device_activity = gaudi2_send_device_activity,
10248 };
10249
10250 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10251 {
10252 hdev->asic_funcs = &gaudi2_funcs;
10253 }
10254