// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
 * and relies on that value (for array sizes etc.), we define a separate value for the
 * maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
	(!((dma_core_idle_ind_mask) &		\
	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
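
/*
 * Usage sketch (illustrative only, with hypothetical variable names - not the
 * driver's actual idle-check path): a device idle scan would read each
 * engine's status registers, by whatever accessor the caller uses, and feed
 * the raw values to the matching helper, e.g.:
 *
 *	is_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
 *	is_idle &= IS_TPC_IDLE(tpc_cfg_sts);
 *	is_idle &= IS_DMA_IDLE(dma_core_idle_ind_mask);
 *	is_idle &= IS_DEC_IDLE(dec_swreg15);
 */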

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2
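
/*
 * Illustrative sketch only (assumed composition, not the driver's actual
 * range-invalidation flow): the MMU_RANGE_INV_* shifts above describe where
 * the enable bits, the ASID and the VA bounds would sit when a range
 * invalidation request is built, roughly:
 *
 *	inv_ctl = (1 << MMU_RANGE_INV_EN_SHIFT) |
 *		  (1 << MMU_RANGE_INV_ASID_EN_SHIFT) |
 *		  (asid << MMU_RANGE_INV_ASID_SHIFT);
 *	inv_va  = (va_lsb << MMU_RANGE_INV_VA_LSB_SHIFT) |
 *		  (va_msb << MMU_RANGE_INV_VA_MSB_SHIFT);
 */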

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because it
 * has only a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
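
/* With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19, these evaluate to GENMASK(17, 0)
 * (0x3FFFF) for the PMMU and GENMASK(18, 0) (0x7FFFF) for the HMMUs, i.e. the
 * PMMU mask simply drops the topmost ("burst_fifo_full") cause bit.
 */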

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
	u32 log_mask;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB group ID", 0x7FF},
	{"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};

static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};

static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};

const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};

static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1216 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1217 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1218 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1219 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1220 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1221 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1222 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1223 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1224 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1225 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1226 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1227 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1228 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1229 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1230 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1231 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1232 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1233 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1234 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1235 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1236 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1237 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1238 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1239 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1240 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1241 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1242 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1243 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1244 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1245 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1246 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1247 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1248 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1249 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1250 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1251 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1252 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1253 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1254 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1255 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1256 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1257 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1258 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1259 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1260 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1261 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1262 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1263 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1264 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1265 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1266 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1267 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1268 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1269 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1270 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1271 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1272 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1273 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1274 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1275 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1276 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1277 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1278 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1279 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1280 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1281 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1282 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1283 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1284 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1285 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1286 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1287 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1288 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1289 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1290 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1291 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1292 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1293 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1294 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1295 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1296 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1297 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1298 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1299 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1300 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1301 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1302 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1303 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1304 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1305 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1306 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1307 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1308 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1309 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1310 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1311 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1312 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1313 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1314 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1315 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1316 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1317 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1318 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1319 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1320 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1321 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1322 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1323 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1324 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1325 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1326 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1327 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1328 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1329 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1330 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1331 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1332 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1333 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1334 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1335 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1336 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1337 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1338 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1339 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1340 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1341 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1342 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1343 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1344 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1345 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1346 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1347 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1348 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1349 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1350 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1351 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1352 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1353 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1354 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1355 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1356 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1357 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1358 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1359 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1360 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1361 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1362 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1363 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1364 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1365 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1366 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1367 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1368 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1369 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1370 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1371 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1372 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1373 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1374 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1375 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1376 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1377 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1378 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1379 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1380 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1381 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1382 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1383 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1384 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1385 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1386 };
1387 
1388 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1389 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1390 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1391 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1392 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1393 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1394 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1395 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1396 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1397 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1398 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1399 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1400 };
1401 
1402 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1403 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1404 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1405 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1406 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1407 };
1408 
1409 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1410 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1411 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1412 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1413 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1414 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1415 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1416 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1417 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1418 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1419 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1420 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1421 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1422 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1423 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1424 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1425 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1426 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1427 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1428 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1429 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1430 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1431 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1432 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1433 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1434 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1435 };
1436 
1437 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1438 	[ROTATOR_ID_0] = mmROT0_BASE,
1439 	[ROTATOR_ID_1] = mmROT1_BASE
1440 };
1441 
1442 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1443 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1444 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1445 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1446 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1447 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1448 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1449 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1450 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1451 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1452 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1453 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1454 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1455 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1456 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1457 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1458 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1459 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1460 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1461 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1462 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1463 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1464 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1465 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1466 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1467 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1468 };
1469 
1470 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1471 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1472 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1473 };
1474 
1475 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1476 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1477 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1478 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1479 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1480 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1481 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1482 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1483 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1484 };
1485 
1486 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1487 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1488 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1489 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1490 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1491 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1492 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1493 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1494 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1495 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1496 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1497 };
1498 
1499 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1500 	RTR_ID_X_Y(2, 4),
1501 	RTR_ID_X_Y(3, 4),
1502 	RTR_ID_X_Y(4, 4),
1503 	RTR_ID_X_Y(5, 4),
1504 	RTR_ID_X_Y(6, 4),
1505 	RTR_ID_X_Y(7, 4),
1506 	RTR_ID_X_Y(8, 4),
1507 	RTR_ID_X_Y(9, 4),
1508 	RTR_ID_X_Y(10, 4),
1509 	RTR_ID_X_Y(11, 4),
1510 	RTR_ID_X_Y(12, 4),
1511 	RTR_ID_X_Y(13, 4),
1512 	RTR_ID_X_Y(14, 4),
1513 	RTR_ID_X_Y(15, 4),
1514 	RTR_ID_X_Y(16, 4),
1515 	RTR_ID_X_Y(17, 4),
1516 	RTR_ID_X_Y(2, 11),
1517 	RTR_ID_X_Y(3, 11),
1518 	RTR_ID_X_Y(4, 11),
1519 	RTR_ID_X_Y(5, 11),
1520 	RTR_ID_X_Y(6, 11),
1521 	RTR_ID_X_Y(7, 11),
1522 	RTR_ID_X_Y(8, 11),
1523 	RTR_ID_X_Y(9, 11),
1524 	RTR_ID_X_Y(0, 0),/* 24 no id */
1525 	RTR_ID_X_Y(0, 0),/* 25 no id */
1526 	RTR_ID_X_Y(0, 0),/* 26 no id */
1527 	RTR_ID_X_Y(0, 0),/* 27 no id */
1528 	RTR_ID_X_Y(14, 11),
1529 	RTR_ID_X_Y(15, 11),
1530 	RTR_ID_X_Y(16, 11),
1531 	RTR_ID_X_Y(17, 11)
1532 };
1533 
1534 enum rtr_id {
1535 	DCORE0_RTR0,
1536 	DCORE0_RTR1,
1537 	DCORE0_RTR2,
1538 	DCORE0_RTR3,
1539 	DCORE0_RTR4,
1540 	DCORE0_RTR5,
1541 	DCORE0_RTR6,
1542 	DCORE0_RTR7,
1543 	DCORE1_RTR0,
1544 	DCORE1_RTR1,
1545 	DCORE1_RTR2,
1546 	DCORE1_RTR3,
1547 	DCORE1_RTR4,
1548 	DCORE1_RTR5,
1549 	DCORE1_RTR6,
1550 	DCORE1_RTR7,
1551 	DCORE2_RTR0,
1552 	DCORE2_RTR1,
1553 	DCORE2_RTR2,
1554 	DCORE2_RTR3,
1555 	DCORE2_RTR4,
1556 	DCORE2_RTR5,
1557 	DCORE2_RTR6,
1558 	DCORE2_RTR7,
1559 	DCORE3_RTR0,
1560 	DCORE3_RTR1,
1561 	DCORE3_RTR2,
1562 	DCORE3_RTR3,
1563 	DCORE3_RTR4,
1564 	DCORE3_RTR5,
1565 	DCORE3_RTR6,
1566 	DCORE3_RTR7,
1567 };
1568 
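/*
 * Router (RTR) associated with each TPC initiator, ordered by TPC sequence
 * number; the extra last entry is for the PCI TPC (DCORE0_TPC6).
 */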
1569 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1570 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1571 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1572 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1573 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1574 	DCORE0_RTR0
1575 };
1576 
1577 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1578 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1579 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1580 };
1581 
1582 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1583 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1584 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1585 };
1586 
1587 struct sft_info {
1588 	u8 interface_id;
1589 	u8 dcore_id;
1590 };
1591 
1592 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1593 	{0, 0},	{1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3},	{0, 2},	{0, 3},
1594 };
1595 
1596 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1597 	DCORE0_RTR0, DCORE0_RTR0
1598 };
1599 
1600 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1601 	DCORE2_RTR0, DCORE3_RTR7
1602 };
1603 
1604 struct mme_initiators_rtr_id {
1605 	u32 wap0;
1606 	u32 wap1;
1607 	u32 write;
1608 	u32 read;
1609 	u32 sbte0;
1610 	u32 sbte1;
1611 	u32 sbte2;
1612 	u32 sbte3;
1613 	u32 sbte4;
1614 };
1615 
1616 enum mme_initiators {
1617 	MME_WAP0 = 0,
1618 	MME_WAP1,
1619 	MME_WRITE,
1620 	MME_READ,
1621 	MME_SBTE0,
1622 	MME_SBTE1,
1623 	MME_SBTE2,
1624 	MME_SBTE3,
1625 	MME_SBTE4,
1626 	MME_INITIATORS_MAX
1627 };
1628 
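/* per-MME (one per DCORE) router IDs used by each of its initiators */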
1629 static const struct mme_initiators_rtr_id
1630 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1631 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1632 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1633 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1634 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1635 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1636 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1637 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1638 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1639 };
1640 
1641 enum razwi_event_sources {
1642 	RAZWI_TPC,
1643 	RAZWI_MME,
1644 	RAZWI_EDMA,
1645 	RAZWI_PDMA,
1646 	RAZWI_NIC,
1647 	RAZWI_DEC,
1648 	RAZWI_ROT
1649 };
1650 
1651 struct hbm_mc_error_causes {
1652 	u32 mask;
1653 	char cause[50];
1654 };
1655 
1656 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1657 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1658 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1659 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1660 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1661 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1662 };
1663 
1664 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1665 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1666 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1667 	[HBM_SEI_READ_ERR] = "SEI read data error",
1668 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1669 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1670 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1671 	[HBM_SEI_DFI] = "SEI DFI error",
1672 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1673 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1674 };
1675 
1676 struct mmu_spi_sei_cause {
1677 	char cause[50];
1678 	int clear_bit;
1679 };
1680 
1681 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1682 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
1683 	{"page access", 1},		/* INTERRUPT_CLR[1] */
1684 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
1685 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
1686 	{"mmu rei0", -1},		/* no clear register bit */
1687 	{"mmu rei1", -1},		/* no clear register bit */
1688 	{"stlb rei0", -1},		/* no clear register bit */
1689 	{"stlb rei1", -1},		/* no clear register bit */
1690 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
1691 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
1692 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
1693 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
1694 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1695 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1696 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1697 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1698 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
1699 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
1700 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
1701 };
1702 
1703 struct gaudi2_cache_invld_params {
1704 	u64 start_va;
1705 	u64 end_va;
1706 	u32 inv_start_val;
1707 	u32 flags;
1708 	bool range_invalidation;
1709 };
1710 
1711 struct gaudi2_tpc_idle_data {
1712 	struct engines_data *e;
1713 	unsigned long *mask;
1714 	bool *is_idle;
1715 	const char *tpc_fmt;
1716 };
1717 
1718 struct gaudi2_tpc_mmu_data {
1719 	u32 rw_asid;
1720 };
1721 
1722 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1723 
1724 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1725 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1726 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1727 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1728 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1730 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1731 										bool is_memset);
1732 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1733 
1734 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1735 {
1736 
1737 }
1738 
1739 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1740 {
1741 	return sizeof(struct packet_msg_short);
1742 }
1743 
1744 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1745 {
1746 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1747 }
1748 
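/*
 * Invoke ctx->fn() for every enabled TPC, including the PCI TPC (DCORE0_TPC6),
 * and stop at the first iteration that reports an error through ctx->rc.
 */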
1749 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1750 {
1751 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1752 	int dcore, inst, tpc_seq;
1753 	u32 offset;
1754 
1755 	/* init the return code */
1756 	ctx->rc = 0;
1757 
1758 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1759 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1760 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1761 
1762 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1763 				continue;
1764 
1765 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1766 
1767 			ctx->fn(hdev, dcore, inst, offset, ctx);
1768 			if (ctx->rc) {
1769 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1770 							dcore, inst);
1771 				return;
1772 			}
1773 		}
1774 	}
1775 
1776 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1777 		return;
1778 
1779 	/* special check for PCI TPC (DCORE0_TPC6) */
1780 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1781 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1782 	if (ctx->rc)
1783 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1784 }
1785 
1786 static bool gaudi2_host_phys_addr_valid(u64 addr)
1787 {
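	/*
	 * Host physical memory is exposed through two windows; an address that falls
	 * in the hole between the end of the first window and the base of the second
	 * one is not accessible by the device.
	 */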
1788 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1789 		return true;
1790 
1791 	return false;
1792 }
1793 
1794 static int set_number_of_functional_hbms(struct hl_device *hdev)
1795 {
1796 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1797 	u8 faulty_hbms = hweight64(hdev->dram_binning);
1798 
1799 	/* check if all HBMs should be used */
1800 	if (!faulty_hbms) {
1801 		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1802 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
1803 		return 0;
1804 	}
1805 
1806 	/*
1807 	 * check for the error condition in which the number of binning
1808 	 * candidates is higher than the maximum supported by the
1809 	 * driver (in which case the binning mask shall be ignored and the
1810 	 * driver will set the default)
1811 	 */
1812 	if (faulty_hbms > MAX_FAULTY_HBMS) {
1813 		dev_err(hdev->dev,
1814 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1815 			MAX_FAULTY_HBMS, hdev->dram_binning);
1816 		return -EINVAL;
1817 	}
1818 
1819 	/*
1820 	 * in the binned case the number of functional HBMs is GAUDI2_HBM_NUM - faulty_hbms,
1821 	 * which given the limit above is always GAUDI2_HBM_NUM - 1.
1822 	 */
1823 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1824 	return 0;
1825 }
1826 
1827 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1828 {
1829 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1830 	u32 basic_hbm_page_size;
1831 	int rc;
1832 
1833 	rc = set_number_of_functional_hbms(hdev);
1834 	if (rc)
1835 		return -EINVAL;
1836 
1837 	/*
1838 	 * Due to a HW bug in which the TLB is x16 smaller than expected, we work around it
1839 	 * by using an x16 bigger page size, so that the entire HBM mapping can still be
1840 	 * populated in the TLB
1841 	 */
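	/*
	 * Illustrative example: with all six HBMs functional the basic page is
	 * 6 * 8MB = 48MB, which is then scaled by the x16 compensation factor below.
	 */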
1842 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1843 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1844 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1845 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
1846 	prop->dram_base_address = DRAM_PHYS_BASE;
1847 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1848 	prop->dram_supports_virtual_memory = true;
1849 
1850 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1851 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1852 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1853 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1854 
1855 	/* since DRAM page size differs from DMMU page size we need to allocate
1856 	 * DRAM memory in units of dram_page size and mapping this memory in
1857 	 * units of DMMU page size. we overcome this size mismatch using a
1858 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
1859 	 * page.
1860 	 * We therefore:
1861 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
1862 	 *    (suppose we get n such pages)
1863 	 * 2. limit the amount of virtual address space we got from 1 above to
1864 	 *    a multiple of 64M as we don't want the scrambled address to cross
1865 	 *    the DRAM virtual address space.
1866 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
1867 	 * 3. determine the end address accordingly
1868 	 *    end_addr = start_addr + m * 48M
1869 	 *
1870 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
1871 	 */
1872 	prop->dmmu.start_addr = prop->dram_base_address +
1873 			(prop->dram_page_size *
1874 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1875 
1876 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1877 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1878 
1879 	return 0;
1880 }
1881 
1882 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1883 {
1884 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1885 	struct hw_queue_properties *q_props;
1886 	u32 num_sync_stream_queues = 0;
1887 	int i;
1888 
1889 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1890 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1891 					GFP_KERNEL);
1892 
1893 	if (!prop->hw_queues_props)
1894 		return -ENOMEM;
1895 
1896 	q_props = prop->hw_queues_props;
1897 
1898 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1899 		q_props[i].type = QUEUE_TYPE_HW;
1900 		q_props[i].driver_only = 0;
1901 
1902 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1903 			q_props[i].supports_sync_stream = 0;
1904 		} else {
1905 			q_props[i].supports_sync_stream = 1;
1906 			num_sync_stream_queues++;
1907 		}
1908 
1909 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1910 	}
1911 
1912 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1913 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1914 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1915 
1916 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1917 	prop->cfg_base_address = CFG_BASE;
1918 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1919 	prop->host_base_address = HOST_PHYS_BASE_0;
1920 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1921 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1922 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1923 	prop->user_dec_intr_count = NUMBER_OF_DEC;
1924 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1925 	prop->completion_mode = HL_COMPLETION_MODE_CS;
1926 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1927 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1928 
1929 	prop->sram_base_address = SRAM_BASE_ADDR;
1930 	prop->sram_size = SRAM_SIZE;
1931 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1932 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1933 
1934 	prop->hints_range_reservation = true;
1935 
1936 	if (hdev->pldm)
1937 		prop->mmu_pgt_size = 0x800000; /* 8MB */
1938 	else
1939 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1940 
1941 	prop->mmu_pte_size = HL_PTE_SIZE;
1942 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1943 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1944 
1945 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1946 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1947 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1948 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1949 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1950 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1951 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1952 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1953 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1954 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1955 	prop->dmmu.page_size = PAGE_SIZE_1GB;
1956 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1957 	prop->dmmu.last_mask = LAST_MASK;
1958 	prop->dmmu.host_resident = 1;
1959 	/* TODO: will be duplicated until implementing per-MMU props */
1960 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1961 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1962 
1963 	/*
1964 	 * this is done in order to be able to validate the FW descriptor (i.e. to validate
1965 	 * that the addresses and the space allocated for the FW image do not cross memory
1966 	 * bounds). for this reason we set the DRAM size to the minimum possible, and later
1967 	 * it will be modified according to what is reported in the cpucp info packet
1968 	 */
1969 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1970 
1971 	hdev->pmmu_huge_range = true;
1972 	prop->pmmu.host_resident = 1;
1973 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1974 	prop->pmmu.last_mask = LAST_MASK;
1975 	/* TODO: will be duplicated until implementing per-MMU props */
1976 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1977 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1978 
1979 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1980 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1981 	prop->hints_host_hpage_reserved_va_range.start_addr =
1982 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1983 	prop->hints_host_hpage_reserved_va_range.end_addr =
1984 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1985 
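	/* the PMMU hop layout follows the host kernel's base page size (64KB vs 4KB builds) */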
1986 	if (PAGE_SIZE == SZ_64K) {
1987 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1988 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1989 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1990 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1991 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1992 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1993 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1994 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1995 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1996 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1997 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
1998 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
1999 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2000 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2001 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2002 
2003 		/* shifts and masks are the same in PMMU and HPMMU */
2004 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2005 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2006 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2007 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2008 	} else {
2009 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2010 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2011 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2012 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2013 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2014 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2015 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2016 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2017 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2018 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2019 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2020 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2021 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2022 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2023 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2024 
2025 		/* shifts and masks are the same in PMMU and HPMMU */
2026 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2027 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2028 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2029 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2030 	}
2031 
2032 	prop->num_engine_cores = CPU_ID_MAX;
2033 	prop->cfg_size = CFG_SIZE;
2034 	prop->max_asid = MAX_ASID;
2035 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2036 
2037 	prop->dc_power_default = DC_POWER_DEFAULT;
2038 
2039 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2040 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2041 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2042 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2043 
2044 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2045 
2046 	prop->mme_master_slave_mode = 1;
2047 
2048 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2049 					(num_sync_stream_queues * HL_RSVD_SOBS);
2050 
2051 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2052 					(num_sync_stream_queues * HL_RSVD_MONS);
2053 
2054 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2055 
2056 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2057 
2058 	prop->fw_cpu_boot_dev_sts0_valid = false;
2059 	prop->fw_cpu_boot_dev_sts1_valid = false;
2060 	prop->hard_reset_done_by_fw = false;
2061 	prop->gic_interrupts_enable = true;
2062 
2063 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2064 
2065 	prop->max_dec = NUMBER_OF_DEC;
2066 
2067 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2068 
2069 	prop->dma_mask = 64;
2070 
2071 	return 0;
2072 }
2073 
2074 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2075 {
2076 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2077 	bool is_wc[3] = {false, false, true};
2078 	int rc;
2079 
2080 	rc = hl_pci_bars_map(hdev, name, is_wc);
2081 	if (rc)
2082 		return rc;
2083 
2084 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2085 
2086 	return 0;
2087 }
2088 
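/*
 * Move the DRAM BAR window to point at a new device address. Returns the previous
 * window base so the caller can restore it later, or U64_MAX if the window was not
 * re-mapped (iATU configured by the FW, or a mapping failure).
 */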
2089 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2090 {
2091 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2092 	struct hl_inbound_pci_region pci_region;
2093 	u64 old_addr = addr;
2094 	int rc;
2095 
2096 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2097 		return old_addr;
2098 
2099 	if (hdev->asic_prop.iatu_done_by_fw)
2100 		return U64_MAX;
2101 
2102 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2103 	pci_region.mode = PCI_BAR_MATCH_MODE;
2104 	pci_region.bar = DRAM_BAR_ID;
2105 	pci_region.addr = addr;
2106 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2107 	if (rc)
2108 		return U64_MAX;
2109 
2110 	if (gaudi2) {
2111 		old_addr = gaudi2->dram_bar_cur_addr;
2112 		gaudi2->dram_bar_cur_addr = addr;
2113 	}
2114 
2115 	return old_addr;
2116 }
2117 
2118 static int gaudi2_init_iatu(struct hl_device *hdev)
2119 {
2120 	struct hl_inbound_pci_region inbound_region;
2121 	struct hl_outbound_pci_region outbound_region;
2122 	u32 bar_addr_low, bar_addr_high;
2123 	int rc;
2124 
2125 	if (hdev->asic_prop.iatu_done_by_fw)
2126 		return 0;
2127 
2128 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2129 	 * We must map this region in BAR match mode in order to
2130 	 * fetch BAR physical base address
2131 	 */
2132 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2133 	inbound_region.bar = SRAM_CFG_BAR_ID;
2134 	/* Base address must be aligned to Bar size which is 256 MB */
2135 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2136 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2137 	if (rc)
2138 		return rc;
2139 
2140 	/* Fetch physical BAR address */
2141 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2142 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2143 
2144 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2145 
2146 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2147 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2148 	inbound_region.bar = SRAM_CFG_BAR_ID;
2149 	inbound_region.offset_in_bar = 0;
2150 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2151 	inbound_region.size = CFG_REGION_SIZE;
2152 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2153 	if (rc)
2154 		return rc;
2155 
2156 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2157 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2158 	inbound_region.bar = SRAM_CFG_BAR_ID;
2159 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2160 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2161 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2162 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2163 	if (rc)
2164 		return rc;
2165 
2166 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2167 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2168 	inbound_region.bar = DRAM_BAR_ID;
2169 	inbound_region.addr = DRAM_PHYS_BASE;
2170 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2171 	if (rc)
2172 		return rc;
2173 
2174 	/* Outbound Region 0 - Point to Host */
2175 	outbound_region.addr = HOST_PHYS_BASE_0;
2176 	outbound_region.size = HOST_PHYS_SIZE_0;
2177 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2178 
2179 	return rc;
2180 }
2181 
2182 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2183 {
2184 	return RREG32(mmHW_STATE);
2185 }
2186 
2187 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2188 {
2189 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2190 
2191 	/*
2192 	 * check for error condition in which number of binning candidates
2193 	 * is higher than the maximum supported by the driver
2194 	 */
2195 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2196 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2197 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2198 					hdev->tpc_binning);
2199 		return -EINVAL;
2200 	}
2201 
2202 	prop->tpc_binning_mask = hdev->tpc_binning;
2203 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2204 
2205 	return 0;
2206 }
2207 
2208 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2209 {
2210 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2211 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2212 	u64 tpc_binning_mask;
2213 	u8 subst_idx = 0;
2214 	int i, rc;
2215 
2216 	rc = gaudi2_tpc_binning_init_prop(hdev);
2217 	if (rc)
2218 		return rc;
2219 
2220 	tpc_binning_mask = prop->tpc_binning_mask;
2221 
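	/*
	 * Each faulty TPC in the binning mask is compensated by a substitute engine
	 * (DCORE0_TPC6 first, then DCORE3_TPC5) which is removed from the enabled
	 * mask and has its four queues marked as binned.
	 */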
2222 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2223 		u8 subst_seq, binned, qid_base;
2224 
2225 		if (tpc_binning_mask == 0)
2226 			break;
2227 
2228 		if (subst_idx == 0) {
2229 			subst_seq = TPC_ID_DCORE0_TPC6;
2230 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2231 		} else {
2232 			subst_seq = TPC_ID_DCORE3_TPC5;
2233 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2234 		}
2235 
2237 		/* clear bit from mask */
2238 		binned = __ffs(tpc_binning_mask);
2239 		/*
2240 		 * Coverity complains about possible out-of-bound access in
2241 		 * clear_bit
2242 		 */
2243 		if (binned >= TPC_ID_SIZE) {
2244 			dev_err(hdev->dev,
2245 				"Invalid binned TPC (binning mask: %llx)\n",
2246 				tpc_binning_mask);
2247 			return -EINVAL;
2248 		}
2249 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2250 
2251 		/* also clear replacing TPC bit from enabled mask */
2252 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2253 
2254 		/* bin the substitute TPC's queues */
2255 		q_props[qid_base].binned = 1;
2256 		q_props[qid_base + 1].binned = 1;
2257 		q_props[qid_base + 2].binned = 1;
2258 		q_props[qid_base + 3].binned = 1;
2259 
2260 		subst_idx++;
2261 	}
2262 
2263 	return 0;
2264 }
2265 
2266 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2267 {
2268 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2269 	u8 num_faulty;
2270 
2271 	num_faulty = hweight32(hdev->decoder_binning);
2272 
2273 	/*
2274 	 * check for error condition in which number of binning candidates
2275 	 * is higher than the maximum supported by the driver
2276 	 */
2277 	if (num_faulty > MAX_FAULTY_DECODERS) {
2278 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2279 						hdev->decoder_binning);
2280 		return -EINVAL;
2281 	}
2282 
2283 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2284 
2285 	if (prop->decoder_binning_mask)
2286 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2287 	else
2288 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2289 
2290 	return 0;
2291 }
2292 
2293 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2294 {
2295 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2296 
2297 	/* check if we should override default binning */
2298 	if (!hdev->dram_binning) {
2299 		prop->dram_binning_mask = 0;
2300 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2301 		return;
2302 	}
2303 
2304 	/* set DRAM binning constraints */
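	/* note that HBM_ID5 is always the one dropped from the enabled mask when binning is in effect */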
2305 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2306 	prop->dram_binning_mask = hdev->dram_binning;
2307 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2308 }
2309 
2310 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2311 {
2312 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2313 	struct hw_queue_properties *q_props;
2314 	u8 seq, num_faulty;
2315 
2316 	num_faulty = hweight32(hdev->edma_binning);
2317 
2318 	/*
2319 	 * check for error condition in which number of binning candidates
2320 	 * is higher than the maximum supported by the driver
2321 	 */
2322 	if (num_faulty > MAX_FAULTY_EDMAS) {
2323 		dev_err(hdev->dev,
2324 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2325 			hdev->edma_binning);
2326 		return -EINVAL;
2327 	}
2328 
2329 	if (!hdev->edma_binning) {
2330 		prop->edma_binning_mask = 0;
2331 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2332 		return 0;
2333 	}
2334 
2335 	seq = __ffs((unsigned long)hdev->edma_binning);
2336 
2337 	/* set binning constraints */
2338 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2339 	prop->edma_binning_mask = hdev->edma_binning;
2340 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2341 
2342 	/* bin substitute EDMA's queue */
2343 	q_props = prop->hw_queues_props;
2344 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2345 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2346 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2347 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2348 
2349 	return 0;
2350 }
2351 
2352 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2353 {
2354 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2355 	u8 num_faulty, seq;
2356 
2357 	/* check if we should override default binning */
2358 	if (!xbar_edge_iso_mask) {
2359 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2360 		return 0;
2361 	}
2362 
2363 	/*
2364 	 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2365 	 * only the FW can set a redundancy value). for the user it will always be 0.
2366 	 */
2367 	num_faulty = hweight32(xbar_edge_iso_mask);
2368 
2369 	/*
2370 	 * check for error condition in which number of binning candidates
2371 	 * is higher than the maximum supported by the driver
2372 	 */
2373 	if (num_faulty > MAX_FAULTY_XBARS) {
2374 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2375 									MAX_FAULTY_XBARS);
2376 		return -EINVAL;
2377 	}
2378 
2379 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2380 
2381 	/* set binning constraints */
2382 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2383 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2384 
2385 	return 0;
2386 }
2387 
2388 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2389 {
2390 	int rc;
2391 
2392 	/*
2393 	 * mark all clusters as good; each component will "fail" a cluster
2394 	 * based on eFuse/user values.
2395 	 * If more than a single cluster is faulty, the chip is unusable
2396 	 */
2397 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2398 
2399 	gaudi2_set_dram_binning_masks(hdev);
2400 
2401 	rc = gaudi2_set_edma_binning_masks(hdev);
2402 	if (rc)
2403 		return rc;
2404 
2405 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2406 	if (rc)
2407 		return rc;
2408 
2410 	/* always initially set to full mask */
2411 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2412 
2413 	return 0;
2414 }
2415 
2416 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2417 {
2418 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2419 	int rc;
2420 
2421 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2422 	if (rc)
2423 		return rc;
2424 
2425 	/* if we have DRAM binning reported by FW we should perform cluster config */
2426 	if (prop->faulty_dram_cluster_map) {
2427 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2428 
2429 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2430 	}
2431 
2432 	return 0;
2433 }
2434 
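/*
 * Handshake with the device CPU, refresh the DRAM size, card name and binning
 * masks with the values reported by the F/W, and re-derive the properties that
 * depend on them.
 */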
2435 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2436 {
2437 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2438 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2439 	long max_power;
2440 	u64 dram_size;
2441 	int rc;
2442 
2443 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2444 		return 0;
2445 
2446 	/* No point in asking for this information again when not doing a hard reset, as the
2447 	 * device CPU hasn't been reset
2448 	 */
2449 	if (hdev->reset_info.in_compute_reset)
2450 		return 0;
2451 
2452 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2453 										mmCPU_BOOT_ERR1);
2454 	if (rc)
2455 		return rc;
2456 
2457 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2458 	if (dram_size) {
2459 		/* we can have either 5 or 6 HBMs. other values are invalid */
2460 
2461 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2462 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2463 			dev_err(hdev->dev,
2464 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2465 				dram_size, prop->dram_size);
2466 			dram_size = prop->dram_size;
2467 		}
2468 
2469 		prop->dram_size = dram_size;
2470 		prop->dram_end_address = prop->dram_base_address + dram_size;
2471 	}
2472 
2473 	if (!strlen(prop->cpucp_info.card_name))
2474 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2475 
2476 	/* Overwrite binning masks with the actual binning values from F/W */
2477 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2478 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2479 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2480 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2481 
2482 	/*
2483 	 * at this point the DRAM parameters need to be updated according to data obtained
2484 	 * from the FW
2485 	 */
2486 	rc = gaudi2_set_dram_properties(hdev);
2487 	if (rc)
2488 		return rc;
2489 
2490 	rc = gaudi2_set_cluster_binning_masks(hdev);
2491 	if (rc)
2492 		return rc;
2493 
2494 	rc = gaudi2_set_tpc_binning_masks(hdev);
2495 	if (rc)
2496 		return rc;
2497 
2498 	rc = gaudi2_set_dec_binning_masks(hdev);
2499 	if (rc)
2500 		return rc;
2501 
2502 	max_power = hl_fw_get_max_power(hdev);
2503 	if (max_power < 0)
2504 		return max_power;
2505 
2506 	prop->max_power_default = (u64) max_power;
2507 
2508 	return 0;
2509 }
2510 
2511 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2512 {
2513 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2514 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2515 	int rc;
2516 
2517 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2518 		return 0;
2519 
2520 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2521 	if (rc)
2522 		return rc;
2523 
2524 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2525 
2526 	return 0;
2527 }
2528 
2529 static int gaudi2_early_init(struct hl_device *hdev)
2530 {
2531 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2532 	struct pci_dev *pdev = hdev->pdev;
2533 	resource_size_t pci_bar_size;
2534 	int rc;
2535 
2536 	rc = gaudi2_set_fixed_properties(hdev);
2537 	if (rc)
2538 		return rc;
2539 
2540 	/* Check BAR sizes */
2541 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2542 
2543 	if (pci_bar_size != CFG_BAR_SIZE) {
2544 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2545 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2546 		rc = -ENODEV;
2547 		goto free_queue_props;
2548 	}
2549 
2550 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2551 	if (pci_bar_size != MSIX_BAR_SIZE) {
2552 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2553 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2554 		rc = -ENODEV;
2555 		goto free_queue_props;
2556 	}
2557 
2558 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2559 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2560 
2561 	/*
2562 	 * Only on PLDM does the driver configure the iATU; otherwise it is done by the F/W
2563 	 */
2564 	if (hdev->pldm)
2565 		hdev->asic_prop.iatu_done_by_fw = false;
2566 	else
2567 		hdev->asic_prop.iatu_done_by_fw = true;
2568 
2569 	rc = hl_pci_init(hdev);
2570 	if (rc)
2571 		goto free_queue_props;
2572 
2573 	/* Before continuing with the initialization, we need to read the preboot
2574 	 * version to determine whether we run with a security-enabled firmware
2575 	 */
2576 	rc = hl_fw_read_preboot_status(hdev);
2577 	if (rc) {
2578 		if (hdev->reset_on_preboot_fail)
2579 			hdev->asic_funcs->hw_fini(hdev, true, false);
2580 		goto pci_fini;
2581 	}
2582 
2583 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2584 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2585 		hdev->asic_funcs->hw_fini(hdev, true, false);
2586 	}
2587 
2588 	return 0;
2589 
2590 pci_fini:
2591 	hl_pci_fini(hdev);
2592 free_queue_props:
2593 	kfree(hdev->asic_prop.hw_queues_props);
2594 	return rc;
2595 }
2596 
2597 static int gaudi2_early_fini(struct hl_device *hdev)
2598 {
2599 	kfree(hdev->asic_prop.hw_queues_props);
2600 	hl_pci_fini(hdev);
2601 
2602 	return 0;
2603 }
2604 
2605 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2606 {
2607 	switch (arc_id) {
2608 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2609 		return true;
2610 	default:
2611 		return false;
2612 	}
2613 }
2614 
2615 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2616 {
2617 	switch (arc_id) {
2618 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2619 		return true;
2620 	default:
2621 		return false;
2622 	}
2623 }
2624 
2625 static void gaudi2_init_arcs(struct hl_device *hdev)
2626 {
2627 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2628 	u64 arc_id;
2629 	u32 i;
2630 
2631 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2632 		if (gaudi2_is_arc_enabled(hdev, i))
2633 			continue;
2634 
2635 		gaudi2_set_arc_id_cap(hdev, i);
2636 	}
2637 
2638 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2639 		if (!gaudi2_is_queue_enabled(hdev, i))
2640 			continue;
2641 
2642 		arc_id = gaudi2_queue_id_to_arc_id[i];
2643 		if (gaudi2_is_arc_enabled(hdev, arc_id))
2644 			continue;
2645 
2646 		if (gaudi2_is_arc_nic_owned(arc_id) &&
2647 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2648 			continue;
2649 
2650 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2651 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2652 			continue;
2653 
2654 		gaudi2_set_arc_id_cap(hdev, arc_id);
2655 	}
2656 }
2657 
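/*
 * Scrub a single ARC's DCCM through a KDMA job. Scheduler ARCs 0-3 own two consecutive DCCM
 * blocks which are scrubbed in one pass, while scheduler ARCs 4-5 and the MME QMAN ARCs expose
 * only one block at a time, so the upper block is selected through the AUX register, scrubbed,
 * and then the lower block is selected again.
 */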
2658 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2659 {
2660 	u32 reg_base, reg_val;
2661 	int rc;
2662 
2663 	switch (cpu_id) {
2664 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2665 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
2666 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2667 						ARC_DCCM_BLOCK_SIZE * 2, true);
2668 		if (rc)
2669 			return rc;
2670 		break;
2671 	case CPU_ID_SCHED_ARC4:
2672 	case CPU_ID_SCHED_ARC5:
2673 	case CPU_ID_MME_QMAN_ARC0:
2674 	case CPU_ID_MME_QMAN_ARC1:
2675 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
2676 
2677 		/* Scrub lower DCCM block */
2678 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2679 						ARC_DCCM_BLOCK_SIZE, true);
2680 		if (rc)
2681 			return rc;
2682 
2683 		/* Switch to upper DCCM block */
2684 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2685 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2686 
2687 		/* Scrub upper DCCM block */
2688 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2689 						ARC_DCCM_BLOCK_SIZE, true);
2690 		if (rc)
2691 			return rc;
2692 
2693 		/* Switch to lower DCCM block */
2694 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2695 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2696 		break;
2697 	default:
2698 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2699 						ARC_DCCM_BLOCK_SIZE, true);
2700 		if (rc)
2701 			return rc;
2702 	}
2703 
2704 	return 0;
2705 }
2706 
2707 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2708 {
2709 	u16 arc_id;
2710 
2711 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2712 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
2713 			continue;
2714 
2715 		gaudi2_scrub_arc_dccm(hdev, arc_id);
2716 	}
2717 }
2718 
2719 static int gaudi2_late_init(struct hl_device *hdev)
2720 {
2721 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2722 	int rc;
2723 
2724 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
2725 
2726 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2727 					gaudi2->virt_msix_db_dma_addr);
2728 	if (rc) {
2729 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2730 		return rc;
2731 	}
2732 
2733 	rc = gaudi2_fetch_psoc_frequency(hdev);
2734 	if (rc) {
2735 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2736 		goto disable_pci_access;
2737 	}
2738 
2739 	gaudi2_init_arcs(hdev);
2740 	gaudi2_scrub_arcs_dccm(hdev);
2741 	gaudi2_init_security(hdev);
2742 
2743 	return 0;
2744 
2745 disable_pci_access:
2746 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2747 
2748 	return rc;
2749 }
2750 
2751 static void gaudi2_late_fini(struct hl_device *hdev)
2752 {
2753 	hl_hwmon_release_resources(hdev);
2754 }
2755 
2756 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2757 {
2758 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2759 
2760 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2761 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2762 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2763 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2764 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2765 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2766 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2767 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2768 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2769 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2770 }
2771 
2772 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2773 {
2774 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2775 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2776 	u32 block_size, umr_start_idx, num_umr_blocks;
2777 	int i;
2778 
2779 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2780 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2781 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
2782 		else
2783 			block_size = ARC_DCCM_BLOCK_SIZE;
2784 
2785 		blocks[i].address = gaudi2_arc_dccm_bases[i];
2786 		blocks[i].size = block_size;
2787 	}
2788 
2789 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2790 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2791 
2792 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2793 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2794 
2795 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2796 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2797 
2798 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2799 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2800 
2801 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2802 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2803 
2804 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2805 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2806 
2807 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2808 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2809 
2810 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2811 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2812 
2813 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2814 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
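	/*
	 * Each NIC engine exposes NUM_OF_USER_NIC_UMR_BLOCKS unsecured doorbell (UMR) blocks.
	 * The block address below is derived from the engine index: the NIC macro offset
	 * (NIC_OFFSET), the QMAN offset inside the macro (NIC_QM_OFFSET) and the UMR offset
	 * inside the QMAN (NIC_UMR_OFFSET).
	 */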
2815 	for (i = 0 ; i < num_umr_blocks ; i++) {
2816 		u8 nic_id, umr_block_id;
2817 
2818 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2819 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2820 
2821 		blocks[umr_start_idx + i].address =
2822 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2823 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2824 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2825 			umr_block_id * NIC_UMR_OFFSET;
2826 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2827 	}
2828 
2829 	/* Expose decoder HW configuration block to user */
2830 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2831 
2832 	for (i = 1; i < NUM_OF_DCORES; ++i) {
2833 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2834 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2835 
2836 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2837 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2838 
2839 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2840 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2841 	}
2842 }
2843 
2844 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2845 {
2846 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2847 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2848 	int i, j, rc = 0;
2849 
2850 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
2851 	 * that holds the extension bits (49..28), these bits must be identical across the entire
2852 	 * allocated range.
2853 	 */
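	/* In other words, the allocation must not cross a 256 MB (bit 28) boundary. As an
	 * illustrative example, a buffer spanning 0x0FFFF000..0x10000FFF crosses that boundary,
	 * so GAUDI2_ARC_PCI_MSB_ADDR() differs for its start and end addresses and the check
	 * below triggers another allocation attempt.
	 */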
2854 
2855 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2856 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2857 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2858 		if (!virt_addr_arr[i]) {
2859 			rc = -ENOMEM;
2860 			goto free_dma_mem_arr;
2861 		}
2862 
2863 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2864 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2865 			break;
2866 	}
2867 
2868 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2869 		dev_err(hdev->dev,
2870 			"MSBs of the ARC accessible DMA memory are not identical across the allocated range\n");
2871 		rc = -EFAULT;
2872 		goto free_dma_mem_arr;
2873 	}
2874 
2875 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2876 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2877 
2878 free_dma_mem_arr:
2879 	for (j = 0 ; j < i ; j++)
2880 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2881 						dma_addr_arr[j]);
2882 
2883 	return rc;
2884 }
2885 
2886 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2887 {
2888 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2889 	struct pci_mem_region *region;
2890 
2891 	/* CFG */
2892 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
2893 	region->region_base = CFG_BASE;
2894 	region->region_size = CFG_SIZE;
2895 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2896 	region->bar_size = CFG_BAR_SIZE;
2897 	region->bar_id = SRAM_CFG_BAR_ID;
2898 	region->used = 1;
2899 
2900 	/* SRAM */
2901 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2902 	region->region_base = SRAM_BASE_ADDR;
2903 	region->region_size = SRAM_SIZE;
2904 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2905 	region->bar_size = CFG_BAR_SIZE;
2906 	region->bar_id = SRAM_CFG_BAR_ID;
2907 	region->used = 1;
2908 
2909 	/* DRAM */
2910 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2911 	region->region_base = DRAM_PHYS_BASE;
2912 	region->region_size = hdev->asic_prop.dram_size;
2913 	region->offset_in_bar = 0;
2914 	region->bar_size = prop->dram_pci_bar_size;
2915 	region->bar_id = DRAM_BAR_ID;
2916 	region->used = 1;
2917 }
2918 
2919 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2920 {
2921 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2922 	int i, j, k;
2923 
2924 	/* Initialize common user CQ interrupt */
2925 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2926 				HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2927 
2928 	/* Initialize common decoder interrupt */
2929 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2930 				HL_COMMON_DEC_INTERRUPT_ID, true);
2931 
2932 	/* The user interrupts structure holds both decoder and user interrupts from various engines.
2933 	 * We first initialize the decoder interrupts and then we add the user interrupts.
2934 	 * The only limitation is that the last decoder interrupt id must be smaller
2935 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2936 	 */
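	/* The resulting layout of hdev->user_interrupt[] is therefore: the decoder (normal)
	 * interrupt entries come first, followed by the user CQ interrupt entries, which is why
	 * the second loop below keeps counting with the same running index j.
	 */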
2937 
2938 	/* Initialize decoder interrupts; expose only the normal interrupts, as the
2939 	 * error (abnormal) interrupts are handled by the driver
2940 	 */
2941 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2942 										i += 2, j++)
2943 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2944 
2945 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2946 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2947 }
2948 
2949 static inline int gaudi2_get_non_zero_random_int(void)
2950 {
2951 	int rand = get_random_u32();
2952 
2953 	return rand ? rand : 1;
2954 }
2955 
2956 static int gaudi2_sw_init(struct hl_device *hdev)
2957 {
2958 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2959 	struct gaudi2_device *gaudi2;
2960 	int i, rc;
2961 
2962 	/* Allocate device structure */
2963 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2964 	if (!gaudi2)
2965 		return -ENOMEM;
2966 
2967 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2968 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2969 			continue;
2970 
2971 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2972 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2973 				GAUDI2_EVENT_SIZE);
2974 			rc = -EINVAL;
2975 			goto free_gaudi2_device;
2976 		}
2977 
2978 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2979 	}
2980 
2981 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2982 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2983 
2984 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
2985 
2986 	hdev->asic_specific = gaudi2;
2987 
2988 	/* Create DMA pool for small allocations.
2989 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
2990 	 * PI/CI registers allocated from this pool have this restriction
2991 	 */
2992 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
2993 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
2994 	if (!hdev->dma_pool) {
2995 		dev_err(hdev->dev, "failed to create DMA pool\n");
2996 		rc = -ENOMEM;
2997 		goto free_gaudi2_device;
2998 	}
2999 
3000 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3001 	if (rc)
3002 		goto free_dma_pool;
3003 
3004 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3005 	if (!hdev->cpu_accessible_dma_pool) {
3006 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3007 		rc = -ENOMEM;
3008 		goto free_cpu_dma_mem;
3009 	}
3010 
3011 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3012 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3013 	if (rc) {
3014 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3015 		rc = -EFAULT;
3016 		goto free_cpu_accessible_dma_pool;
3017 	}
3018 
3019 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3020 								&gaudi2->virt_msix_db_dma_addr);
3021 	if (!gaudi2->virt_msix_db_cpu_addr) {
3022 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3023 		rc = -ENOMEM;
3024 		goto free_cpu_accessible_dma_pool;
3025 	}
3026 
3027 	spin_lock_init(&gaudi2->hw_queues_lock);
3028 
3029 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3030 							&gaudi2->scratchpad_bus_address,
3031 							GFP_KERNEL | __GFP_ZERO);
3032 	if (!gaudi2->scratchpad_kernel_address) {
3033 		rc = -ENOMEM;
3034 		goto free_virt_msix_db_mem;
3035 	}
3036 
3037 	gaudi2_user_mapped_blocks_init(hdev);
3038 
3039 	/* Initialize user interrupts */
3040 	gaudi2_user_interrupt_setup(hdev);
3041 
3042 	hdev->supports_coresight = true;
3043 	hdev->supports_sync_stream = true;
3044 	hdev->supports_cb_mapping = true;
3045 	hdev->supports_wait_for_multi_cs = false;
3046 
3047 	prop->supports_compute_reset = true;
3048 
3049 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3050 
3051 	return 0;
3052 
3053 free_virt_msix_db_mem:
3054 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3055 free_cpu_accessible_dma_pool:
3056 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3057 free_cpu_dma_mem:
3058 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3059 					hdev->cpu_accessible_dma_address);
3060 free_dma_pool:
3061 	dma_pool_destroy(hdev->dma_pool);
3062 free_gaudi2_device:
3063 	kfree(gaudi2);
3064 	return rc;
3065 }
3066 
3067 static int gaudi2_sw_fini(struct hl_device *hdev)
3068 {
3069 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3070 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3071 
3072 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3073 
3074 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3075 
3076 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3077 						hdev->cpu_accessible_dma_address);
3078 
3079 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3080 					gaudi2->scratchpad_bus_address);
3081 
3082 	dma_pool_destroy(hdev->dma_pool);
3083 
3084 	kfree(gaudi2);
3085 
3086 	return 0;
3087 }
3088 
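/*
 * The helpers below implement the engine teardown sequence used by gaudi2_halt_engines():
 * first the QMANs are stopped, then the ARCs are halted and the engines themselves are stalled,
 * then (on compute/soft reset only) the NIC QMANs are manually flushed, and finally the QMANs
 * are disabled.
 */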
3089 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3090 {
3091 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3092 						QM_GLBL_CFG1_CQF_STOP |
3093 						QM_GLBL_CFG1_CP_STOP);
3094 
3095 	/* stop also the ARC */
3096 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3097 }
3098 
3099 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3100 {
3101 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3102 						QM_GLBL_CFG1_CQF_FLUSH |
3103 						QM_GLBL_CFG1_CP_FLUSH);
3104 }
3105 
3106 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3107 {
3108 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3109 }
3110 
3111 /**
3112  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3113  *
3114  * @hdev: pointer to the habanalabs device structure
3115  * @queue_id: queue whose fence counters should be cleared
3116  * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3117  *              getting stuck on any fence value. Otherwise set all fence
3118  *              counters to 0 (standard clear of fence counters)
3119  */
3120 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3121 						bool skip_fence)
3122 {
3123 	u32 size, reg_base;
3124 	u32 addr, val;
3125 
3126 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3127 
3128 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3129 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3130 
3131 	/*
3132 	 * In case we want to make sure that a QM which is stuck on a fence will
3133 	 * be released, we should set the fence counter to a value higher than
3134 	 * the value the QM is waiting for. To comply with a fence of any value,
3135 	 * we set the maximum fence value in all counters
3136 	 */
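	/* For example, a CP blocked on a command that waits for FENCE0 >= 3 is released once the
	 * counter is forced to U32_MAX below, whereas a plain clear to 0 would keep it blocked.
	 */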
3137 	val = skip_fence ? U32_MAX : 0;
3138 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3139 }
3140 
3141 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3142 {
3143 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3144 
3145 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3146 	gaudi2_flush_qman_common(hdev, reg_base);
3147 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3148 }
3149 
3150 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3151 {
3152 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3153 	int dcore, inst;
3154 
3155 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3156 		goto stop_edma_qmans;
3157 
3158 	/* Stop CPs of PDMA QMANs */
3159 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3160 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3161 
3162 stop_edma_qmans:
3163 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3164 		return;
3165 
3166 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3167 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3168 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3169 			u32 qm_base;
3170 
3171 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3172 				continue;
3173 
3174 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3175 					inst * DCORE_EDMA_OFFSET;
3176 
3177 			/* Stop CPs of EDMA QMANs */
3178 			gaudi2_stop_qman_common(hdev, qm_base);
3179 		}
3180 	}
3181 }
3182 
3183 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3184 {
3185 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3186 	u32 offset, i;
3187 
3188 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3189 
3190 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3191 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3192 			continue;
3193 
3194 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3195 	}
3196 }
3197 
3198 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3199 {
3200 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 	u32 reg_base;
3202 	int i;
3203 
3204 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3205 		return;
3206 
3207 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3208 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3209 			continue;
3210 
3211 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3212 		gaudi2_stop_qman_common(hdev, reg_base);
3213 	}
3214 }
3215 
3216 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3217 {
3218 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3219 	u32 reg_base;
3220 	int i;
3221 
3222 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3223 		return;
3224 
3225 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3226 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3227 			continue;
3228 
3229 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3230 		gaudi2_stop_qman_common(hdev, reg_base);
3231 	}
3232 }
3233 
3234 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3235 {
3236 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3237 	u32 reg_base, queue_id;
3238 	int i;
3239 
3240 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3241 		return;
3242 
3243 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3244 
3245 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3246 		if (!(hdev->nic_ports_mask & BIT(i)))
3247 			continue;
3248 
3249 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3250 		gaudi2_stop_qman_common(hdev, reg_base);
3251 	}
3252 }
3253 
3254 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3255 {
3256 	u32 reg_val;
3257 
3258 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3259 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3260 }
3261 
3262 static void gaudi2_dma_stall(struct hl_device *hdev)
3263 {
3264 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3265 	int dcore, inst;
3266 
3267 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3268 		goto stall_edma;
3269 
3270 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3271 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3272 
3273 stall_edma:
3274 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3275 		return;
3276 
3277 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3278 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3279 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3280 			u32 core_base;
3281 
3282 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3283 				continue;
3284 
3285 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3286 					inst * DCORE_EDMA_OFFSET;
3287 
3288 			/* Stall CPs of EDMA QMANs */
3289 			gaudi2_stall_dma_common(hdev, core_base);
3290 		}
3291 	}
3292 }
3293 
3294 static void gaudi2_mme_stall(struct hl_device *hdev)
3295 {
3296 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3297 	u32 offset, i;
3298 
3299 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3300 
3301 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3302 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3303 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3304 }
3305 
3306 static void gaudi2_tpc_stall(struct hl_device *hdev)
3307 {
3308 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3309 	u32 reg_base;
3310 	int i;
3311 
3312 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3313 		return;
3314 
3315 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3316 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3317 			continue;
3318 
3319 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3320 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3321 	}
3322 }
3323 
3324 static void gaudi2_rotator_stall(struct hl_device *hdev)
3325 {
3326 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3327 	u32 reg_val;
3328 	int i;
3329 
3330 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3331 		return;
3332 
3333 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3334 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3335 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3336 
3337 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3338 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3339 			continue;
3340 
3341 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3342 	}
3343 }
3344 
3345 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3346 {
3347 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3348 }
3349 
3350 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3351 {
3352 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3353 	int dcore, inst;
3354 
3355 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3356 		goto stop_edma_qmans;
3357 
3358 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3359 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3360 
3361 stop_edma_qmans:
3362 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3363 		return;
3364 
3365 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3366 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3367 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3368 			u32 qm_base;
3369 
3370 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3371 				continue;
3372 
3373 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3374 					inst * DCORE_EDMA_OFFSET;
3375 
3376 			/* Disable CPs of EDMA QMANs */
3377 			gaudi2_disable_qman_common(hdev, qm_base);
3378 		}
3379 	}
3380 }
3381 
3382 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3383 {
3384 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3385 	u32 offset, i;
3386 
3387 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3388 
3389 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3390 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3391 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3392 }
3393 
3394 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3395 {
3396 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3397 	u32 reg_base;
3398 	int i;
3399 
3400 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3401 		return;
3402 
3403 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3404 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3405 			continue;
3406 
3407 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3408 		gaudi2_disable_qman_common(hdev, reg_base);
3409 	}
3410 }
3411 
3412 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3413 {
3414 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3415 	u32 reg_base;
3416 	int i;
3417 
3418 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3419 		return;
3420 
3421 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3422 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3423 			continue;
3424 
3425 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3426 		gaudi2_disable_qman_common(hdev, reg_base);
3427 	}
3428 }
3429 
3430 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3431 {
3432 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3433 	u32 reg_base, queue_id;
3434 	int i;
3435 
3436 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3437 		return;
3438 
3439 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3440 
3441 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3442 		if (!(hdev->nic_ports_mask & BIT(i)))
3443 			continue;
3444 
3445 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3446 		gaudi2_disable_qman_common(hdev, reg_base);
3447 	}
3448 }
3449 
3450 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3451 {
3452 	/* Disable the timestamp counter */
3453 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3454 
3455 	/* Zero the lower/upper parts of the 64-bit counter */
3456 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3457 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3458 
3459 	/* Enable the counter */
3460 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3461 }
3462 
3463 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3464 {
3465 	/* Disable the timestamp counter */
3466 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3467 }
3468 
3469 static const char *gaudi2_irq_name(u16 irq_number)
3470 {
3471 	switch (irq_number) {
3472 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3473 		return "gaudi2 cpu eq";
3474 	case GAUDI2_IRQ_NUM_COMPLETION:
3475 		return "gaudi2 completion";
3476 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3477 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3478 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3479 		return "gaudi2 user completion";
3480 	default:
3481 		return "invalid";
3482 	}
3483 }
3484 
3485 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3486 {
3487 	int i, irq, relative_idx;
3488 	struct hl_dec *dec;
3489 
3490 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3491 		irq = pci_irq_vector(hdev->pdev, i);
3492 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3493 
3494 		dec = hdev->dec + relative_idx / 2;
3495 
3496 		/* We pass different structures depending on the irq handler. For the abnormal
3497 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3498 		 * user_interrupt entry
3499 		 */
3500 		free_irq(irq, ((relative_idx % 2) ?
3501 				(void *) dec :
3502 				(void *) &hdev->user_interrupt[dec->core_id]));
3503 	}
3504 }
3505 
3506 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3507 {
3508 	int rc, i, irq_init_cnt, irq, relative_idx;
3509 	irq_handler_t irq_handler;
3510 	struct hl_dec *dec;
3511 
3512 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3513 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3514 			i++, irq_init_cnt++) {
3515 
3516 		irq = pci_irq_vector(hdev->pdev, i);
3517 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3518 
3519 		irq_handler = (relative_idx % 2) ?
3520 				hl_irq_handler_dec_abnrm :
3521 				hl_irq_handler_user_interrupt;
3522 
3523 		dec = hdev->dec + relative_idx / 2;
3524 
3525 		/* We pass different structures depending on the irq handler. For the abnormal
3526 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3527 		 * user_interrupt entry
3528 		 */
3529 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3530 				((relative_idx % 2) ?
3531 				(void *) dec :
3532 				(void *) &hdev->user_interrupt[dec->core_id]));
3533 		if (rc) {
3534 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3535 			goto free_dec_irqs;
3536 		}
3537 	}
3538 
3539 	return 0;
3540 
3541 free_dec_irqs:
3542 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3543 	return rc;
3544 }
3545 
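/*
 * MSI-X vector layout (see gaudi2_irq_name()): one completion vector, one event-queue vector,
 * a normal/abnormal vector pair per decoder, and then the user completion vectors starting at
 * GAUDI2_IRQ_NUM_USER_FIRST.
 */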
3546 static int gaudi2_enable_msix(struct hl_device *hdev)
3547 {
3548 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3549 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3550 	int rc, irq, i, j, user_irq_init_cnt;
3551 	irq_handler_t irq_handler;
3552 	struct hl_cq *cq;
3553 
3554 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3555 		return 0;
3556 
3557 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3558 					PCI_IRQ_MSIX);
3559 	if (rc < 0) {
3560 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3561 			GAUDI2_MSIX_ENTRIES, rc);
3562 		return rc;
3563 	}
3564 
3565 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3566 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3567 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3568 	if (rc) {
3569 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3570 		goto free_irq_vectors;
3571 	}
3572 
3573 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3574 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3575 			&hdev->event_queue);
3576 	if (rc) {
3577 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3578 		goto free_completion_irq;
3579 	}
3580 
3581 	rc = gaudi2_dec_enable_msix(hdev);
3582 	if (rc) {
3583 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
3584 		goto free_event_irq;
3585 	}
3586 
3587 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3588 			user_irq_init_cnt < prop->user_interrupt_count;
3589 			i++, j++, user_irq_init_cnt++) {
3590 
3591 		irq = pci_irq_vector(hdev->pdev, i);
3592 		irq_handler = hl_irq_handler_user_interrupt;
3593 
3594 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3595 		if (rc) {
3596 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3597 			goto free_user_irq;
3598 		}
3599 	}
3600 
3601 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3602 
3603 	return 0;
3604 
3605 free_user_irq:
3606 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3607 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3608 
3609 		irq = pci_irq_vector(hdev->pdev, i);
3610 		free_irq(irq, &hdev->user_interrupt[j]);
3611 	}
3612 
3613 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3614 
3615 free_event_irq:
3616 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3617 	free_irq(irq, &hdev->event_queue);
3618 
3619 free_completion_irq:
3620 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3621 	free_irq(irq, cq);
3622 
3623 free_irq_vectors:
3624 	pci_free_irq_vectors(hdev->pdev);
3625 
3626 	return rc;
3627 }
3628 
3629 static void gaudi2_sync_irqs(struct hl_device *hdev)
3630 {
3631 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3632 	int i, j;
3633 	int irq;
3634 
3635 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3636 		return;
3637 
3638 	/* Wait for all pending IRQs to be finished */
3639 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3640 
3641 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3642 		irq = pci_irq_vector(hdev->pdev, i);
3643 		synchronize_irq(irq);
3644 	}
3645 
3646 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3647 										i++, j++) {
3648 		irq = pci_irq_vector(hdev->pdev, i);
3649 		synchronize_irq(irq);
3650 	}
3651 
3652 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3653 }
3654 
3655 static void gaudi2_disable_msix(struct hl_device *hdev)
3656 {
3657 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3658 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3659 	struct hl_cq *cq;
3660 	int irq, i, j, k;
3661 
3662 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3663 		return;
3664 
3665 	gaudi2_sync_irqs(hdev);
3666 
3667 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3668 	free_irq(irq, &hdev->event_queue);
3669 
3670 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3671 
3672 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3673 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3674 
3675 		irq = pci_irq_vector(hdev->pdev, i);
3676 		free_irq(irq, &hdev->user_interrupt[j]);
3677 	}
3678 
3679 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3680 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3681 	free_irq(irq, cq);
3682 
3683 	pci_free_irq_vectors(hdev->pdev);
3684 
3685 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3686 }
3687 
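/*
 * Decoders are stopped gracefully: the GRACEFUL_STOP bit is set in the decoder bridge control
 * register, and the GRACEFUL_PEND bit is then polled to make sure all traffic from the decoder
 * has stopped before a core reset is applied.
 */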
3688 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3689 {
3690 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3691 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3692 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3693 	int rc;
3694 
3695 	if (hdev->pldm)
3696 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3697 	else
3698 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3699 
3700 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3701 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3702 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3703 			continue;
3704 
3705 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3706 
3707 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3708 
3709 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3710 
3711 		/* Wait until all traffic from the decoder stops
3712 		 * before applying core reset.
3713 		 */
3714 		rc = hl_poll_timeout(
3715 				hdev,
3716 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3717 				graceful,
3718 				(graceful & graceful_pend_mask),
3719 				100,
3720 				timeout_usec);
3721 		if (rc)
3722 			dev_err(hdev->dev,
3723 				"Failed to stop traffic from DCORE%d Decoder %d\n",
3724 				dcore_id, dec_id);
3725 	}
3726 }
3727 
3728 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3729 {
3730 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3731 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3732 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3733 	int rc;
3734 
3735 	if (hdev->pldm)
3736 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3737 	else
3738 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3739 
3740 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3741 		dec_bit = PCIE_DEC_SHIFT + dec_id;
3742 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3743 			continue;
3744 
3745 		offset = dec_id * PCIE_VDEC_OFFSET;
3746 
3747 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3748 
3749 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3750 
3751 		/* Wait until all traffic from the decoder stops
3752 		 * before applying core reset.
3753 		 */
3754 		rc = hl_poll_timeout(
3755 				hdev,
3756 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3757 				graceful,
3758 				(graceful & graceful_pend_mask),
3759 				100,
3760 				timeout_usec);
3761 		if (rc)
3762 			dev_err(hdev->dev,
3763 				"Failed to stop traffic from PCIe Decoder %d\n",
3764 				dec_id);
3765 	}
3766 }
3767 
3768 static void gaudi2_stop_dec(struct hl_device *hdev)
3769 {
3770 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3771 	int dcore_id;
3772 
3773 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3774 		return;
3775 
3776 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3777 		gaudi2_stop_dcore_dec(hdev, dcore_id);
3778 
3779 	gaudi2_stop_pcie_dec(hdev);
3780 }
3781 
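/*
 * ARC run/halt control is a request/acknowledge handshake: gaudi2_set_arc_running_mode() writes
 * the RUN_REQ or HALT_REQ bit in the ARC AUX block, and gaudi2_verify_arc_running_mode() polls
 * the corresponding ACK bit and clears the request once it has been acknowledged.
 */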
3782 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3783 {
3784 	u32 reg_base, reg_val;
3785 
3786 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3787 	if (run_mode == HL_ENGINE_CORE_RUN)
3788 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3789 	else
3790 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3791 
3792 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3793 }
3794 
3795 static void gaudi2_halt_arcs(struct hl_device *hdev)
3796 {
3797 	u16 arc_id;
3798 
3799 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3800 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3801 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3802 	}
3803 }
3804 
3805 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3806 {
3807 	int rc;
3808 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
3809 
3810 	if (hdev->pldm)
3811 		timeout_usec *= 100;
3812 
3813 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3814 	if (run_mode == HL_ENGINE_CORE_RUN)
3815 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3816 	else
3817 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3818 
3819 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3820 				val, ((val & ack_mask) == ack_mask),
3821 				1000, timeout_usec);
3822 
3823 	if (!rc) {
3824 		/* Clear */
3825 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3826 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3827 	}
3828 
3829 	return rc;
3830 }
3831 
3832 static void gaudi2_reset_arcs(struct hl_device *hdev)
3833 {
3834 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3835 	u16 arc_id;
3836 
3837 	if (!gaudi2)
3838 		return;
3839 
3840 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3841 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3842 			gaudi2_clr_arc_id_cap(hdev, arc_id);
3843 }
3844 
3845 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3846 {
3847 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3848 	u32 queue_id;
3849 	int i;
3850 
3851 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3852 		return;
3853 
3854 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3855 
3856 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3857 		if (!(hdev->nic_ports_mask & BIT(i)))
3858 			continue;
3859 
3860 		gaudi2_qman_manual_flush_common(hdev, queue_id);
3861 	}
3862 }
3863 
3864 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3865 					u32 num_cores, u32 core_command)
3866 {
3867 	int i, rc;
3868 
3869 
3870 	for (i = 0 ; i < num_cores ; i++) {
3871 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3872 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3873 	}
3874 
3875 	for (i = 0 ; i < num_cores ; i++) {
3876 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3877 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3878 
3879 			if (rc) {
3880 				dev_err(hdev->dev, "failed to %s arc: %d\n",
3881 					(core_command == HL_ENGINE_CORE_HALT) ?
3882 					"HALT" : "RUN", core_ids[i]);
3883 				return -1;
3884 			}
3885 		}
3886 	}
3887 
3888 	return 0;
3889 }
3890 
3891 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3892 {
3893 	u32 wait_timeout_ms;
3894 
3895 	if (hdev->pldm)
3896 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3897 	else
3898 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3899 
3900 	if (fw_reset)
3901 		goto skip_engines;
3902 
3903 	gaudi2_stop_dma_qmans(hdev);
3904 	gaudi2_stop_mme_qmans(hdev);
3905 	gaudi2_stop_tpc_qmans(hdev);
3906 	gaudi2_stop_rot_qmans(hdev);
3907 	gaudi2_stop_nic_qmans(hdev);
3908 	msleep(wait_timeout_ms);
3909 
3910 	gaudi2_halt_arcs(hdev);
3911 	gaudi2_dma_stall(hdev);
3912 	gaudi2_mme_stall(hdev);
3913 	gaudi2_tpc_stall(hdev);
3914 	gaudi2_rotator_stall(hdev);
3915 
3916 	msleep(wait_timeout_ms);
3917 
3918 	gaudi2_stop_dec(hdev);
3919 
3920 	/*
3921 	 * In case of soft reset, do a manual flush of the QMANs (currently done
3922 	 * only for the NIC QMANs)
3923 	 */
3924 	if (!hard_reset)
3925 		gaudi2_nic_qmans_manual_flush(hdev);
3926 
3927 	gaudi2_disable_dma_qmans(hdev);
3928 	gaudi2_disable_mme_qmans(hdev);
3929 	gaudi2_disable_tpc_qmans(hdev);
3930 	gaudi2_disable_rot_qmans(hdev);
3931 	gaudi2_disable_nic_qmans(hdev);
3932 	gaudi2_disable_timestamp(hdev);
3933 
3934 skip_engines:
3935 	if (hard_reset) {
3936 		gaudi2_disable_msix(hdev);
3937 		return;
3938 	}
3939 
3940 	gaudi2_sync_irqs(hdev);
3941 }
3942 
3943 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3944 {
3945 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3946 
3947 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3948 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3949 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3950 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3951 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3952 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3953 }
3954 
3955 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3956 {
3957 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3958 	struct dynamic_fw_load_mgr *dynamic_loader;
3959 	struct cpu_dyn_regs *dyn_regs;
3960 
3961 	/* fill common fields */
3962 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3963 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3964 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3965 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3966 	fw_loader->skip_bmc = false;
3967 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3968 	fw_loader->dram_bar_id = DRAM_BAR_ID;
3969 
3970 	if (hdev->asic_type == ASIC_GAUDI2 || hdev->asic_type == ASIC_GAUDI2_SEC)
3971 		fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3972 	else /* ASIC_GAUDI2_FPGA */
3973 		fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT;
3974 
3975 	/* Here we set initial values for a few specific dynamic regs (as
3976 	 * before reading the first descriptor from the FW those values have to be
3977 	 * hard-coded). In later stages of the protocol those values will be
3978 	 * updated automatically by reading the FW descriptor, so the data there
3979 	 * will always be up-to-date
3980 	 */
3981 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3982 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3983 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3984 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3985 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3986 }
3987 
3988 static int gaudi2_init_cpu(struct hl_device *hdev)
3989 {
3990 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3991 	int rc;
3992 
3993 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3994 		return 0;
3995 
3996 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
3997 		return 0;
3998 
3999 	rc = hl_fw_init_cpu(hdev);
4000 	if (rc)
4001 		return rc;
4002 
4003 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4004 
4005 	return 0;
4006 }
4007 
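/*
 * Hand the kernel PQ, event queue and CPU-accessible memory over to the device CPU: program
 * their addresses and sizes in the CPU_IF registers, signal PQ_INIT_STATUS_READY_FOR_CP, kick
 * the ARC through the GIC host-PI-update interrupt, and then poll until the device CPU answers
 * with PQ_INIT_STATUS_READY_FOR_HOST.
 */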
4008 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4009 {
4010 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4011 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4012 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4013 	struct cpu_dyn_regs *dyn_regs;
4014 	struct hl_eq *eq;
4015 	u32 status;
4016 	int err;
4017 
4018 	if (!hdev->cpu_queues_enable)
4019 		return 0;
4020 
4021 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4022 		return 0;
4023 
4024 	eq = &hdev->event_queue;
4025 
4026 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4027 
4028 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4029 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4030 
4031 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4032 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4033 
4034 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4035 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4036 
4037 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4038 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4039 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4040 
4041 	/* Used for EQ CI */
4042 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4043 
4044 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4045 
4046 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4047 
4048 	/* Let the ARC know we are ready as it is now handling those queues  */
4049 
4050 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4051 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4052 
4053 	err = hl_poll_timeout(
4054 		hdev,
4055 		mmCPU_IF_QUEUE_INIT,
4056 		status,
4057 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4058 		1000,
4059 		cpu_timeout);
4060 
4061 	if (err) {
4062 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4063 		return -EIO;
4064 	}
4065 
4066 	/* update FW application security bits */
4067 	if (prop->fw_cpu_boot_dev_sts0_valid)
4068 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4069 
4070 	if (prop->fw_cpu_boot_dev_sts1_valid)
4071 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4072 
4073 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4074 	return 0;
4075 }
4076 
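/*
 * Each QMAN exposes NUM_OF_PQ_PER_QMAN physical queues; the per-PQ registers use a 4-byte
 * stride (pq_offset = pq_id * 4), and every PQ is programmed with its queue's bus address,
 * its size as a log2 value, and producer/consumer indices reset to 0.
 */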
4077 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4078 				u32 queue_id_base)
4079 {
4080 	struct hl_hw_queue *q;
4081 	u32 pq_id, pq_offset;
4082 
4083 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4084 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4085 		pq_offset = pq_id * 4;
4086 
4087 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4088 				lower_32_bits(q->bus_address));
4089 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4090 				upper_32_bits(q->bus_address));
4091 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4092 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4093 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4094 	}
4095 }
4096 
4097 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4098 {
4099 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4100 
4101 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4102 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4103 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4104 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4105 
4106 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4107 		cp_offset = cp_id * 4;
4108 
4109 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4110 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4111 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4112 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4113 	}
4114 
4115 	/* allow QMANs to accept work from ARC CQF */
4116 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4117 }
4118 
4119 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4120 				u32 queue_id_base)
4121 {
4122 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4123 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4124 
4125 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4126 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4127 
4128 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4129 		pq_offset = pq_id * 4;
4130 
4131 		/* Configure QMAN HBW to scratchpad as it is not needed */
4132 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4133 				lower_32_bits(gaudi2->scratchpad_bus_address));
4134 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4135 				upper_32_bits(gaudi2->scratchpad_bus_address));
4136 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4137 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4138 
4139 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4140 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4141 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4142 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4143 	}
4144 
4145 	/* Enable QMAN H/W completion */
4146 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4147 }
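
/*
 * Note on the PQC setup above: the high-bandwidth completion path is not used,
 * so its base address is pointed at a scratchpad buffer, while the
 * low-bandwidth completion writes QM_PQC_LBW_WDATA to a DCORE0 sync object.
 */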
4148 
4149 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4150 {
4151 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4152 	u32 sp_reg_addr;
4153 
4154 	switch (queue_id_base) {
4155 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4156 		fallthrough;
4157 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4158 		fallthrough;
4159 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4160 		fallthrough;
4161 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4162 		fallthrough;
4163 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4164 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4165 		break;
4166 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4167 		fallthrough;
4168 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4169 		fallthrough;
4170 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4171 		fallthrough;
4172 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4173 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4174 		break;
4175 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4176 		fallthrough;
4177 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4178 		fallthrough;
4179 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4180 		fallthrough;
4181 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4182 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4183 		break;
4184 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4185 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4186 		break;
4187 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4188 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4189 		break;
4190 	default:
4191 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4192 		return 0;
4193 	}
4194 
4195 	return sp_reg_addr;
4196 }
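
/*
 * The value returned above is an offset inside the device configuration space;
 * the caller adds CFG_BASE to it when programming the QMAN error-address
 * registers (see gaudi2_init_qman_common() below).
 */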
4197 
4198 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4199 					u32 queue_id_base)
4200 {
4201 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4202 	int map_table_entry;
4203 
4204 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4205 
4206 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4207 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4208 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4209 
4210 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4211 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4212 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4213 
4214 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4215 
4216 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4217 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4218 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4219 
4220 	/* Enable the QMAN channel.
4221 	 * PDMA QMAN configuration is different, as we do not allow the user to
4222 	 * access some of the CPs.
4223 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4224 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4225 	 */
4226 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4227 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4228 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4229 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4230 	else
4231 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4232 }
4233 
4234 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4235 		u32 queue_id_base)
4236 {
4237 	u32 pq_id;
4238 
4239 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4240 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4241 
4242 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4243 	gaudi2_init_qman_cp(hdev, reg_base);
4244 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4245 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4246 }
4247 
4248 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4249 				u32 dma_core_id, bool is_secure)
4250 {
4251 	u32 prot, irq_handler_offset;
4252 	struct cpu_dyn_regs *dyn_regs;
4253 	int map_table_entry;
4254 
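	/*
	 * Protection-error reporting is always enabled; the secured bit is set
	 * only when the core is initialized as secure (e.g. the KDMA).
	 */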
4255 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4256 	if (is_secure)
4257 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4258 
4259 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4260 
4261 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4262 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4263 
4264 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4265 			lower_32_bits(CFG_BASE + irq_handler_offset));
4266 
4267 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4268 			upper_32_bits(CFG_BASE + irq_handler_offset));
4269 
4270 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4271 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4272 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4273 
4274 	/* Enable the DMA channel */
4275 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4276 }
4277 
4278 static void gaudi2_init_kdma(struct hl_device *hdev)
4279 {
4280 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4281 	u32 reg_base;
4282 
4283 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4284 		return;
4285 
4286 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4287 
4288 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4289 
4290 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4291 }
4292 
4293 static void gaudi2_init_pdma(struct hl_device *hdev)
4294 {
4295 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4296 	u32 reg_base;
4297 
4298 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4299 		return;
4300 
4301 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4302 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4303 
4304 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4305 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4306 
4307 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4308 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4309 
4310 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4311 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4312 
4313 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4314 }
4315 
4316 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4317 {
4318 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
4319 
4320 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4321 	base_edma_qman_id = edma_stream_base[seq];
4322 
4323 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4324 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4325 
4326 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4327 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4328 }
4329 
4330 static void gaudi2_init_edma(struct hl_device *hdev)
4331 {
4332 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4333 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4334 	int dcore, inst;
4335 
4336 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4337 		return;
4338 
4339 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4340 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4341 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4342 
4343 			if (!(prop->edma_enabled_mask & BIT(seq)))
4344 				continue;
4345 
4346 			gaudi2_init_edma_instance(hdev, seq);
4347 
4348 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4349 		}
4350 	}
4351 }
4352 
4353 /*
4354  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4355  * @hdev: pointer to habanalabs device structure.
4356  * @sob_id: sync object ID.
4357  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4358  * @interrupt_id: interrupt ID.
4359  *
4360  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4361  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4362  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4363  *
4364  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4365  * In addition to the HBW write, the other 2 messages prepare the monitor for the next
4366  * completion, by decrementing the sync object value and re-arming the monitor.
4367  */
4368 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4369 							u32 first_mon_id, u32 interrupt_id)
4370 {
4371 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4372 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4373 	u64 addr;
4374 	u8 mask;
4375 
4376 	/* Reset the SOB value */
4377 	sob_offset = sob_id * sizeof(u32);
4378 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4379 
4380 	/* Configure 3 monitors:
4381 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4382 	 * 2. Decrement SOB value by 1.
4383 	 * 3. Re-arm the master monitor.
4384 	 */
4385 
4386 	first_mon_offset = first_mon_id * sizeof(u32);
4387 
4388 	/* 2nd monitor: Decrement SOB value by 1 */
4389 	mon_offset = first_mon_offset + sizeof(u32);
4390 
4391 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4392 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4393 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4394 
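	/*
	 * The payload encodes an atomic add of -1 (matching the "-1" note
	 * below): the 15-bit value field holds 0x7FFF with the sign bit set,
	 * and the INC bit requests an add rather than a plain write.
	 */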
4395 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4396 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4397 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4398 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4399 
4400 	/* 3rd monitor: Re-arm the master monitor */
4401 	mon_offset = first_mon_offset + 2 * sizeof(u32);
4402 
4403 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4404 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4405 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4406 
4407 	sob_group = sob_id / 8;
4408 	mask = ~BIT(sob_id & 0x7);
4409 	mode = 0; /* comparison mode is "greater than or equal to" */
4410 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4411 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4412 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4413 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4414 
4415 	payload = arm;
4416 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4417 
4418 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4419 	mon_offset = first_mon_offset;
4420 
4421 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4422 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4423 
4424 	addr = gaudi2->virt_msix_db_dma_addr;
4425 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4426 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4427 
4428 	payload = interrupt_id;
4429 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4430 
4431 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4432 }
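
/*
 * Example of the arming arithmetic above: for sob_id 10, SID = 10 / 8 = 1
 * selects the group of 8 sync objects containing it, MASK = ~BIT(10 % 8)
 * leaves only that SOB taken into account, and with SOD = 1 and the
 * "greater than or equal to" compare mode the monitor fires once the SOB
 * value reaches 1.
 */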
4433 
4434 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4435 {
4436 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4437 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4438 
4439 	/* Decoder normal/abnormal interrupts */
4440 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4441 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4442 			continue;
4443 
4444 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4445 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4446 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4447 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4448 
4449 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4450 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4451 		interrupt_id += 1;
4452 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4453 	}
4454 }
4455 
4456 static void gaudi2_init_sm(struct hl_device *hdev)
4457 {
4458 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4459 	u64 cq_address;
4460 	u32 reg_val;
4461 	int i;
4462 
4463 	/* Enable HBW/LBW CQ for completion monitors */
4464 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4465 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4466 
4467 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4468 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4469 
4470 	/* Enable only HBW CQ for KDMA completion monitor */
4471 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4472 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4473 
4474 	/* Init CQ0 DB */
4475 	/* Configure the monitor to trigger MSI-X interrupt */
4476 	/* TODO:
4477 	 * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022)
4478 	 * and in F/W (SW-93024).
4479 	 */
4480 	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
4481 		u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
4482 
4483 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
4484 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
4485 	} else {
4486 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0,
4487 				lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4488 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0,
4489 				upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4490 	}
4491 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4492 
4493 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4494 		cq_address =
4495 			hdev->completion_queue[i].bus_address;
4496 
4497 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4498 							lower_32_bits(cq_address));
4499 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4500 							upper_32_bits(cq_address));
4501 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4502 							ilog2(HL_CQ_SIZE_IN_BYTES));
4503 	}
4504 
4505 	/* Configure kernel ASID and MMU BP */
4506 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4507 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4508 
4509 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4510 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
4511 }
4512 
4513 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4514 {
4515 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4516 	u32 reg_val;
4517 	int i;
4518 
4519 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4520 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4521 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4522 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4523 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4524 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4525 
4526 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4527 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4528 
4529 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4530 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4531 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4532 	}
4533 }
4534 
4535 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4536 							bool config_qman_only)
4537 {
4538 	u32 queue_id_base, reg_base, clk_en_addr = 0;
4539 
4540 	switch (dcore_id) {
4541 	case 0:
4542 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4543 		break;
4544 	case 1:
4545 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4546 		clk_en_addr = mmDCORE1_MME_CTRL_LO_QM_SLV_CLK_EN;
4547 		break;
4548 	case 2:
4549 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4550 		break;
4551 	case 3:
4552 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4553 		clk_en_addr = mmDCORE3_MME_CTRL_LO_QM_SLV_CLK_EN;
4554 		break;
4555 	default:
4556 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4557 		return;
4558 	}
4559 
4560 	if (clk_en_addr && !(hdev->fw_components & FW_TYPE_BOOT_CPU))
4561 		WREG32(clk_en_addr, 0x1);
4562 
4563 	if (!config_qman_only) {
4564 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4565 		gaudi2_init_mme_acc(hdev, reg_base);
4566 	}
4567 
4568 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4569 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
4570 }
4571 
4572 static void gaudi2_init_mme(struct hl_device *hdev)
4573 {
4574 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4575 	int i;
4576 
4577 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4578 		return;
4579 
4580 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4581 		gaudi2_init_dcore_mme(hdev, i, false);
4582 
4583 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4584 	}
4585 }
4586 
4587 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4588 {
4589 	/* Mask arithmetic and QM interrupts in TPC */
4590 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4591 
4592 	/* Set 16 cache lines */
4593 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4594 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4595 }
4596 
4597 struct gaudi2_tpc_init_cfg_data {
4598 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4599 };
4600 
4601 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4602 					u32 offset, struct iterate_module_ctx *ctx)
4603 {
4604 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4605 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4606 	u32 queue_id_base;
4607 	u8 seq;
4608 
4609 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4610 
4611 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4612 		/* the additional DCORE0 TPC gets the last sequence number */
4613 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4614 	else
4615 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4616 
4617 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4618 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4619 
4620 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4621 }
4622 
4623 static void gaudi2_init_tpc(struct hl_device *hdev)
4624 {
4625 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4626 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
4627 	struct iterate_module_ctx tpc_iter;
4628 
4629 	if (!hdev->asic_prop.tpc_enabled_mask)
4630 		return;
4631 
4632 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4633 		return;
4634 
4635 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4636 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4637 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4638 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4639 	tpc_iter.fn = &gaudi2_init_tpc_config;
4640 	tpc_iter.data = &init_cfg_data;
4641 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
4642 }
4643 
4644 static void gaudi2_init_rotator(struct hl_device *hdev)
4645 {
4646 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4647 	u32 i, reg_base, queue_id;
4648 
4649 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4650 
4651 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4652 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4653 		gaudi2_init_qman(hdev, reg_base, queue_id);
4654 
4655 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4656 	}
4657 }
4658 
4659 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4660 {
4661 	u32 sob_id;
4662 
4663 	/* TODO:
4664 	 * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W
4665 	 * (SW-93024).
4666 	 */
4667 	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
4668 		u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4669 
4670 		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
4671 		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id);
4672 		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
4673 		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1);
4674 		return;
4675 	}
4676 
4677 	/* VCMD normal interrupt */
4678 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4679 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4680 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4681 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4682 
4683 	/* VCMD abnormal interrupt */
4684 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4685 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4686 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4687 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4688 }
4689 
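/*
 * Decoder capability/enable bits are laid out with the dcore decoders first
 * (dcore_id * NUM_OF_DEC_PER_DCORE + dec_id) and the PCIE decoders after them,
 * starting at PCIE_DEC_SHIFT. The same bit index is used both for the enable
 * mask check and for the dec_hw_cap_initialized bookkeeping.
 */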
4690 static void gaudi2_init_dec(struct hl_device *hdev)
4691 {
4692 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4693 	u32 dcore_id, dec_id, dec_bit;
4694 	u64 base_addr;
4695 
4696 	if (!hdev->asic_prop.decoder_enabled_mask)
4697 		return;
4698 
4699 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4700 		return;
4701 
4702 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4703 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4704 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4705 
4706 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4707 				continue;
4708 
4709 			base_addr = mmDCORE0_DEC0_CMD_BASE +
4710 					BRDG_CTRL_BLOCK_OFFSET +
4711 					dcore_id * DCORE_OFFSET +
4712 					dec_id * DCORE_VDEC_OFFSET;
4713 
4714 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4715 
4716 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4717 		}
4718 
4719 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4720 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4721 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4722 			continue;
4723 
4724 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4725 				dec_id * DCORE_VDEC_OFFSET;
4726 
4727 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4728 
4729 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4730 	}
4731 }
4732 
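/*
 * The MSI-X gateway table is a bitmap spread over 32-bit registers: interrupt
 * N maps to the register at byte offset (N / 32) * 4, bit N % 32 (e.g.
 * interrupt 45 maps to byte offset 4, bit 13). The function below sets the
 * bits of all user interrupts: partial masks in the first and last registers
 * and full 0xFFFFFFFF words for any register in between.
 */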
4733 static void gaudi2_init_msix_gw_table(struct hl_device *hdev)
4734 {
4735 	u32 first_reg_offset, last_reg_offset, msix_gw_table_base;
4736 	u8 first_bit, last_bit;
4737 	int i;
4738 
4739 	msix_gw_table_base = mmPCIE_WRAP_MSIX_GW_TABLE_0;
4740 	first_reg_offset = (GAUDI2_IRQ_NUM_USER_FIRST >> 5) << 2;
4741 	first_bit = GAUDI2_IRQ_NUM_USER_FIRST % 32;
4742 	last_reg_offset = (GAUDI2_IRQ_NUM_USER_LAST >> 5) << 2;
4743 	last_bit = GAUDI2_IRQ_NUM_USER_LAST % 32;
4744 
4745 	if (first_reg_offset == last_reg_offset) {
4746 		WREG32(msix_gw_table_base + first_reg_offset, GENMASK(last_bit, first_bit));
4747 		return;
4748 	}
4749 
4750 	WREG32(msix_gw_table_base + first_reg_offset, GENMASK(31, first_bit));
4751 	WREG32(msix_gw_table_base + last_reg_offset, GENMASK(last_bit, 0));
4752 
4753 	for (i = first_reg_offset + 4; i < last_reg_offset ; i += 4)
4754 		WREG32(msix_gw_table_base + i, 0xFFFFFFFF);
4755 }
4756 
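/*
 * The hop0 physical address is programmed as two fields: bits 43:12 go into
 * STLB_HOP0_PA43_12 and bits 63:44 into STLB_HOP0_PA63_44, each right-shifted
 * by the matching MMU_HOP0_* shift. Writing STLB_BUSY with bit 31 set kicks
 * off the update, and the same bit is polled until H/W clears it.
 */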
4757 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4758 					u32 stlb_base, u32 asid, u64 phys_addr)
4759 {
4760 	u32 status, timeout_usec;
4761 	int rc;
4762 
4763 	if (hdev->pldm || !hdev->pdev)
4764 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4765 	else
4766 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4767 
4768 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4769 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4770 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4771 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4772 
4773 	rc = hl_poll_timeout(
4774 		hdev,
4775 		stlb_base + STLB_BUSY_OFFSET,
4776 		status,
4777 		!(status & 0x80000000),
4778 		1000,
4779 		timeout_usec);
4780 
4781 	if (rc) {
4782 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4783 		return rc;
4784 	}
4785 
4786 	return 0;
4787 }
4788 
4789 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4790 					u32 start_offset, u32 inv_start_val,
4791 					u32 flags)
4792 {
4793 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
4794 	if (flags & MMU_OP_CLEAR_MEMCACHE)
4795 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4796 
4797 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4798 		return;
4799 
4800 	WREG32(stlb_base + start_offset, inv_start_val);
4801 }
4802 
4803 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4804 						struct gaudi2_cache_invld_params *inv_params)
4805 {
4806 	u32 status, timeout_usec, start_offset;
4807 	int rc;
4808 
4809 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4810 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4811 
4812 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
4813 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4814 		rc = hl_poll_timeout(
4815 			hdev,
4816 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4817 			status,
4818 			status & 0x1,
4819 			1000,
4820 			timeout_usec);
4821 
4822 		if (rc)
4823 			return rc;
4824 
4825 		/* Need to manually reset the status to 0 */
4826 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4827 	}
4828 
4829 	/* Lower cache does not work with cache lines, hence we can skip its
4830 	 * invalidation upon map and invalidate only upon unmap
4831 	 */
4832 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4833 		return 0;
4834 
4835 	start_offset = inv_params->range_invalidation ?
4836 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4837 
4838 	rc = hl_poll_timeout(
4839 		hdev,
4840 		stlb_base + start_offset,
4841 		status,
4842 		!(status & 0x1),
4843 		1000,
4844 		timeout_usec);
4845 
4846 	return rc;
4847 }
4848 
4849 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4850 {
4851 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4852 	u32 hw_cap;
4853 
4854 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4855 
4856 	if (gaudi2->hw_cap_initialized & hw_cap)
4857 		return true;
4858 
4859 	return false;
4860 }
4861 
4862 /* this function shall be called only for HMMUs whose capability bit is set */
4863 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4864 {
4865 	u32 offset;
4866 
4867 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4868 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4869 }
4870 
4871 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4872 						struct gaudi2_cache_invld_params *inv_params)
4873 {
4874 	u32 start_offset;
4875 
4876 	if (inv_params->range_invalidation) {
4877 		/* Set the addresses range.
4878 		 * Note: by design, the start address we write to the register is
4879 		 * not included in the invalidation range.
4880 		 * That's why we set an address lower than the one we actually
4881 		 * want to be included in the range invalidation.
4882 		 */
4883 		u64 start = inv_params->start_va - 1;
4884 
4885 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4886 
4887 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4888 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4889 
4890 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4891 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4892 
4893 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4894 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4895 
4896 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4897 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4898 	} else {
4899 		start_offset = STLB_INV_ALL_START_OFFSET;
4900 	}
4901 
4902 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4903 						inv_params->inv_start_val, inv_params->flags);
4904 }
4905 
4906 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4907 						int dcore_id, int hmmu_id,
4908 						struct gaudi2_cache_invld_params *inv_params)
4909 {
4910 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4911 
4912 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4913 }
4914 
4915 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4916 						int dcore_id, int hmmu_id,
4917 						struct gaudi2_cache_invld_params *inv_params)
4918 {
4919 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4920 
4921 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4922 }
4923 
4924 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4925 						struct gaudi2_cache_invld_params *inv_params)
4926 {
4927 	int dcore_id, hmmu_id;
4928 
4929 	/* first send all invalidation commands */
4930 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4931 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4932 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4933 				continue;
4934 
4935 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4936 		}
4937 	}
4938 
4939 	/* next, poll all invalidations status */
4940 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4941 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4942 			int rc;
4943 
4944 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4945 				continue;
4946 
4947 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4948 										inv_params);
4949 			if (rc)
4950 				return rc;
4951 		}
4952 	}
4953 
4954 	return 0;
4955 }
4956 
4957 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4958 {
4959 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4960 	struct gaudi2_cache_invld_params invld_params;
4961 	int rc = 0;
4962 
4963 	if (hdev->reset_info.hard_reset_pending)
4964 		return rc;
4965 
4966 	invld_params.range_invalidation = false;
4967 	invld_params.inv_start_val = 1;
4968 
4969 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4970 		invld_params.flags = flags;
4971 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4972 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4973 										&invld_params);
4974 	} else if (flags & MMU_OP_PHYS_PACK) {
4975 		invld_params.flags = 0;
4976 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4977 	}
4978 
4979 	return rc;
4980 }
4981 
4982 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4983 				u32 flags, u32 asid, u64 va, u64 size)
4984 {
4985 	struct gaudi2_cache_invld_params invld_params = {0};
4986 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4987 	u64 start_va, end_va;
4988 	u32 inv_start_val;
4989 	int rc = 0;
4990 
4991 	if (hdev->reset_info.hard_reset_pending)
4992 		return 0;
4993 
4994 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4995 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4996 			asid << MMU_RANGE_INV_ASID_SHIFT);
4997 	start_va = va;
4998 	end_va = start_va + size;
4999 
5000 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5001 		/* As range invalidation does not support a zero address, we do
5002 		 * a full invalidation in this case
5003 		 */
5004 		if (start_va) {
5005 			invld_params.range_invalidation = true;
5006 			invld_params.start_va = start_va;
5007 			invld_params.end_va = end_va;
5008 			invld_params.inv_start_val = inv_start_val;
5009 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5010 		} else {
5011 			invld_params.range_invalidation = false;
5012 			invld_params.inv_start_val = 1;
5013 			invld_params.flags = flags;
5014 		}
5015 
5016 
5017 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5018 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5019 										&invld_params);
5020 		if (rc)
5021 			return rc;
5022 
5023 	} else if (flags & MMU_OP_PHYS_PACK) {
5024 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5025 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5026 		invld_params.inv_start_val = inv_start_val;
5027 		invld_params.flags = flags;
5028 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5029 	}
5030 
5031 	return rc;
5032 }
5033 
5034 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5035 {
5036 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5037 	u64 hop0_addr;
5038 	u32 asid, max_asid = prop->max_asid;
5039 	int rc;
5040 
5041 	/* it takes too much time to init all of the ASIDs on palladium */
5042 	if (hdev->pldm)
5043 		max_asid = min((u32) 8, max_asid);
5044 
5045 	for (asid = 0 ; asid < max_asid ; asid++) {
5046 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5047 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5048 		if (rc) {
5049 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5050 			return rc;
5051 		}
5052 	}
5053 
5054 	return 0;
5055 }
5056 
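/*
 * Common MMU bring-up sequence (as implemented below): kick an "invalidate
 * all", wait for the STLB SRAM init to finish, program the per-ASID hop0
 * addresses, disable MMU bypass, wait for the invalidation to complete and
 * only then enable the MMU.
 */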
5057 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5058 {
5059 	u32 status, timeout_usec;
5060 	int rc;
5061 
5062 	if (hdev->pldm || !hdev->pdev)
5063 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5064 	else
5065 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5066 
5067 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5068 
5069 	rc = hl_poll_timeout(
5070 		hdev,
5071 		stlb_base + STLB_SRAM_INIT_OFFSET,
5072 		status,
5073 		!status,
5074 		1000,
5075 		timeout_usec);
5076 
5077 	if (rc)
5078 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5079 
5080 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5081 	if (rc)
5082 		return rc;
5083 
5084 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5085 
5086 	rc = hl_poll_timeout(
5087 		hdev,
5088 		stlb_base + STLB_INV_ALL_START_OFFSET,
5089 		status,
5090 		!status,
5091 		1000,
5092 		timeout_usec);
5093 
5094 	if (rc)
5095 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5096 
5097 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5098 
5099 	return rc;
5100 }
5101 
5102 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5103 {
5104 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5105 	u32 mmu_base, stlb_base;
5106 	int rc;
5107 
5108 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5109 		return 0;
5110 
5111 	mmu_base = mmPMMU_HBW_MMU_BASE;
5112 	stlb_base = mmPMMU_HBW_STLB_BASE;
5113 
5114 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5115 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5116 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5117 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5118 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5119 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5120 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5121 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5122 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5123 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5124 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5125 
5126 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5127 
5128 	if (PAGE_SIZE == SZ_64K) {
5129 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5130 		RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5131 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5132 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5133 			FIELD_PREP(
5134 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5135 				1),
5136 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5137 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5138 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5139 	}
5140 
5141 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5142 
5143 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5144 	if (rc)
5145 		return rc;
5146 
5147 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5148 
5149 	return 0;
5150 }
5151 
5152 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5153 				int hmmu_id)
5154 {
5155 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5156 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5157 	u32 offset, mmu_base, stlb_base, hw_cap;
5158 	u8 dmmu_seq;
5159 	int rc;
5160 
5161 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5162 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5163 
5164 	/*
5165 	 * return if DMMU is already initialized or if it's not out of
5166 	 * isolation (due to cluster binning)
5167 	 */
5168 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5169 		return 0;
5170 
5171 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5172 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5173 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5174 
5175 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5176 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5177 
5178 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5179 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5180 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5181 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5182 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5183 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5184 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5185 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5186 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5187 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5188 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5189 
5190 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5191 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5192 
5193 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5194 
5195 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5196 	if (rc)
5197 		return rc;
5198 
5199 	gaudi2->hw_cap_initialized |= hw_cap;
5200 
5201 	return 0;
5202 }
5203 
5204 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5205 {
5206 	int rc, dcore_id, hmmu_id;
5207 
5208 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5209 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5210 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5211 			if (rc)
5212 				return rc;
5213 		}
5214 
5215 	return 0;
5216 }
5217 
5218 static int gaudi2_mmu_init(struct hl_device *hdev)
5219 {
5220 	int rc;
5221 
5222 	rc = gaudi2_pci_mmu_init(hdev);
5223 	if (rc)
5224 		return rc;
5225 
5226 	rc = gaudi2_hbm_mmu_init(hdev);
5227 	if (rc)
5228 		return rc;
5229 
5230 	return 0;
5231 }
5232 
5233 static int gaudi2_hw_init(struct hl_device *hdev)
5234 {
5235 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5236 	int rc;
5237 
5238 	/* Let's mark in the H/W that we have reached this point. We check
5239 	 * this value in the reset_before_init function to understand whether
5240 	 * we need to reset the chip before doing H/W init. This register is
5241 	 * cleared by the H/W upon H/W reset
5242 	 */
5243 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5244 
5245 	/* Perform read from the device to make sure device is up */
5246 	RREG32(mmHW_STATE);
5247 
5248 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5249 	 * So we set it here and if anyone tries to move it later to
5250 	 * a different address, there will be an error
5251 	 */
5252 	if (hdev->asic_prop.iatu_done_by_fw)
5253 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5254 
5255 	/*
5256 	 * Before pushing u-boot/Linux to the device, we need to set the HBM
5257 	 * BAR to the DRAM base address
5258 	 */
5259 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5260 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5261 		return -EIO;
5262 	}
5263 
5264 	rc = gaudi2_init_cpu(hdev);
5265 	if (rc) {
5266 		dev_err(hdev->dev, "failed to initialize CPU\n");
5267 		return rc;
5268 	}
5269 
5270 	gaudi2_init_msix_gw_table(hdev);
5271 
5272 	gaudi2_init_scrambler_hbm(hdev);
5273 	gaudi2_init_kdma(hdev);
5274 
5275 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5276 	if (rc) {
5277 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5278 		return rc;
5279 	}
5280 
5281 	rc = gaudi2->cpucp_info_get(hdev);
5282 	if (rc) {
5283 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5284 		return rc;
5285 	}
5286 
5287 	rc = gaudi2_mmu_init(hdev);
5288 	if (rc)
5289 		return rc;
5290 
5291 	gaudi2_init_pdma(hdev);
5292 	gaudi2_init_edma(hdev);
5293 	gaudi2_init_sm(hdev);
5294 	gaudi2_init_tpc(hdev);
5295 	gaudi2_init_mme(hdev);
5296 	gaudi2_init_rotator(hdev);
5297 	gaudi2_init_dec(hdev);
5298 	gaudi2_enable_timestamp(hdev);
5299 
5300 	rc = gaudi2_coresight_init(hdev);
5301 	if (rc)
5302 		goto disable_queues;
5303 
5304 	rc = gaudi2_enable_msix(hdev);
5305 	if (rc)
5306 		goto disable_queues;
5307 
5308 	/* Perform read from the device to flush all configuration */
5309 	RREG32(mmHW_STATE);
5310 
5311 	return 0;
5312 
5313 disable_queues:
5314 	gaudi2_disable_dma_qmans(hdev);
5315 	gaudi2_disable_mme_qmans(hdev);
5316 	gaudi2_disable_tpc_qmans(hdev);
5317 	gaudi2_disable_rot_qmans(hdev);
5318 	gaudi2_disable_nic_qmans(hdev);
5319 
5320 	gaudi2_disable_timestamp(hdev);
5321 
5322 	return rc;
5323 }
5324 
5325 /**
5326  * gaudi2_send_hard_reset_cmd - common function to handle reset
5327  *
5328  * @hdev: pointer to the habanalabs device structure
5329  *
5330  * This function handles the various possible scenarios for reset.
5331  * It considers whether the reset is handled by the driver or by FW, and which FW components are loaded
5332  */
5333 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5334 {
5335 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5336 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5337 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5338 	u32 cpu_boot_status;
5339 
5340 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5341 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5342 
5343 	/*
5344 	 * Handle the corner case where the failure occurred while loading the cpu
5345 	 * management app and the driver didn't detect any failure while loading the FW.
5346 	 * In such a scenario the driver will send only HALT_MACHINE, and no one will
5347 	 * respond to this request since the FW is already back in preboot and cannot
5348 	 * handle such a cmd.
5349 	 * In this case, the next time the management app loads it'll check the events
5350 	 * register, which will still have the halt indication, and will reboot the device.
5351 	 * The solution is to let preboot clear all relevant registers before the next
5352 	 * boot, once the driver sends COMMS_RST_DEV.
5353 	 */
5354 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5355 
5356 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5357 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5358 		cpu_initialized = true;
5359 
5360 	/*
5361 	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
5362 	 * 1. FW reset: FW initiates the reset sequence
5363 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
5364 	 *                  reset but not the reset itself as it is not implemented
5365 	 *                  on their part) and LKD will wait to let FW complete the
5366 	 *                  sequence before issuing the reset
5367 	 */
5368 	if (!preboot_only && cpu_initialized) {
5369 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5370 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5371 
5372 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5373 	}
5374 
5375 	/*
5376 	 * When working with preboot (without Linux/Boot fit) we can
5377 	 * communicate only using the COMMS commands to issue halt/reset.
5378 	 *
5379 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
5380 	 * attempt to revive the card in the small chance that the f/w has
5381 	 * experienced a watchdog event, which caused it to return back to preboot.
5382 	 * In that case, triggering reset through GIC won't help. We need to
5383 	 * trigger the reset as if Linux wasn't loaded.
5384 	 *
5385 	 * We do it only if the reset cause was HB, because that would be the
5386 	 * indication of such an event.
5387 	 *
5388 	 * In case the watchdog hasn't expired but we still got a HB, this won't
5389 	 * do any damage.
5390 	 */
5391 
5392 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
5393 		if (hdev->asic_prop.hard_reset_done_by_fw)
5394 			hl_fw_ask_hard_reset_without_linux(hdev);
5395 		else
5396 			hl_fw_ask_halt_machine_without_linux(hdev);
5397 	}
5398 }
5399 
5400 /**
5401  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5402  *
5403  * @hdev: pointer to the habanalabs device structure
5404  * @reset_sleep_ms: sleep time in msec after reset
5405  *
5406  * This function executes hard reset based on if driver/FW should do the reset
5407  */
5408 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5409 {
5410 	if (hdev->asic_prop.hard_reset_done_by_fw) {
5411 		gaudi2_send_hard_reset_cmd(hdev);
5412 		return;
5413 	}
5414 
5415 	/* Set device to handle FLR by H/W as we will put the device
5416 	 * CPU into halt mode
5417 	 */
5418 	WREG32(mmPCIE_AUX_FLR_CTRL,
5419 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5420 
5421 	gaudi2_send_hard_reset_cmd(hdev);
5422 
5423 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5424 }
5425 
5426 /**
5427  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5428  *
5429  * @hdev: pointer to the habanalabs device structure
5430  * @reset_sleep_ms: sleep time in msec after reset
5431  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5432  *
5433  * This function executes soft reset based on if driver/FW should do the reset
5434  */
5435 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5436 						bool driver_performs_reset)
5437 {
5438 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5439 
5440 	if (!driver_performs_reset) {
5441 		/* set SP to indicate reset request sent to FW */
5442 		if (dyn_regs->cpu_rst_status)
5443 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5444 		else
5445 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5446 
5447 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5448 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5449 		return;
5450 	}
5451 
5452 	/* Block access to engines, QMANs and SM during reset; these
5453 	 * RRs will be reconfigured after soft reset.
5454 	 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
5455 	 */
5456 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5457 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5458 
5459 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5460 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5461 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5462 
5463 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5464 }
5465 
5466 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5467 								u32 poll_timeout_us)
5468 {
5469 	int i, rc = 0;
5470 	u32 reg_val;
5471 
5472 	/* Without this sleep, the reset will not work */
5473 	msleep(reset_sleep_ms);
5474 
5475 	/* We poll the BTM done indication multiple times after reset due to
5476 	 * a HW erratum 'GAUDI2_0300'
5477 	 */
5478 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5479 		rc = hl_poll_timeout(
5480 			hdev,
5481 			mmPSOC_GLOBAL_CONF_BTM_FSM,
5482 			reg_val,
5483 			reg_val == 0,
5484 			1000,
5485 			poll_timeout_us);
5486 
5487 	if (rc)
5488 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5489 }
5490 
5491 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5492 {
5493 	int i, rc = 0;
5494 	u32 reg_val;
5495 
5496 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5497 		rc = hl_poll_timeout(
5498 			hdev,
5499 			mmCPU_RST_STATUS_TO_HOST,
5500 			reg_val,
5501 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5502 			1000,
5503 			poll_timeout_us);
5504 
5505 	if (rc)
5506 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5507 				reg_val);
5508 }
5509 
5510 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5511 {
5512 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5513 	u32 poll_timeout_us, reset_sleep_ms;
5514 	bool driver_performs_reset = false;
5515 
5516 	if (hdev->pldm) {
5517 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5518 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5519 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5520 	} else {
5521 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5522 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5523 	}
5524 
5525 	if (fw_reset)
5526 		goto skip_reset;
5527 
5528 	gaudi2_reset_arcs(hdev);
5529 
5530 	if (hard_reset) {
5531 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5532 		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5533 	} else {
5534 		/*
5535 		 * As we also have to support working with preboot only (which does not
5536 		 * support soft reset), we have to make sure that security is disabled before
5537 		 * letting the driver do the reset. The user shall control the BFE flags to
5538 		 * avoid requesting a soft reset on a secured device with preboot only.
5539 		 */
5540 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5541 							!hdev->asic_prop.fw_security_enabled);
5542 		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5543 	}
5544 
5545 skip_reset:
5546 	if (driver_performs_reset || hard_reset)
5547 		gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5548 	else
5549 		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5550 
5551 	if (!gaudi2)
5552 		return;
5553 
5554 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5555 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5556 
5557 	/*
5558 	 * Clear NIC capability mask in order for driver to re-configure
5559 	 * NIC QMANs. NIC ports will not be re-configured during soft
5560 	 * reset as we call gaudi2_nic_init only during hard reset
5561 	 */
5562 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5563 
5564 	if (hard_reset) {
5565 		gaudi2->hw_cap_initialized &=
5566 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5567 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5568 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5569 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5570 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5571 
5572 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5573 	} else {
5574 		gaudi2->hw_cap_initialized &=
5575 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5576 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5577 			HW_CAP_ROT_MASK);
5578 	}
5579 }
5580 
5581 static int gaudi2_suspend(struct hl_device *hdev)
5582 {
5583 	int rc;
5584 
5585 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5586 	if (rc)
5587 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5588 
5589 	return rc;
5590 }
5591 
5592 static int gaudi2_resume(struct hl_device *hdev)
5593 {
5594 	return gaudi2_init_iatu(hdev);
5595 }
5596 
5597 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5598 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
5599 {
5600 	int rc;
5601 
5602 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5603 			VM_DONTCOPY | VM_NORESERVE;
5604 
5605 #ifdef _HAS_DMA_MMAP_COHERENT
5606 
5607 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5608 	if (rc)
5609 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5610 
5611 #else
5612 
5613 	rc = remap_pfn_range(vma, vma->vm_start,
5614 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5615 				size, vma->vm_page_prot);
5616 	if (rc)
5617 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5618 
5619 #endif
5620 
5621 	return rc;
5622 }
5623 
5624 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5625 {
5626 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5627 	u64 hw_cap_mask = 0;
5628 	u64 hw_tpc_cap_bit = 0;
5629 	u64 hw_nic_cap_bit = 0;
5630 	u64 hw_test_cap_bit = 0;
5631 
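	/*
	 * Note (added for clarity): every engine QMAN exposes 4 consecutive queue
	 * IDs, one per QMAN stream, so the ">> 2" in the cases below maps a H/W
	 * queue ID offset to its engine index within the capability mask.
	 */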
5632 	switch (hw_queue_id) {
5633 	case GAUDI2_QUEUE_ID_PDMA_0_0:
5634 	case GAUDI2_QUEUE_ID_PDMA_0_1:
5635 	case GAUDI2_QUEUE_ID_PDMA_1_0:
5636 		hw_cap_mask = HW_CAP_PDMA_MASK;
5637 		break;
5638 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5639 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5640 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5641 		break;
5642 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5643 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5644 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5645 		break;
5646 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5647 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5648 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5649 		break;
5650 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5651 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5652 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5653 		break;
5654 
5655 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5656 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
5657 		break;
5658 
5659 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5660 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5661 		break;
5662 
5663 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5664 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5665 		break;
5666 
5667 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5668 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5669 		break;
5670 
5671 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5672 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5673 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5674 
5675 		/* special case where cap bit refers to the first queue id */
5676 		if (!hw_tpc_cap_bit)
5677 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5678 		break;
5679 
5680 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5681 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5682 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5683 		break;
5684 
5685 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5686 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5687 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5688 		break;
5689 
5690 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5691 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5692 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5693 		break;
5694 
5695 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5696 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5697 		break;
5698 
5699 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5700 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5701 		break;
5702 
5703 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5704 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5705 
5706 		/* special case where cap bit refers to the first queue id */
5707 		if (!hw_nic_cap_bit)
5708 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5709 		break;
5710 
5711 	case GAUDI2_QUEUE_ID_CPU_PQ:
5712 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5713 
5714 	default:
5715 		return false;
5716 	}
5717 
5718 	if (hw_tpc_cap_bit)
5719 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5720 
5721 	if (hw_nic_cap_bit)
5722 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5723 
5724 	if (hw_test_cap_bit)
5725 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5726 
5727 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5728 }
5729 
5730 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5731 {
5732 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5733 
5734 	switch (arc_id) {
5735 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5736 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5737 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5738 
5739 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5740 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5741 
5742 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5743 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5744 
5745 	default:
5746 		return false;
5747 	}
5748 }
5749 
5750 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5751 {
5752 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5753 
5754 	switch (arc_id) {
5755 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5756 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5757 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5758 		break;
5759 
5760 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5761 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5762 		break;
5763 
5764 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5765 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5766 		break;
5767 
5768 	default:
5769 		return;
5770 	}
5771 }
5772 
5773 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5774 {
5775 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5776 
5777 	switch (arc_id) {
5778 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5779 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5780 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5781 		break;
5782 
5783 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5784 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5785 		break;
5786 
5787 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5788 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5789 		break;
5790 
5791 	default:
5792 		return;
5793 	}
5794 }
5795 
5796 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5797 {
5798 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5799 	u32 pq_offset, reg_base, db_reg_offset, db_value;
5800 
5801 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5802 		/*
5803 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5804 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5805 		 * number.
5806 		 */
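		/* e.g. (illustrative) hw_queue_id 6 -> internal PQ 2 -> pq_offset 8 bytes */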
5807 		pq_offset = (hw_queue_id & 0x3) * 4;
5808 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5809 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5810 	} else {
5811 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
5812 	}
5813 
5814 	db_value = pi;
5815 
5816 	/* ring the doorbell */
5817 	WREG32(db_reg_offset, db_value);
5818 
5819 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5820 		/* make sure device CPU will read latest data from host */
5821 		mb();
5822 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5823 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5824 	}
5825 }
5826 
5827 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5828 {
5829 	__le64 *pbd = (__le64 *) bd;
5830 
5831 	/* The QMAN PQs are on the host memory so a simple copy suffices */
5832 	pqe[0] = pbd[0];
5833 	pqe[1] = pbd[1];
5834 }
5835 
5836 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5837 				dma_addr_t *dma_handle, gfp_t flags)
5838 {
5839 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5840 }
5841 
5842 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5843 				void *cpu_addr, dma_addr_t dma_handle)
5844 {
5845 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5846 }
5847 
5848 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5849 				u32 timeout, u64 *result)
5850 {
5851 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5852 
5853 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5854 		if (result)
5855 			*result = 0;
5856 		return 0;
5857 	}
5858 
5859 	if (!timeout)
5860 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5861 
5862 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5863 }
5864 
5865 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5866 				gfp_t mem_flags, dma_addr_t *dma_handle)
5867 {
5868 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5869 		return NULL;
5870 
5871 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5872 }
5873 
5874 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5875 {
5876 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5877 }
5878 
5879 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5880 						dma_addr_t *dma_handle)
5881 {
5882 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5883 }
5884 
5885 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5886 {
5887 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5888 }
5889 
5890 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5891 					enum dma_data_direction dir)
5892 {
5893 	dma_addr_t dma_addr;
5894 
5895 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5896 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5897 		return 0;
5898 
5899 	return dma_addr;
5900 }
5901 
5902 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5903 					enum dma_data_direction dir)
5904 {
5905 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5906 }
5907 
5908 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5909 {
5910 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5911 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5912 
5913 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5914 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5915 		return -EINVAL;
5916 	}
5917 
5918 	/* Just check if CB address is valid */
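	/*
	 * (Added for clarity) The CB is accepted if it falls entirely inside one of:
	 * the SRAM user range, the DRAM user range, the DMMU VA range (when the DMMU
	 * is initialized), the PMMU/PMMU-huge VA ranges (when the PMMU is
	 * initialized), or - when no IOMMU is present - a valid host physical address.
	 */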
5919 
5920 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5921 					parser->user_cb_size,
5922 					asic_prop->sram_user_base_address,
5923 					asic_prop->sram_end_address))
5924 		return 0;
5925 
5926 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5927 					parser->user_cb_size,
5928 					asic_prop->dram_user_base_address,
5929 					asic_prop->dram_end_address))
5930 		return 0;
5931 
5932 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5933 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5934 						parser->user_cb_size,
5935 						asic_prop->dmmu.start_addr,
5936 						asic_prop->dmmu.end_addr))
5937 		return 0;
5938 
5939 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5940 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5941 					parser->user_cb_size,
5942 					asic_prop->pmmu.start_addr,
5943 					asic_prop->pmmu.end_addr) ||
5944 			hl_mem_area_inside_range(
5945 					(u64) (uintptr_t) parser->user_cb,
5946 					parser->user_cb_size,
5947 					asic_prop->pmmu_huge.start_addr,
5948 					asic_prop->pmmu_huge.end_addr))
5949 			return 0;
5950 
5951 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
5952 		if (!hdev->pdev)
5953 			return 0;
5954 
5955 		if (!device_iommu_mapped(&hdev->pdev->dev))
5956 			return 0;
5957 	}
5958 
5959 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5960 		parser->user_cb, parser->user_cb_size);
5961 
5962 	return -EFAULT;
5963 }
5964 
5965 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5966 {
5967 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5968 
5969 	if (!parser->is_kernel_allocated_cb)
5970 		return gaudi2_validate_cb_address(hdev, parser);
5971 
5972 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5973 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5974 		return -EINVAL;
5975 	}
5976 
5977 	return 0;
5978 }
5979 
5980 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5981 {
5982 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5983 
5984 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5985 		return 0;
5986 
5987 	return hl_fw_send_heartbeat(hdev);
5988 }
5989 
5990 /* This is an internal helper function, used to update the KDMA MMU properties.
5991  * Should be called while holding the KDMA lock.
5992  */
5993 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5994 					   bool mmu_bypass, u32 asid)
5995 {
5996 	u32 rw_asid, rw_mmu_bp;
5997 
5998 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5999 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6000 
6001 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6002 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6003 
6004 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6005 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6006 }
6007 
6008 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6009 						u32 mon_payload, u32 sync_value)
6010 {
6011 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6012 	u8 mask;
6013 
6014 	sob_offset = sob_id * 4;
6015 	mon_offset = mon_id * 4;
6016 
6017 	/* Reset the SOB value */
6018 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6019 
6020 	/* Configure this address with the CQ_ID because CQ_EN is set */
6021 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6022 
6023 	/* Configure this data with the CS index (monitor payload) because CQ_EN is set */
6024 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6025 
6026 	sync_group_id = sob_id / 8;
6027 	mask = ~(1 << (sob_id & 0x7));
6028 	mode = 1; /* comparison mode is "equal to" */
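	/*
	 * Illustrative example: for sob_id 10, sync_group_id is 1 (10 / 8) and
	 * mask is ~(1 << 2) = 0xfb, leaving only the SOB's own bit (bit 2) of the
	 * group unmasked.
	 */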
6029 
6030 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6031 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6032 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6033 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6034 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6035 }
6036 
6037 /* This is an internal helper function, used to issue a copy/memset job on the KDMA engine */
6038 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6039 					u64 src_addr, u64 dst_addr,
6040 					u32 size, bool is_memset)
6041 {
6042 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6043 	struct hl_cq_entry *cq_base;
6044 	struct hl_cq *cq;
6045 	u64 comp_addr;
6046 	int rc;
6047 
6048 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6049 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6050 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6051 
6052 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6053 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6054 
6055 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6056 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
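	/*
	 * Completion flow (summary of the code below): upon completion the KDMA
	 * writes comp_val to the reserved SOB at comp_addr, incrementing it to 1;
	 * the monitor armed above then fires and writes its payload to the reserved
	 * CQ entry, which the polling loop below waits on.
	 */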
6057 
6058 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6059 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6060 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6061 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6062 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6063 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6064 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6065 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6066 
6067 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6068 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6069 
6070 	if (is_memset)
6071 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6072 
6073 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6074 
6075 	/* Wait for completion */
6076 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6077 	cq_base = cq->kernel_address;
6078 	polling_addr = (u32 *)&cq_base[cq->ci];
6079 
6080 	if (hdev->pldm)
6081 		/* 20 seconds of timeout for each 1MB of transfer */
6082 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6083 	else
6084 		timeout = KDMA_TIMEOUT_USEC;
6085 
6086 	/* Polling */
6087 	rc = hl_poll_timeout_memory(
6088 			hdev,
6089 			polling_addr,
6090 			status,
6091 			(status == 1),
6092 			1000,
6093 			timeout,
6094 			true);
6095 
6096 	*polling_addr = 0;
6097 
6098 	if (rc) {
6099 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6100 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6101 		return rc;
6102 	}
6103 
6104 	cq->ci = hl_cq_inc_ptr(cq->ci);
6105 
6106 	return 0;
6107 }
6108 
6109 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6110 {
6111 	u32 i;
6112 
6113 	for (i = 0 ; i < size ; i += sizeof(u32))
6114 		WREG32(addr + i, val);
6115 }
6116 
6117 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6118 {
6119 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6120 
6121 	if (enable) {
6122 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6123 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6124 	} else {
6125 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6126 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6127 	}
6128 }
6129 
6130 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6131 {
6132 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6133 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6134 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6135 	struct packet_msg_short *msg_short_pkt;
6136 	dma_addr_t pkt_dma_addr;
6137 	size_t pkt_size;
6138 	int rc;
6139 
6140 	if (hdev->pldm)
6141 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6142 	else
6143 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6144 
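	/*
	 * Test flow (summary): submit a MSG_SHORT packet on the tested queue that
	 * writes a known value (sob_val) to the first available user SOB, then poll
	 * that SOB until it holds the value or the timeout expires.
	 */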
6145 	pkt_size = sizeof(*msg_short_pkt);
6146 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6147 	if (!msg_short_pkt) {
6148 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6149 			hw_queue_id);
6150 		return -ENOMEM;
6151 	}
6152 
6153 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6154 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6155 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6156 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6157 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6158 
6159 	msg_short_pkt->value = cpu_to_le32(sob_val);
6160 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6161 
6162 	/* Reset the SOB value */
6163 	WREG32(sob_addr, 0);
6164 
6165 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6166 	if (rc) {
6167 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6168 			hw_queue_id);
6169 		goto free_pkt;
6170 	}
6171 
6172 	rc = hl_poll_timeout(
6173 			hdev,
6174 			sob_addr,
6175 			tmp,
6176 			(tmp == sob_val),
6177 			1000,
6178 			timeout_usec);
6179 
6180 	if (rc == -ETIMEDOUT) {
6181 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6182 			hw_queue_id, tmp);
6183 		rc = -EIO;
6184 	}
6185 
6186 	/* Reset the SOB value */
6187 	WREG32(sob_addr, 0);
6188 
6189 free_pkt:
6190 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6191 	return rc;
6192 }
6193 
6194 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6195 {
6196 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6197 
6198 	/*
6199 	 * Check the capability here, as send_cpu_message() won't update the
6200 	 * result value when the capability is not set
6201 	 */
6202 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6203 		return 0;
6204 
6205 	return hl_fw_test_cpu_queue(hdev);
6206 }
6207 
6208 static int gaudi2_test_queues(struct hl_device *hdev)
6209 {
6210 	int i, rc, ret_val = 0;
6211 
6212 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6213 		if (!gaudi2_is_queue_enabled(hdev, i))
6214 			continue;
6215 
6216 		gaudi2_qman_set_test_mode(hdev, i, true);
6217 		rc = gaudi2_test_queue(hdev, i);
6218 		gaudi2_qman_set_test_mode(hdev, i, false);
6219 
6220 		if (rc) {
6221 			ret_val = -EINVAL;
6222 			goto done;
6223 		}
6224 	}
6225 
6226 	rc = gaudi2_test_cpu_queue(hdev);
6227 	if (rc) {
6228 		ret_val = -EINVAL;
6229 		goto done;
6230 	}
6231 
6232 done:
6233 	return ret_val;
6234 }
6235 
6236 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6237 {
6238 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6239 	size_t irq_arr_size;
6240 
6241 	/* TODO: missing gaudi2_nic_resume.
6242 	 * Until it is implemented, nic_hw_cap_initialized will remain zeroed
6243 	 */
6244 	gaudi2_init_arcs(hdev);
6245 	gaudi2_scrub_arcs_dccm(hdev);
6246 	gaudi2_init_security(hdev);
6247 
6248 	/* Unmask all IRQs since some could have been received during the soft reset */
6249 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6250 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6251 }
6252 
6253 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6254 					struct iterate_module_ctx *ctx)
6255 {
6256 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6257 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6258 	bool is_eng_idle;
6259 	int engine_idx;
6260 
6261 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6262 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6263 	else
6264 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6265 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6266 
6267 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6268 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6269 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6270 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6271 
6272 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6273 						IS_TPC_IDLE(tpc_cfg_sts);
6274 	*(idle_data->is_idle) &= is_eng_idle;
6275 
6276 	if (idle_data->mask && !is_eng_idle)
6277 		set_bit(engine_idx, idle_data->mask);
6278 
6279 	if (idle_data->e)
6280 		hl_engine_data_sprintf(idle_data->e,
6281 					idle_data->tpc_fmt, dcore, inst,
6282 					is_eng_idle ? "Y" : "N",
6283 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6284 }
6285 
6286 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6287 					struct engines_data *e)
6288 {
6289 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6290 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
6291 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6292 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6293 	unsigned long *mask = (unsigned long *) mask_arr;
6294 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6295 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6296 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6297 	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6298 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6299 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6300 	bool is_idle = true, is_eng_idle;
6301 	u64 offset;
6302 
6303 	struct gaudi2_tpc_idle_data tpc_idle_data = {
6304 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6305 		.e = e,
6306 		.mask = mask,
6307 		.is_idle = &is_idle,
6308 	};
6309 	struct iterate_module_ctx tpc_iter = {
6310 		.fn = &gaudi2_is_tpc_engine_idle,
6311 		.data = &tpc_idle_data,
6312 	};
6313 
6314 	int engine_idx, i, j;
6315 
6316 	/* EDMA, Two engines per Dcore */
6317 	if (e)
6318 		hl_engine_data_sprintf(e,
6319 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6320 			"----  ----  -------  ------------  ----------------------\n");
6321 
6322 	for (i = 0; i < NUM_OF_DCORES; i++) {
6323 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6324 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6325 
6326 			if (!(prop->edma_enabled_mask & BIT(seq)))
6327 				continue;
6328 
6329 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6330 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6331 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6332 
6333 			dma_core_idle_ind_mask =
6334 			RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6335 
6336 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6337 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6338 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6339 
6340 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6341 					IS_DMA_IDLE(dma_core_idle_ind_mask);
6342 			is_idle &= is_eng_idle;
6343 
6344 			if (mask && !is_eng_idle)
6345 				set_bit(engine_idx, mask);
6346 
6347 			if (e)
6348 				hl_engine_data_sprintf(e, edma_fmt, i, j,
6349 							is_eng_idle ? "Y" : "N",
6350 							qm_glbl_sts0,
6351 							dma_core_idle_ind_mask);
6352 		}
6353 	}
6354 
6355 	/* PDMA, Two engines in Full chip */
6356 	if (e)
6357 		hl_engine_data_sprintf(e,
6358 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6359 					"----  -------  ------------  ----------------------\n");
6360 
6361 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6362 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6363 		offset = i * PDMA_OFFSET;
6364 		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6365 
6366 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6367 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6368 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6369 
6370 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6371 				IS_DMA_IDLE(dma_core_idle_ind_mask);
6372 		is_idle &= is_eng_idle;
6373 
6374 		if (mask && !is_eng_idle)
6375 			set_bit(engine_idx, mask);
6376 
6377 		if (e)
6378 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6379 						qm_glbl_sts0, dma_core_idle_ind_mask);
6380 	}
6381 
6382 	/* NIC, twelve macros in Full chip */
6383 	if (e && hdev->nic_ports_mask)
6384 		hl_engine_data_sprintf(e,
6385 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6386 					"---  -------  ------------  ----------\n");
6387 
6388 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6389 		if (!(i & 1))
6390 			offset = i / 2 * NIC_OFFSET;
6391 		else
6392 			offset += NIC_QM_OFFSET;
6393 
6394 		if (!(hdev->nic_ports_mask & BIT(i)))
6395 			continue;
6396 
6397 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6398 
6399 
6400 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6401 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6402 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6403 
6404 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6405 		is_idle &= is_eng_idle;
6406 
6407 		if (mask && !is_eng_idle)
6408 			set_bit(engine_idx, mask);
6409 
6410 		if (e)
6411 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6412 						qm_glbl_sts0, qm_cgm_sts);
6413 	}
6414 
6415 	if (e)
6416 		hl_engine_data_sprintf(e,
6417 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6418 					"---  ----  -------  ------------  ---------------\n");
6419 	/* MME, one per Dcore */
6420 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6421 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6422 		offset = i * DCORE_OFFSET;
6423 
6424 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6425 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6426 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6427 
6428 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6429 		is_idle &= is_eng_idle;
6430 
6431 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6432 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6433 		is_idle &= is_eng_idle;
6434 
6435 		if (e)
6436 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
6437 				is_eng_idle ? "Y" : "N",
6438 				qm_glbl_sts0,
6439 				mme_arch_sts);
6440 
6441 		if (mask && !is_eng_idle)
6442 			set_bit(engine_idx, mask);
6443 	}
6444 
6445 	/*
6446 	 * TPC
6447 	 */
6448 	if (e && prop->tpc_enabled_mask)
6449 		hl_engine_data_sprintf(e,
6450 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
6451 			"----  ---  --------  ------------  ----------  ----------------------\n");
6452 
6453 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6454 
6455 	/* Decoders, two per Dcore and two shared PCIe decoders */
6456 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6457 		hl_engine_data_sprintf(e,
6458 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6459 			"----  ---  -------  ---------------\n");
6460 
6461 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6462 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6463 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6464 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6465 				continue;
6466 
6467 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6468 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6469 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6470 
6471 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6472 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6473 			is_idle &= is_eng_idle;
6474 
6475 			if (mask && !is_eng_idle)
6476 				set_bit(engine_idx, mask);
6477 
6478 			if (e)
6479 				hl_engine_data_sprintf(e, dec_fmt, i, j,
6480 							is_eng_idle ? "Y" : "N", dec_swreg15);
6481 		}
6482 	}
6483 
6484 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6485 		hl_engine_data_sprintf(e,
6486 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6487 			"--------  -------  ---------------\n");
6488 
6489 	/* Check shared (PCIe) decoders */
6490 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6491 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
6492 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6493 			continue;
6494 
6495 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6496 		offset = i * DCORE_DEC_OFFSET;
6497 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6498 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6499 		is_idle &= is_eng_idle;
6500 
6501 		if (mask && !is_eng_idle)
6502 			set_bit(engine_idx, mask);
6503 
6504 		if (e)
6505 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6506 						is_eng_idle ? "Y" : "N", dec_swreg15);
6507 	}
6508 
6509 	if (e)
6510 		hl_engine_data_sprintf(e,
6511 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6512 			"----  ----  -------  ------------  ----------  -------------\n");
6513 
6514 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6515 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6516 
6517 		offset = i * ROT_OFFSET;
6518 
6519 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6520 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6521 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6522 
6523 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6524 		is_idle &= is_eng_idle;
6525 
6526 		if (mask && !is_eng_idle)
6527 			set_bit(engine_idx, mask);
6528 
6529 		if (e)
6530 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6531 					qm_glbl_sts0, qm_cgm_sts, "-");
6532 	}
6533 
6534 	return is_idle;
6535 }
6536 
6537 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6538 	__acquires(&gaudi2->hw_queues_lock)
6539 {
6540 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6541 
6542 	spin_lock(&gaudi2->hw_queues_lock);
6543 }
6544 
6545 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6546 	__releases(&gaudi2->hw_queues_lock)
6547 {
6548 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6549 
6550 	spin_unlock(&gaudi2->hw_queues_lock);
6551 }
6552 
6553 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6554 {
6555 	return hdev->pdev->device;
6556 }
6557 
6558 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6559 {
6560 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6561 
6562 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6563 		return 0;
6564 
6565 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6566 }
6567 
6568 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6569 {
6570 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6571 }
6572 
6573 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6574 {
6575 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6576 
6577 	if (aggregate) {
6578 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
6579 		return gaudi2->events_stat_aggregate;
6580 	}
6581 
6582 	*size = (u32) sizeof(gaudi2->events_stat);
6583 	return gaudi2->events_stat;
6584 }
6585 
6586 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6587 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6588 {
6589 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6590 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
6591 
6592 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6593 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6594 
6595 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6596 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6597 
6598 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6599 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6600 
6601 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6602 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6603 
6604 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6605 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6606 }
6607 
6608 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6609 {
6610 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6611 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6612 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6613 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
6614 	u32 vdec_id, i, ports_offset, reg_val;
6615 	u8 edma_seq_base;
6616 
6617 	/* EDMA */
6618 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6619 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6620 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6621 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6622 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6623 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6624 	}
6625 
6626 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6627 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6628 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6629 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6630 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6631 	}
6632 
6633 	/* Sync Mngr */
6634 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6635 	/*
6636 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
6637 	 * for any access type
6638 	 */
6639 	if (dcore_id > 0) {
6640 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6641 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6642 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6643 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6644 	}
6645 
6646 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6647 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6648 
6649 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6650 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
6651 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6652 				dcore_offset + ports_offset, 0);
6653 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6654 				dcore_offset + ports_offset, rw_asid);
6655 	}
6656 
6657 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6658 		ports_offset = i * DCORE_MME_WB_OFFSET;
6659 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6660 				dcore_offset + ports_offset, 0);
6661 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6662 				dcore_offset + ports_offset, rw_asid);
6663 	}
6664 
6665 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6666 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6667 
6668 	/*
6669 	 * Decoders
6670 	 */
6671 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6672 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6673 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6674 	}
6675 }
6676 
6677 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6678 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6679 {
6680 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6681 
6682 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6683 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6684 
6685 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6686 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6687 
6688 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6689 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6690 
6691 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6692 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6693 
6694 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6695 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6696 }
6697 
6698 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6699 							u32 rw_asid, u32 rw_mmu_bp)
6700 {
6701 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6702 
6703 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6704 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6705 }
6706 
6707 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6708 {
6709 	u32 reg_base, reg_offset, reg_val = 0;
6710 
6711 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
6712 
6713 	/* Enable MMU and configure asid for all relevant ARC regions */
6714 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6715 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6716 
6717 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6718 	WREG32(reg_base + reg_offset, reg_val);
6719 
6720 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6721 	WREG32(reg_base + reg_offset, reg_val);
6722 
6723 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6724 	WREG32(reg_base + reg_offset, reg_val);
6725 
6726 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6727 	WREG32(reg_base + reg_offset, reg_val);
6728 
6729 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6730 	WREG32(reg_base + reg_offset, reg_val);
6731 
6732 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6733 	WREG32(reg_base + reg_offset, reg_val);
6734 
6735 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6736 	WREG32(reg_base + reg_offset, reg_val);
6737 
6738 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6739 	WREG32(reg_base + reg_offset, reg_val);
6740 
6741 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6742 	WREG32(reg_base + reg_offset, reg_val);
6743 
6744 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6745 	WREG32(reg_base + reg_offset, reg_val);
6746 
6747 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6748 	WREG32(reg_base + reg_offset, reg_val);
6749 }
6750 
6751 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6752 {
6753 	int i;
6754 
6755 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6756 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6757 
6758 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6759 		gaudi2_arc_mmu_prepare(hdev, i, asid);
6760 
6761 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6762 		if (!gaudi2_is_queue_enabled(hdev, i))
6763 			continue;
6764 
6765 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6766 	}
6767 
6768 	return 0;
6769 }
6770 
6771 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6772 {
6773 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6774 	u32 rw_asid, offset;
6775 	int rc, i;
6776 
6777 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6778 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6779 
6780 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6781 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6782 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6783 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6784 
6785 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6786 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6787 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6788 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6789 
6790 	/* ROT */
6791 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6792 		offset = i * ROT_OFFSET;
6793 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6794 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6795 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6796 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6797 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6798 	}
6799 
6800 	/* Shared Decoders are the last bits in the decoders mask */
6801 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6802 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6803 
6804 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6805 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6806 
6807 	/* arc farm arc dup eng */
6808 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6809 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6810 
6811 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6812 	if (rc)
6813 		return rc;
6814 
6815 	return 0;
6816 }
6817 
6818 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
6819 					struct iterate_module_ctx *ctx)
6820 {
6821 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6822 
6823 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6824 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6825 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6826 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6827 }
6828 
6829 /* zero the MMUBP and set the ASID */
6830 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6831 {
6832 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6833 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
6834 	struct iterate_module_ctx tpc_iter = {
6835 		.fn = &gaudi2_tpc_mmu_prepare,
6836 		.data = &tpc_mmu_data,
6837 	};
6838 	int rc, i;
6839 
6840 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6841 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6842 		return -EINVAL;
6843 	}
6844 
6845 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6846 		return 0;
6847 
6848 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
6849 	if (rc)
6850 		return rc;
6851 
6852 	/* configure DCORE MMUs */
6853 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6854 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6855 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6856 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
6857 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
6858 
6859 	return 0;
6860 }
6861 
6862 static inline bool is_info_event(u32 event)
6863 {
6864 	switch (event) {
6865 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6866 		return true;
6867 	default:
6868 		return false;
6869 	}
6870 }
6871 
6872 static void gaudi2_print_irq_info(struct hl_device *hdev, u16 event_type)
6873 {
6874 	char desc[64] = "";
6875 	bool event_valid = false;
6876 
6877 	/* Return in case of a NIC status event - these events are received periodically and are
6878 	 * not an indication of an error, thus they are not printed.
6879 	 */
6880 	if (event_type >= GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 &&
6881 		event_type <= GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1)
6882 		return;
6883 
6884 	if (gaudi2_irq_map_table[event_type].valid) {
6885 		snprintf(desc, sizeof(desc), gaudi2_irq_map_table[event_type].name);
6886 		event_valid = true;
6887 	}
6888 
6889 	if (!event_valid)
6890 		snprintf(desc, sizeof(desc), "N/A");
6891 
6892 	if (is_info_event(event_type))
6893 		dev_info_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6894 								event_type, desc);
6895 	else
6896 		dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6897 								event_type, desc);
6898 }
6899 
6900 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6901 		struct hl_eq_ecc_data *ecc_data)
6902 {
6903 	u64 ecc_address = 0, ecc_syndrom = 0;
6904 	u8 memory_wrapper_idx = 0;
6905 
6906 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
6907 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6908 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6909 
6910 	dev_err(hdev->dev,
6911 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
6912 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6913 
6914 	return !!ecc_data->is_critical;
6915 }
6916 
6917 /*
6918  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6919  *
6920  * @idx: the current pi/ci value
6921  * @q_len: the queue length (power of 2)
6922  *
6923  * @return the cyclically decremented index
6924  */
6925 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6926 {
6927 	u32 mask = q_len - 1;
6928 
6929 	/*
6930 	 * A modular decrement is equivalent to adding (q_len - 1);
6931 	 * we then take the LSBs to make sure the value is in the
6932 	 * range [0, q_len - 1]
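	 *
	 * e.g. (illustrative): with q_len = 8, idx 0 -> (0 + 7) & 7 = 7 and
	 * idx 5 -> (5 + 7) & 7 = 4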
6933 	 */
6934 	return (idx + q_len - 1) & mask;
6935 }
6936 
6937 /**
6938  * gaudi2_print_sw_config_stream_data - print SW config stream data
6939  *
6940  * @hdev: pointer to the habanalabs device structure
6941  * @stream: the QMAN's stream
6942  * @qman_base: base address of QMAN registers block
6943  */
6944 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6945 						u32 stream, u64 qman_base)
6946 {
6947 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6948 	u32 cq_ptr_lo_off, size;
6949 
6950 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6951 
6952 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6953 									stream * cq_ptr_lo_off;
6954 
6955 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6956 
6957 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6958 
6959 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6960 	size = RREG32(cq_tsize);
6961 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6962 		stream, cq_ptr, size);
6963 }
6964 
6965 /**
6966  * gaudi2_print_last_pqes_on_err - print last PQEs on error
6967  *
6968  * @hdev: pointer to the habanalabs device structure
6969  * @qid_base: first QID of the QMAN (out of 4 streams)
6970  * @stream: the QMAN's stream
6971  * @qman_base: base address of QMAN registers block
6972  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6973  */
6974 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6975 						u64 qman_base, bool pr_sw_conf)
6976 {
6977 	u32 ci, qm_ci_stream_off;
6978 	struct hl_hw_queue *q;
6979 	u64 pq_ci;
6980 	int i;
6981 
6982 	q = &hdev->kernel_queues[qid_base + stream];
6983 
6984 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6985 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6986 						stream * qm_ci_stream_off;
6987 
6988 	hdev->asic_funcs->hw_queues_lock(hdev);
6989 
6990 	if (pr_sw_conf)
6991 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6992 
6993 	ci = RREG32(pq_ci);
6994 
6995 	/* we should start printing from ci - 1 */
6996 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6997 
6998 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6999 		struct hl_bd *bd;
7000 		u64 addr;
7001 		u32 len;
7002 
7003 		bd = q->kernel_address;
7004 		bd += ci;
7005 
7006 		len = le32_to_cpu(bd->len);
7007 		/* len 0 means an uninitialized entry - break */
7008 		if (!len)
7009 			break;
7010 
7011 		addr = le64_to_cpu(bd->ptr);
7012 
7013 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7014 			stream, ci, addr, len);
7015 
7016 		/* get previous ci, wrap if needed */
7017 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7018 	}
7019 
7020 	hdev->asic_funcs->hw_queues_unlock(hdev);
7021 }
7022 
7023 /**
7024  * print_qman_data_on_err - extract QMAN data on error
7025  *
7026  * @hdev: pointer to the habanalabs device structure
7027  * @qid_base: first QID of the QMAN (out of 4 streams)
7028  * @stream: the QMAN's stream
7029  * @qman_base: base address of QMAN registers block
7030  *
7031  * This function attempts to extract as much data as possible on a QMAN error.
7032  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7033  * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7034  */
7035 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7036 {
7037 	u32 i;
7038 
7039 	if (stream != QMAN_STREAMS) {
7040 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7041 		return;
7042 	}
7043 
7044 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7045 
7046 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7047 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7048 }
7049 
7050 static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *qm_name,
7051 						u64 qman_base, u32 qid_base)
7052 {
7053 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes;
7054 	u64 glbl_sts_addr, arb_err_addr;
7055 	char reg_desc[32];
7056 
7057 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7058 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7059 
7060 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7061 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7062 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7063 
7064 		if (!glbl_sts_val)
7065 			continue;
7066 
7067 		if (i == QMAN_STREAMS) {
7068 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7069 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7070 		} else {
7071 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7072 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7073 		}
7074 
7075 		for (j = 0 ; j < num_error_causes ; j++)
7076 			if (glbl_sts_val & BIT(j))
7077 				dev_err_ratelimited(hdev->dev, "%s %s. err cause: %s\n",
7078 						qm_name, reg_desc,
7079 						i == QMAN_STREAMS ?
7080 						gaudi2_qman_lower_cp_error_cause[j] :
7081 						gaudi2_qman_error_cause[j]);
7082 
7083 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7084 	}
7085 
7086 	arb_err_val = RREG32(arb_err_addr);
7087 
7088 	if (!arb_err_val)
7089 		return;
7090 
7091 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7092 		if (arb_err_val & BIT(j))
7093 			dev_err_ratelimited(hdev->dev, "%s ARB_ERR. err cause: %s\n",
7094 						qm_name, gaudi2_qman_arb_error_cause[j]);
7095 	}
7096 }
7097 
7098 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7099 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7100 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
7101 {
7102 	u32 razwi_hi, razwi_lo, razwi_xy;
7103 
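	/*
	 * The RAZWI info is either read directly from the shared RR registers or
	 * taken from the EQ entry supplied by the FW, depending on read_razwi_regs.
	 */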
7104 	if (is_write) {
7105 		if (read_razwi_regs) {
7106 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7107 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7108 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7109 		} else {
7110 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7111 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7112 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7113 		}
7114 	} else {
7115 		if (read_razwi_regs) {
7116 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7117 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7118 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7119 		} else {
7120 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7121 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7122 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7123 		}
7124 	}
7125 
7126 	dev_err_ratelimited(hdev->dev,
7127 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7128 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7129 }
7130 
7131 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7132 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7133 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
7134 {
7135 	u32 razwi_addr, razwi_xy;
7136 
7137 	if (is_write) {
7138 		if (read_razwi_regs) {
7139 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7140 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7141 		} else {
7142 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7143 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7144 		}
7145 
7146 		dev_err_ratelimited(hdev->dev,
7147 			"%s-RAZWI SHARED RR LBW WR error, mstr_if 0x%llx, captured address 0x%x, Initiator coordinates 0x%x\n",
7148 			name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
7149 	} else {
7150 		if (read_razwi_regs) {
7151 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7152 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7153 		} else {
7154 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7155 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7156 		}
7157 
7158 		dev_err_ratelimited(hdev->dev,
7159 			"%s-RAZWI SHARED RR LBW AR error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7160 			name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
7161 	}
7162 }
7163 
7164 /*
7165  * This function handles RR (Range Register) hit events
7166  * raised by initiators, not by PSOC RAZWI.
7167  */
7168 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7169 				enum razwi_event_sources module, u8 module_idx,
7170 				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info)
7171 {
7172 	bool via_sft = false, read_razwi_regs = false;
7173 	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id;
7174 	u64 rtr_mstr_if_base_addr;
7175 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7176 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7177 	char initiator_name[64];
7178 
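	/*
	 * Read the RR registers directly when running on PLDM, when the FW does not
	 * include Linux, or when no RAZWI info was supplied with the event.
	 */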
7179 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7180 		read_razwi_regs = true;
7181 
7182 	switch (module) {
7183 	case RAZWI_TPC:
7184 		rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7185 		sprintf(initiator_name, "TPC_%u", module_idx);
7186 		break;
7187 	case RAZWI_MME:
7188 		sprintf(initiator_name, "MME_%u", module_idx);
7189 		switch (module_sub_idx) {
7190 		case MME_WAP0:
7191 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7192 			break;
7193 		case MME_WAP1:
7194 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7195 			break;
7196 		case MME_WRITE:
7197 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7198 			break;
7199 		case MME_READ:
7200 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7201 			break;
7202 		case MME_SBTE0:
7203 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7204 			break;
7205 		case MME_SBTE1:
7206 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7207 			break;
7208 		case MME_SBTE2:
7209 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7210 			break;
7211 		case MME_SBTE3:
7212 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7213 			break;
7214 		case MME_SBTE4:
7215 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7216 			break;
7217 		default:
7218 			return;
7219 		}
7220 		break;
7221 	case RAZWI_EDMA:
7222 		sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7223 		dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7224 		via_sft = true;
7225 		sprintf(initiator_name, "EDMA_%u", module_idx);
7226 		break;
7227 	case RAZWI_PDMA:
7228 		rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7229 		sprintf(initiator_name, "PDMA_%u", module_idx);
7230 		break;
7231 	case RAZWI_NIC:
7232 		rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7233 		sprintf(initiator_name, "NIC_%u", module_idx);
7234 		break;
7235 	case RAZWI_DEC:
7236 		rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7237 		sprintf(initiator_name, "DEC_%u", module_idx);
7238 		break;
7239 	case RAZWI_ROT:
7240 		rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7241 		sprintf(initiator_name, "ROT_%u", module_idx);
7242 		break;
7243 	default:
7244 		return;
7245 	}
7246 
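	/*
	 * When the FW supplied the RAZWI info, derive the HBW/LBW and AW/AR
	 * indications from razwi_happened_mask instead of touching the registers.
	 */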
7247 	if (!read_razwi_regs) {
7248 		if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7249 			hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7250 								RAZWI_HAPPENED_AW;
7251 			hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7252 								RAZWI_HAPPENED_AR;
7253 		} else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7254 			lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7255 								RAZWI_HAPPENED_AW;
7256 			lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7257 								RAZWI_HAPPENED_AR;
7258 		}
7259 		rtr_mstr_if_base_addr = 0;
7260 
7261 		goto dump_info;
7262 	}
7263 
7264 	/* Find router mstr_if register base */
7265 	if (via_sft) {
7266 		rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7267 				dcore_id * SFT_DCORE_OFFSET +
7268 				sft_id * SFT_IF_OFFSET +
7269 				RTR_MSTR_IF_OFFSET;
7270 	} else {
7271 		dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7272 		dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7273 		rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7274 				dcore_id * DCORE_OFFSET +
7275 				dcore_rtr_id * DCORE_RTR_OFFSET +
7276 				RTR_MSTR_IF_OFFSET;
7277 	}
7278 
7279 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
7280 	hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7281 
7282 	hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7283 
7284 	if (via_sft) {
7285 		/* SFT has a separate MSTR_IF for LBW; only there can we
7286 		 * read the LBW RAZWI related registers
7287 		 */
7288 		u64 base;
7289 
7290 		base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7291 				RTR_LBW_MSTR_IF_OFFSET;
7292 
7293 		lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7294 
7295 		lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7296 	} else {
7297 		lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7298 
7299 		lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7300 	}
7301 
7302 dump_info:
7303 	/* check if there is no RR razwi indication at all */
7304 	if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7305 		return;
7306 
7307 	if (hbw_shrd_aw) {
7308 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7309 						initiator_name, read_razwi_regs, razwi_info);
7310 
7311 		/* Clear event indication */
7312 		if (read_razwi_regs)
7313 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7314 	}
7315 
7316 	if (hbw_shrd_ar) {
7317 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7318 						initiator_name, read_razwi_regs, razwi_info);
7319 
7320 		/* Clear event indication */
7321 		if (read_razwi_regs)
7322 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7323 	}
7324 
7325 	if (lbw_shrd_aw) {
7326 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7327 						initiator_name, read_razwi_regs, razwi_info);
7328 
7329 		/* Clear event indication */
7330 		if (read_razwi_regs)
7331 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7332 	}
7333 
7334 	if (lbw_shrd_ar) {
7335 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7336 						initiator_name, read_razwi_regs, razwi_info);
7337 
7338 		/* Clear event indication */
7339 		if (read_razwi_regs)
7340 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7341 	}
7342 }
7343 
7344 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7345 {
7346 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7347 	u8 mod_idx, sub_mod;
7348 
7349 	/* check all TPCs */
7350 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7351 		if (prop->tpc_enabled_mask & BIT(mod_idx))
7352 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
7353 	}
7354 
7355 	/* check all MMEs */
7356 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7357 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7358 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7359 								sub_mod, NULL);
7360 
7361 	/* check all EDMAs */
7362 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7363 		if (prop->edma_enabled_mask & BIT(mod_idx))
7364 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
7365 
7366 	/* check all PDMAs */
7367 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7368 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
7369 
7370 	/* check all NICs */
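	/* two ports per NIC macro apparently share one initiator entry, hence mod_idx >> 1 */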
7371 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7372 		if (hdev->nic_ports_mask & BIT(mod_idx))
7373 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7374 								NULL);
7375 
7376 	/* check all DECs */
7377 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7378 		if (prop->decoder_enabled_mask & BIT(mod_idx))
7379 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
7380 
7381 	/* check all ROTs */
7382 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7383 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
7384 }
7385 
7386 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7387 {
7388 	switch (rtr_id) {
7389 	case DCORE0_RTR0:
7390 		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7391 	case DCORE0_RTR1:
7392 		return "TPC0/1";
7393 	case DCORE0_RTR2:
7394 		return "TPC2/3";
7395 	case DCORE0_RTR3:
7396 		return "TPC4/5";
7397 	case DCORE0_RTR4:
7398 		return "MME0_SBTE0/1";
7399 	case DCORE0_RTR5:
7400 		return "MME0_WAP0/SBTE2";
7401 	case DCORE0_RTR6:
7402 		return "MME0_CTRL_WR/SBTE3";
7403 	case DCORE0_RTR7:
7404 		return "MME0_WAP1/CTRL_RD/SBTE4";
7405 	case DCORE1_RTR0:
7406 		return "MME1_WAP1/CTRL_RD/SBTE4";
7407 	case DCORE1_RTR1:
7408 		return "MME1_CTRL_WR/SBTE3";
7409 	case DCORE1_RTR2:
7410 		return "MME1_WAP0/SBTE2";
7411 	case DCORE1_RTR3:
7412 		return "MME1_SBTE0/1";
7413 	case DCORE1_RTR4:
7414 		return "TPC10/11";
7415 	case DCORE1_RTR5:
7416 		return "TPC8/9";
7417 	case DCORE1_RTR6:
7418 		return "TPC6/7";
7419 	case DCORE1_RTR7:
7420 		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7421 	case DCORE2_RTR0:
7422 		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7423 	case DCORE2_RTR1:
7424 		return "TPC16/17";
7425 	case DCORE2_RTR2:
7426 		return "TPC14/15";
7427 	case DCORE2_RTR3:
7428 		return "TPC12/13";
7429 	case DCORE2_RTR4:
7430 		return "MME2_SBTE0/1";
7431 	case DCORE2_RTR5:
7432 		return "MME2_WAP0/SBTE2";
7433 	case DCORE2_RTR6:
7434 		return "MME2_CTRL_WR/SBTE3";
7435 	case DCORE2_RTR7:
7436 		return "MME2_WAP1/CTRL_RD/SBTE4";
7437 	case DCORE3_RTR0:
7438 		return "MME3_WAP1/CTRL_RD/SBTE4";
7439 	case DCORE3_RTR1:
7440 		return "MME3_CTRL_WR/SBTE3";
7441 	case DCORE3_RTR2:
7442 		return "MME3_WAP0/SBTE2";
7443 	case DCORE3_RTR3:
7444 		return "MME3_SBTE0/1";
7445 	case DCORE3_RTR4:
7446 		return "TPC18/19";
7447 	case DCORE3_RTR5:
7448 		return "TPC20/21";
7449 	case DCORE3_RTR6:
7450 		return "TPC22/23";
7451 	case DCORE3_RTR7:
7452 		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7453 	default:
7454 		return "N/A";
7455 	}
7456 }
7457 
7458 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7459 							u64 rtr_ctrl_base_addr, bool is_write)
7460 {
7461 	u32 razwi_hi, razwi_lo;
7462 
7463 	if (is_write) {
7464 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7465 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7466 
7467 		/* Clear set indication */
7468 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7469 	} else {
7470 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7471 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7472 
7473 		/* Clear set indication */
7474 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7475 	}
7476 
7477 	dev_err_ratelimited(hdev->dev,
7478 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7479 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7480 
7481 	dev_err_ratelimited(hdev->dev,
7482 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7483 }
7484 
7485 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7486 							u64 rtr_ctrl_base_addr, bool is_write)
7487 {
7488 	u32 razwi_addr;
7489 
7490 	if (is_write) {
7491 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7492 
7493 		/* Clear set indication */
7494 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7495 	} else {
7496 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7497 
7498 		/* Clear set indication */
7499 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7500 	}
7501 
7502 	dev_err_ratelimited(hdev->dev,
7503 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7504 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
7505 
7506 	dev_err_ratelimited(hdev->dev,
7507 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7508 }
7509 
7510 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7511 static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
7512 {
7513 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7514 								razwi_mask_info, razwi_intr = 0;
7515 	int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7516 	u64 rtr_ctrl_base_addr;
7517 
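	/*
	 * When the event comes from the FW there is no need to poll the interrupt
	 * register; on PLDM, or without FW Linux, check it and bail out if clear.
	 */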
7518 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7519 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7520 		if (!razwi_intr)
7521 			return;
7522 	}
7523 
7524 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7525 	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7526 
7527 	dev_err_ratelimited(hdev->dev,
7528 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7529 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7530 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7531 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7532 		xy,
7533 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7534 
7535 	if (xy == 0) {
7536 		dev_err_ratelimited(hdev->dev,
7537 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7538 		goto clear;
7539 	}
7540 
7541 	/* Find router id by router coordinates */
7542 	for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7543 		if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7544 			break;
7545 
7546 	if (rtr_id == rtr_map_arr_len) {
7547 		dev_err_ratelimited(hdev->dev,
7548 				"PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7549 		goto clear;
7550 	}
7551 
7552 	/* Find router mstr_if register base */
7553 	dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7554 	dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7555 	rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7556 				dcore_rtr_id * DCORE_RTR_OFFSET;
7557 
7558 	hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7559 	hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7560 	lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7561 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7562 
7563 	if (hbw_aw_set)
7564 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7565 						rtr_ctrl_base_addr, true);
7566 
7567 	if (hbw_ar_set)
7568 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7569 						rtr_ctrl_base_addr, false);
7570 
7571 	if (lbw_aw_set)
7572 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7573 						rtr_ctrl_base_addr, true);
7574 
7575 	if (lbw_ar_set)
7576 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7577 						rtr_ctrl_base_addr, false);
7578 
7579 clear:
7580 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7581 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7582 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7583 }
7584 
7585 static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
7586 {
7587 	u32 i, sts_val, sts_clr_val = 0;
7588 
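	/* report every asserted SEI error cause and clear them all in a single write */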
7589 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7590 
7591 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7592 		if (sts_val & BIT(i)) {
7593 			dev_err_ratelimited(hdev->dev, "QM SEI. err cause: %s\n",
7594 						gaudi2_qm_sei_error_cause[i]);
7595 			sts_clr_val |= BIT(i);
7596 		}
7597 	}
7598 
7599 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7600 }
7601 
7602 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7603 					struct hl_eq_razwi_info *razwi_info)
7604 {
7605 	u64 qman_base;
7606 	u8 index;
7607 
7608 	switch (event_type) {
7609 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7610 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7611 		qman_base = mmDCORE0_TPC0_QM_BASE +
7612 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7613 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7614 		break;
7615 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7616 		qman_base = mmDCORE0_TPC6_QM_BASE;
7617 		break;
7618 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7619 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7620 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7621 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7622 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7623 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7624 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7625 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7626 		break;
7627 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7628 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7629 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7630 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7631 		break;
7632 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7633 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7634 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7635 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7636 		break;
7637 	default:
7638 		return;
7639 	}
7640 
7641 	_gaudi2_handle_qm_sei_err(hdev, qman_base);
7642 
7643 	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7644 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7645 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7646 		_gaudi2_handle_qm_sei_err(hdev, qman_base + NIC_QM_OFFSET);
7647 
7648 	/* check if RAZWI happened */
7649 	if (razwi_info)
7650 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, 0, 0, razwi_info);
7651 }
7652 
7653 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7654 {
7655 	u32 qid_base;
7656 	u64 qman_base;
7657 	char desc[32];
7658 	u8 index;
7659 
7660 	switch (event_type) {
7661 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7662 		index = event_type - GAUDI2_EVENT_TPC0_QM;
7663 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7664 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7665 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC%d_QM", index);
7666 		break;
7667 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7668 		index = event_type - GAUDI2_EVENT_TPC6_QM;
7669 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7670 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7671 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_TPC%d_QM", index);
7672 		break;
7673 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7674 		index = event_type - GAUDI2_EVENT_TPC12_QM;
7675 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7676 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7677 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_TPC%d_QM", index);
7678 		break;
7679 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7680 		index = event_type - GAUDI2_EVENT_TPC18_QM;
7681 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7682 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7683 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_TPC%d_QM", index);
7684 		break;
7685 	case GAUDI2_EVENT_TPC24_QM:
7686 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7687 		qman_base = mmDCORE0_TPC6_QM_BASE;
7688 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_TPC6_QM");
7689 		break;
7690 	case GAUDI2_EVENT_MME0_QM:
7691 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7692 		qman_base = mmDCORE0_MME_QM_BASE;
7693 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_MME_QM");
7694 		break;
7695 	case GAUDI2_EVENT_MME1_QM:
7696 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7697 		qman_base = mmDCORE1_MME_QM_BASE;
7698 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_MME_QM");
7699 		break;
7700 	case GAUDI2_EVENT_MME2_QM:
7701 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7702 		qman_base = mmDCORE2_MME_QM_BASE;
7703 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_MME_QM");
7704 		break;
7705 	case GAUDI2_EVENT_MME3_QM:
7706 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7707 		qman_base = mmDCORE3_MME_QM_BASE;
7708 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_MME_QM");
7709 		break;
7710 	case GAUDI2_EVENT_HDMA0_QM:
7711 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7712 		qman_base = mmDCORE0_EDMA0_QM_BASE;
7713 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA0_QM");
7714 		break;
7715 	case GAUDI2_EVENT_HDMA1_QM:
7716 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7717 		qman_base = mmDCORE0_EDMA1_QM_BASE;
7718 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_EDMA1_QM");
7719 		break;
7720 	case GAUDI2_EVENT_HDMA2_QM:
7721 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7722 		qman_base = mmDCORE1_EDMA0_QM_BASE;
7723 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA0_QM");
7724 		break;
7725 	case GAUDI2_EVENT_HDMA3_QM:
7726 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7727 		qman_base = mmDCORE1_EDMA1_QM_BASE;
7728 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_EDMA1_QM");
7729 		break;
7730 	case GAUDI2_EVENT_HDMA4_QM:
7731 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7732 		qman_base = mmDCORE2_EDMA0_QM_BASE;
7733 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA0_QM");
7734 		break;
7735 	case GAUDI2_EVENT_HDMA5_QM:
7736 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7737 		qman_base = mmDCORE2_EDMA1_QM_BASE;
7738 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_EDMA1_QM");
7739 		break;
7740 	case GAUDI2_EVENT_HDMA6_QM:
7741 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7742 		qman_base = mmDCORE3_EDMA0_QM_BASE;
7743 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA0_QM");
7744 		break;
7745 	case GAUDI2_EVENT_HDMA7_QM:
7746 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7747 		qman_base = mmDCORE3_EDMA1_QM_BASE;
7748 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_EDMA1_QM");
7749 		break;
7750 	case GAUDI2_EVENT_PDMA0_QM:
7751 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7752 		qman_base = mmPDMA0_QM_BASE;
7753 		snprintf(desc, ARRAY_SIZE(desc), "PDMA0_QM");
7754 		break;
7755 	case GAUDI2_EVENT_PDMA1_QM:
7756 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7757 		qman_base = mmPDMA1_QM_BASE;
7758 		snprintf(desc, ARRAY_SIZE(desc), "PDMA1_QM");
7759 		break;
7760 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7761 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7762 		qman_base = mmROT0_QM_BASE;
7763 		snprintf(desc, ARRAY_SIZE(desc), "ROTATOR0_QM");
7764 		break;
7765 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7766 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7767 		qman_base = mmROT1_QM_BASE;
7768 		snprintf(desc, ARRAY_SIZE(desc), "ROTATOR1_QM");
7769 		break;
7770 	default:
7771 		return;
7772 	}
7773 
7774 	gaudi2_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7775 
7776 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7777 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7778 		_gaudi2_handle_qm_sei_err(hdev, qman_base);
7779 }
7780 
7781 static void gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev)
7782 {
7783 	u32 i, sts_val, sts_clr_val = 0;
7784 
7785 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7786 
7787 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7788 		if (sts_val & BIT(i)) {
7789 			dev_err_ratelimited(hdev->dev, "ARC SEI. err cause: %s\n",
7790 						gaudi2_arc_sei_error_cause[i]);
7791 			sts_clr_val |= BIT(i);
7792 		}
7793 	}
7794 
7795 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7796 }
7797 
7798 static void gaudi2_handle_cpu_sei_err(struct hl_device *hdev)
7799 {
7800 	u32 i, sts_val, sts_clr_val = 0;
7801 
7802 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
7803 
7804 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
7805 		if (sts_val & BIT(i)) {
7806 			dev_err_ratelimited(hdev->dev, "CPU SEI. err cause: %s\n",
7807 						gaudi2_cpu_sei_error_cause[i]);
7808 			sts_clr_val |= BIT(i);
7809 		}
7810 	}
7811 
7812 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
7813 }
7814 
7815 static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
7816 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
7817 {
7818 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7819 	int i;
7820 
7821 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
7822 		if (intr_cause_data & BIT(i))
7823 			dev_err_ratelimited(hdev->dev, "ROT%u. err cause: %s\n",
7824 						rot_index, guadi2_rot_error_cause[i]);
7825 
7826 	/* check if RAZWI happened */
7827 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
7828 						&razwi_with_intr_cause->razwi_info);
7829 }
7830 
7831 static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name,
7832 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
7833 {
7834 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7835 	int i;
7836 
7837 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
7838 		if (intr_cause_data & BIT(i))
7839 			dev_err_ratelimited(hdev->dev, "TPC%d_%s interrupt cause: %s\n",
7840 					tpc_index, interrupt_name, gaudi2_tpc_interrupts_cause[i]);
7841 
7842 	/* check if RAZWI happened */
7843 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
7844 						&razwi_with_intr_cause->razwi_info);
7845 }
7846 
7847 static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name,
7848 				struct hl_eq_razwi_info *razwi_info)
7849 {
7850 	u32 sts_addr, sts_val, sts_clr_val = 0;
7851 	int i;
7852 
7853 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
7854 		/* DCORE DEC */
7855 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
7856 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
7857 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
7858 	else
7859 		/* PCIE DEC */
7860 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
7861 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
7862 
7863 	sts_val = RREG32(sts_addr);
7864 
7865 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
7866 		if (sts_val & BIT(i)) {
7867 			dev_err_ratelimited(hdev->dev, "DEC%u_%s err cause: %s\n",
7868 					dec_index, interrupt_name, gaudi2_dec_error_cause[i]);
7869 			sts_clr_val |= BIT(i);
7870 		}
7871 	}
7872 
7873 	/* check if RAZWI happened */
7874 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info);
7875 
7876 	/* Write 1 to clear the errors */
7877 	WREG32(sts_addr, sts_clr_val);
7878 }
7879 
7880 static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name,
7881 				struct hl_eq_razwi_info *razwi_info)
7882 {
7883 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
7884 	int i;
7885 
7886 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
7887 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
7888 
7889 	sts_val = RREG32(sts_addr);
7890 
7891 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
7892 		if (sts_val & BIT(i)) {
7893 			dev_err_ratelimited(hdev->dev, "MME%u_%s err cause: %s\n",
7894 					mme_index, interrupt_name, guadi2_mme_error_cause[i]);
7895 			sts_clr_val |= BIT(i);
7896 		}
7897 	}
7898 
7899 	/* check if RAZWI happened */
7900 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
7901 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info);
7902 
7903 	WREG32(sts_clr_addr, sts_clr_val);
7904 }
7905 
7906 static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8 sbte_index,
7907 					u64 intr_cause_data)
7908 {
7909 	int i;
7910 
7911 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
7912 		if (intr_cause_data & BIT(i))
7913 			dev_err_ratelimited(hdev->dev, "MME%uSBTE%u_AXI_ERR_RSP err cause: %s\n",
7914 					mme_index, sbte_index, guadi2_mme_sbte_error_cause[i]);
7915 }
7916 
7917 static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
7918 					struct hl_eq_razwi_info *razwi_info)
7919 {
7920 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
7921 	int i;
7922 
7923 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
7924 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
7925 
7926 	sts_val = RREG32(sts_addr);
7927 
7928 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
7929 		if (sts_val & BIT(i)) {
7930 			dev_err_ratelimited(hdev->dev,
7931 					"MME%u_WAP_SOURCE_RESULT_INVALID err cause: %s\n",
7932 					mme_index, guadi2_mme_wap_error_cause[i]);
7933 			sts_clr_val |= BIT(i);
7934 		}
7935 	}
7936 
7937 	/* check if RAZWI happened on WAP0/1 */
7938 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info);
7939 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info);
7940 
7941 	WREG32(sts_clr_addr, sts_clr_val);
7942 }
7943 
7944 static void gaudi2_handle_kdma_core_event(struct hl_device *hdev, u64 intr_cause_data)
7945 {
7946 	int i;
7947 
7948 	/* If an AXI read or write error is received, an error is reported and an
7949 	 * interrupt message is sent. Due to a HW erratum, when reading the cause
7950 	 * register of the KDMA engine, the reported error is always HBW even if
7951 	 * the actual error was caused by an LBW KDMA transaction.
7952 	 */
7953 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
7954 		if (intr_cause_data & BIT(i))
7955 			dev_err_ratelimited(hdev->dev, "kdma core err cause: %s\n",
7956 						gaudi2_kdma_core_interrupts_cause[i]);
7957 }
7958 
7959 static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_data)
7960 {
7961 	int i;
7962 
7963 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
7964 		if (intr_cause_data & BIT(i))
7965 			dev_err_ratelimited(hdev->dev, "dma core err cause: %s\n",
7966 						gaudi2_dma_core_interrupts_cause[i]);
7967 }
7968 
7969 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
7970 {
7971 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
7972 
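	/*
	 * Check and clear each of the four shared-RR RAZWI indications
	 * (HBW/LBW x AW/AR) of the PCIE master interface.
	 */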
7973 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
7974 	if (RREG32(razwi_happened_addr)) {
7975 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
7976 							NULL);
7977 		WREG32(razwi_happened_addr, 0x1);
7978 	}
7979 
7980 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
7981 	if (RREG32(razwi_happened_addr)) {
7982 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
7983 							NULL);
7984 		WREG32(razwi_happened_addr, 0x1);
7985 	}
7986 
7987 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
7988 	if (RREG32(razwi_happened_addr)) {
7989 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
7990 							NULL);
7991 		WREG32(razwi_happened_addr, 0x1);
7992 	}
7993 
7994 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
7995 	if (RREG32(razwi_happened_addr)) {
7996 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
7997 							NULL);
7998 		WREG32(razwi_happened_addr, 0x1);
7999 	}
8000 }
8001 
8002 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data)
8003 {
8004 	int i;
8005 
8006 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8007 		if (!(intr_cause_data & BIT_ULL(i)))
8008 			continue;
8009 
8010 		dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
8011 					gaudi2_pcie_addr_dec_error_cause[i]);
8012 
8013 		switch (intr_cause_data & BIT_ULL(i)) {
8014 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8015 			break;
8016 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8017 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev);
8018 			break;
8019 		}
8020 	}
8021 }
8022 
8023 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
8024 
8025 {
8026 	int i;
8027 
8028 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8029 		if (intr_cause_data & BIT_ULL(i))
8030 			dev_err_ratelimited(hdev->dev, "PMMU PIF err cause: %s\n",
8031 					gaudi2_pmmu_fatal_interrupts_cause[i]);
8032 	}
8033 }
8034 
8035 static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8036 {
8037 	u32 dcore_id, hif_id;
8038 	int i;
8039 
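	/* decode the DCORE and HIF indices from the event offset (four HIFs per DCORE) */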
8040 	dcore_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) / 4;
8041 	hif_id = (event_type - GAUDI2_EVENT_HIF0_FATAL) % 4;
8042 
8043 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8044 		if (intr_cause_data & BIT_ULL(i))
8045 			dev_err_ratelimited(hdev->dev, "DCORE%u_HIF%u: %s\n", dcore_id, hif_id,
8046 					gaudi2_hif_fatal_interrupts_cause[i]);
8047 	}
8048 }
8049 
8050 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8051 {
8052 	u32 valid, val;
8053 	u64 addr;
8054 
8055 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8056 
8057 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8058 		return;
8059 
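	/*
	 * The faulting VA appears to be split across two registers: bits 63:32 in
	 * PAGE_ERROR_CAPTURE and bits 31:0 in PAGE_ERROR_CAPTURE_VA.
	 */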
8060 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8061 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8062 	addr <<= 32;
8063 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8064 
8065 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8066 				is_pmmu ? "PMMU" : "HMMU", addr);
8067 
8068 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8069 }
8070 
8071 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8072 {
8073 	u32 valid, val;
8074 	u64 addr;
8075 
8076 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8077 
8078 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8079 		return;
8080 
8081 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8082 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8083 	addr <<= 32;
8084 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8085 
8086 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8087 				is_pmmu ? "PMMU" : "HMMU", addr);
8088 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8089 }
8090 
8091 static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name,
8092 						u64 mmu_base, bool is_pmmu)
8093 {
8094 	u32 spi_sei_cause, interrupt_clr = 0x0;
8095 	int i;
8096 
8097 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8098 
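	/*
	 * Bit 0 of the SPI/SEI cause indicates a page fault and bit 1 an access
	 * error; both have dedicated capture registers that are dumped here.
	 */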
8099 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8100 		if (spi_sei_cause & BIT(i)) {
8101 			dev_err_ratelimited(hdev->dev, "%s SPI_SEI ERR. err cause: %s\n",
8102 						mmu_name, gaudi2_mmu_spi_sei[i].cause);
8103 
8104 			if (i == 0)
8105 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
8106 			else if (i == 1)
8107 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8108 
8109 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8110 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8111 		}
8112 	}
8113 
8114 	/* Clear cause */
8115 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8116 
8117 	/* Clear interrupt */
8118 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8119 }
8120 
8121 static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
8122 {
8123 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log;
8124 	u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index;
8125 	bool reset = true;
8126 	int i;
8127 
8128 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8129 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8130 
8131 	sei_cause_val = RREG32(sei_cause_addr);
8132 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8133 	cq_intr_val = RREG32(cq_intr_addr);
8134 
8135 	/* SEI interrupt */
8136 	if (sei_cause_cause) {
8137 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8138 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8139 					sei_cause_val);
8140 
8141 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8142 			if (!(sei_cause_cause & BIT(i)))
8143 				continue;
8144 
8145 			dev_err_ratelimited(hdev->dev, "SM%u SEI ERR. err cause: %s. %s: 0x%X\n",
8146 					sm_index,
8147 					gaudi2_sm_sei_cause[i].cause_name,
8148 					gaudi2_sm_sei_cause[i].log_name,
8149 					sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
8150 
8151 			/* Due to a potential H/W issue, do not reset upon BRESP errors */
8152 			if (i == 2)
8153 				reset = false;
8154 			break;
8155 		}
8156 
8157 		/* Clear SM_SEI_CAUSE */
8158 		WREG32(sei_cause_addr, 0);
8159 	}
8160 
8161 	/* CQ interrupt */
8162 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8163 		cq_intr_queue_index =
8164 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8165 					cq_intr_val);
8166 
8167 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8168 				sm_index, cq_intr_queue_index);
8169 
8170 		/* Clear CQ_INTR */
8171 		WREG32(cq_intr_addr, 0);
8172 	}
8173 
8174 	return reset;
8175 }
8176 
8177 static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type)
8178 {
8179 	bool is_pmmu = false;
8180 	char desc[32];
8181 	u64 mmu_base;
8182 	u8 index;
8183 
8184 	switch (event_type) {
8185 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8186 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8187 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8188 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8189 		break;
8190 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8191 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8192 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8193 		snprintf(desc, ARRAY_SIZE(desc), "DCORE0_HMMU%d", index);
8194 		break;
8195 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8196 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8197 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8198 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8199 		break;
8200 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8201 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8202 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8203 		snprintf(desc, ARRAY_SIZE(desc), "DCORE1_HMMU%d", index);
8204 		break;
8205 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8206 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8207 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8208 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8209 		break;
8210 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8211 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8212 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8213 		snprintf(desc, ARRAY_SIZE(desc), "DCORE2_HMMU%d", index);
8214 		break;
8215 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8216 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8217 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8218 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8219 		break;
8220 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8221 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8222 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8223 		snprintf(desc, ARRAY_SIZE(desc), "DCORE3_HMMU%d", index);
8224 		break;
8225 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8226 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8227 		is_pmmu = true;
8228 		mmu_base = mmPMMU_HBW_MMU_BASE;
8229 		snprintf(desc, ARRAY_SIZE(desc), "PMMU");
8230 		break;
8231 	default:
8232 		return;
8233 	}
8234 
8235 	gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu);
8236 }
8237 
8238 
8239 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8240 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8241 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8242 {
8243 	u32 addr, beat, beat_shift;
8244 	bool rc = false;
8245 
8246 	dev_err_ratelimited(hdev->dev,
8247 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8248 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8249 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8250 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8251 
8252 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8253 	dev_err_ratelimited(hdev->dev,
8254 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8255 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8256 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8257 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8258 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8259 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8260 
8261 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
8262 	for (beat = 0 ; beat < 4 ; beat++) {
8263 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8264 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8265 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8266 						beat,
8267 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8268 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8269 
8270 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8271 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8272 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8273 						beat,
8274 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8275 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8276 			rc |= true;
8277 		}
8278 
8279 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8280 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8281 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8282 			dev_err_ratelimited(hdev->dev,
8283 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8284 					beat,
8285 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8286 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8287 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8288 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8289 			rc |= true;
8290 		}
8291 
8292 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8293 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8294 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8295 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8296 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8297 	}
8298 
8299 	return rc;
8300 }
8301 
8302 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8303 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8304 {
8305 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8306 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8307 
8308 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8309 
8310 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8311 				derr & 0x3, derr & 0xc);
8312 
8313 	/* JIRA H6-3286 - the following prints may not be valid */
8314 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8315 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8316 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8317 		dev_err_ratelimited(hdev->dev,
8318 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8319 				i,
8320 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8321 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8322 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8323 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8324 	}
8325 }
8326 
8327 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8328 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8329 {
8330 	__le32 *col_cmd = ca_par_err_data->dbg_col;
8331 	__le16 *row_cmd = ca_par_err_data->dbg_row;
8332 	u32 i;
8333 
8334 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8335 
8336 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8337 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8338 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8339 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8340 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8341 }
8342 
8343 /* Returns true if hard reset is needed or false otherwise */
8344 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8345 					struct hl_eq_hbm_sei_data *sei_data)
8346 {
8347 	bool require_hard_reset = false;
8348 	u32 hbm_id, mc_id, cause_idx;
8349 
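	/*
	 * The HBM SEI events are apparently grouped as 4 per HBM (2 MCs, each with a
	 * severe and a non-severe event), hence the / 4 and (/ 2) % 2 decoding below.
	 */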
8350 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8351 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8352 
8353 	cause_idx = sei_data->hdr.sei_cause;
8354 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8355 		dev_err_ratelimited(hdev->dev, "Invalid HBM SEI event cause (%d) provided by FW\n",
8356 					cause_idx);
8357 		return true;
8358 	}
8359 
8360 	if (sei_data->hdr.is_critical)
8361 		dev_err(hdev->dev,
8362 			"System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8363 			hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8364 			hbm_mc_sei_cause[cause_idx]);
8365 
8366 	else
8367 		dev_err_ratelimited(hdev->dev,
8368 			"System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8369 			hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8370 			hbm_mc_sei_cause[cause_idx]);
8371 
8372 	/* Print error-specific info */
8373 	switch (cause_idx) {
8374 	case HBM_SEI_CATTRIP:
8375 		require_hard_reset = true;
8376 		break;
8377 
8378 	case HBM_SEI_CMD_PARITY_EVEN:
8379 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8380 						le32_to_cpu(sei_data->hdr.cnt));
8381 		require_hard_reset = true;
8382 		break;
8383 
8384 	case HBM_SEI_CMD_PARITY_ODD:
8385 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8386 						le32_to_cpu(sei_data->hdr.cnt));
8387 		require_hard_reset = true;
8388 		break;
8389 
8390 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
8391 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8392 						le32_to_cpu(sei_data->hdr.cnt));
8393 		require_hard_reset = true;
8394 		break;
8395 
8396 	case HBM_SEI_READ_ERR:
8397 		/* Unlike other SEI events, read error requires further processing of the
8398 		 * raw data in order to determine the root cause.
8399 		 */
8400 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8401 								&sei_data->read_err_info,
8402 								le32_to_cpu(sei_data->hdr.cnt));
8403 		break;
8404 
8405 	default:
8406 		break;
8407 	}
8408 
8409 	require_hard_reset |= !!sei_data->hdr.is_critical;
8410 
8411 	return require_hard_reset;
8412 }
8413 
8414 static void gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u64 intr_cause_data)
8415 {
8416 	dev_err(hdev->dev,
8417 		"HBM catastrophic temperature error (CATTRIP) cause %#llx\n",
8418 		intr_cause_data);
8419 }
8420 
8421 static void gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8422 {
8423 	u32 i;
8424 
8425 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8426 		if (intr_cause_data & hbm_mc_spi[i].mask)
8427 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8428 				hbm_mc_spi[i].cause);
8429 }
8430 
8431 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
8432 {
8433 	ktime_t zero_time = ktime_set(0, 0);
8434 
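	/* the end timestamp is zeroed when a throttling window starts and filled in when it ends */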
8435 	mutex_lock(&hdev->clk_throttling.lock);
8436 
8437 	switch (event_type) {
8438 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8439 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8440 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8441 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8442 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8443 		dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8444 		break;
8445 
8446 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8447 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8448 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8449 		dev_info_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
8450 		break;
8451 
8452 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8453 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8454 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8455 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8456 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8457 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8458 		break;
8459 
8460 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8461 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8462 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8463 		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
8464 		break;
8465 
8466 	default:
8467 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8468 		break;
8469 	}
8470 
8471 	mutex_unlock(&hdev->clk_throttling.lock);
8472 }
8473 
8474 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev,
8475 					struct cpucp_pkt_sync_err *sync_err)
8476 {
8477 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8478 
8479 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
8480 			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
8481 }
8482 
8483 static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev)
8484 {
8485 	u32 p2p_intr, msix_gw_intr;
8486 
8487 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8488 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8489 
8490 	if (p2p_intr) {
8491 		dev_err_ratelimited(hdev->dev,
8492 			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8493 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8494 
8495 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8496 	}
8497 
8498 	if (msix_gw_intr) {
8499 		dev_err_ratelimited(hdev->dev,
8500 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8501 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8502 
8503 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8504 	}
8505 }
8506 
8507 static void gaudi2_handle_pcie_drain(struct hl_device *hdev,
8508 			struct hl_eq_pcie_drain_ind_data *drain_data)
8509 {
8510 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause;
8511 
8512 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8513 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8514 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8515 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8516 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8517 
8518 	if (cause & BIT_ULL(0))
8519 		dev_err_ratelimited(hdev->dev,
8520 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8521 			!!lbw_rd, !!lbw_wr);
8522 
8523 	if (cause & BIT_ULL(1))
8524 		dev_err_ratelimited(hdev->dev,
8525 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8526 			hbw_rd, hbw_wr);
8527 }
8528 
8529 static void gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8530 {
8531 	int i;
8532 
8533 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8534 		if (intr_cause_data & BIT_ULL(i))
8535 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8536 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
8537 	}
8538 }
8539 
8540 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev,
8541 					struct cpucp_pkt_sync_err *sync_err)
8542 {
8543 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8544 
8545 	dev_warn(hdev->dev,
8546 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
8547 		sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
8548 }
8549 
8550 static void hl_arc_event_handle(struct hl_device *hdev,
8551 					struct hl_eq_engine_arc_intr_data *data)
8552 {
8553 	struct hl_engine_arc_dccm_queue_full_irq *q;
8554 	u32 intr_type, engine_id;
8555 	u64 payload;
8556 
8557 	intr_type = le32_to_cpu(data->intr_type);
8558 	engine_id = le32_to_cpu(data->engine_id);
8559 	payload = le64_to_cpu(data->payload);
8560 
8561 	switch (intr_type) {
8562 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8563 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8564 
8565 		dev_err_ratelimited(hdev->dev,
8566 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8567 				engine_id, intr_type, q->queue_index);
8568 		break;
8569 	default:
8570 		dev_err_ratelimited(hdev->dev, "Unknown ARC event type\n");
8571 	}
8572 }
8573 
8574 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8575 {
8576 	u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
8577 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
8578 	bool reset_required = false, skip_reset = false;
8579 	int index, sbte_index;
8580 	u64 event_mask = 0;
8581 	u16 event_type;
8582 
8583 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
8584 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8585 
8586 	if (event_type >= GAUDI2_EVENT_SIZE) {
8587 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
8588 				event_type, GAUDI2_EVENT_SIZE - 1);
8589 		return;
8590 	}
8591 
8592 	gaudi2->events_stat[event_type]++;
8593 	gaudi2->events_stat_aggregate[event_type]++;
8594 
8595 	gaudi2_print_irq_info(hdev, event_type);
8596 
8597 	switch (event_type) {
8598 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8599 		fallthrough;
8600 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8601 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8602 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8603 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8604 		break;
8605 
8606 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8607 		fallthrough;
8608 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8609 		fallthrough;
8610 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8611 		gaudi2_handle_qman_err(hdev, event_type);
8612 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8613 		break;
8614 
8615 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8616 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8617 		gaudi2_handle_arc_farm_sei_err(hdev);
8618 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8619 		break;
8620 
8621 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8622 		gaudi2_handle_cpu_sei_err(hdev);
8623 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8624 		break;
8625 
8626 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8627 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8628 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8629 		gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info);
8630 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8631 		break;
8632 
8633 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8634 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8635 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8636 		gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause);
8637 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8638 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8639 		break;
8640 
8641 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8642 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8643 		gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
8644 						&eq_entry->razwi_with_intr_cause);
8645 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8646 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8647 		break;
8648 
8649 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8650 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8651 		gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info);
8652 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8653 		break;
8654 
8655 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8656 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8657 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8658 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8659 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8660 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8661 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8662 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8663 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8664 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8665 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8666 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8667 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8668 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8669 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8670 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8671 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8672 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8673 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8674 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8675 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8676 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8677 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8678 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8679 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
8680 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8681 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8682 		gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause);
8683 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8684 		break;
8685 
8686 	case GAUDI2_EVENT_DEC0_SPI:
8687 	case GAUDI2_EVENT_DEC1_SPI:
8688 	case GAUDI2_EVENT_DEC2_SPI:
8689 	case GAUDI2_EVENT_DEC3_SPI:
8690 	case GAUDI2_EVENT_DEC4_SPI:
8691 	case GAUDI2_EVENT_DEC5_SPI:
8692 	case GAUDI2_EVENT_DEC6_SPI:
8693 	case GAUDI2_EVENT_DEC7_SPI:
8694 	case GAUDI2_EVENT_DEC8_SPI:
8695 	case GAUDI2_EVENT_DEC9_SPI:
8696 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8697 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8698 		gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info);
8699 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8700 		break;
8701 
8702 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8703 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8704 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8705 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8706 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8707 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8708 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8709 		gaudi2_handle_mme_err(hdev, index,
8710 				"CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info);
8711 		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
8712 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8713 		break;
8714 
8715 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
8716 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
8717 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
8718 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
8719 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
8720 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
8721 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
8722 		gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info);
8723 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8724 		break;
8725 
8726 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
8727 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
8728 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
8729 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
8730 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
8731 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
8732 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
8733 		gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info);
8734 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8735 		break;
8736 
8737 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
8738 	case GAUDI2_EVENT_KDMA0_CORE:
8739 		gaudi2_handle_kdma_core_event(hdev,
8740 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8741 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8742 		break;
8743 
8744 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
8745 		gaudi2_handle_dma_core_event(hdev,
8746 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8747 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8748 		break;
8749 
8750 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
8751 		gaudi2_print_pcie_addr_dec_info(hdev,
8752 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8753 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8754 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8755 		break;
8756 
8757 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8758 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8759 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8760 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8761 		gaudi2_handle_mmu_spi_sei_err(hdev, event_type);
8762 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8763 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8764 		break;
8765 
8766 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
8767 		gaudi2_handle_hif_fatal(hdev, event_type,
8768 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8769 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8770 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8771 		break;
8772 
8773 	case GAUDI2_EVENT_PMMU_FATAL_0:
8774 		gaudi2_handle_pif_fatal(hdev,
8775 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8776 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8777 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8778 		break;
8779 
8780 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
8781 		gaudi2_ack_psoc_razwi_event_handler(hdev);
8782 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8783 		break;
8784 
8785 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
8786 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8787 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
8788 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8789 			reset_required = true;
8790 		}
8791 		break;
8792 
8793 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
8794 		gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8795 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8796 		break;
8797 
8798 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
8799 		gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8800 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8801 		break;
8802 
8803 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
8804 		gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
8805 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8806 		break;
8807 
8808 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
8809 		gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8810 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8811 		break;
8812 
8813 	case GAUDI2_EVENT_CPU_AXI_ECC:
8814 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8815 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8816 		break;
8817 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
8818 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8819 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8820 		break;
8821 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
8822 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
8823 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
8824 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
8825 		index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) /
8826 				(GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
8827 					GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
8828 		sbte_index = (event_type - GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP) %
8829 				(GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP -
8830 					GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP);
8831 		gaudi2_handle_mme_sbte_err(hdev, index, sbte_index,
8832 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8833 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8834 		break;
8835 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
8836 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8837 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8838 		break;
8839 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
8840 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8841 		break;
8842 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
8843 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8844 		break;
8845 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
8846 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8847 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8848 		break;
8849 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
8850 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8851 		break;
8852 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
8853 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
8854 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
8855 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
8856 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
8857 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
8858 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
8859 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
8860 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
8861 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
8862 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
8863 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
8864 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
8865 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
8866 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
8867 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
8868 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
8869 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
8870 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
8871 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
8872 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
8873 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
8874 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
8875 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
8876 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
8877 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
8878 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
8879 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
8880 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
8881 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
8882 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
8883 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
8884 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
8885 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
8886 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
8887 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
8888 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
8889 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
8890 		fallthrough;
8891 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
8892 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
8893 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
8894 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
8895 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
8896 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
8897 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
8898 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
8899 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
8900 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
8901 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
8902 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8903 		break;
8904 
8905 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8906 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8907 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8908 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8909 		gaudi2_print_clk_change_info(hdev, event_type);
8910 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8911 		break;
8912 
8913 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
8914 		gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8915 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8916 		break;
8917 
8918 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
8919 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8920 		/* Do nothing - FW will handle it */
8921 		break;
8922 
8923 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
8924 		gaudi2_handle_pcie_p2p_msix(hdev);
8925 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8926 		break;
8927 
8928 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
8929 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
8930 		skip_reset = !gaudi2_handle_sm_err(hdev, index);
8931 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8932 		break;
8933 
8934 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
8935 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8936 		break;
8937 
8938 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
8939 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
8940 						le64_to_cpu(eq_entry->data[0]));
8941 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8942 		break;
8943 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
8944 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
8945 						le64_to_cpu(eq_entry->data[0]));
8946 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8947 		break;
8948 
8949 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
8950 		gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err);
8951 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8952 		break;
8953 
8954 	case GAUDI2_EVENT_ARC_DCCM_FULL:
8955 		hl_arc_event_handle(hdev, &eq_entry->arc_data);
8956 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8957 		break;
8958 
8959 	default:
8960 		if (gaudi2_irq_map_table[event_type].valid)
8961 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
8962 						event_type);
8963 	}
8964 
8965 	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
8966 		goto reset_device;
8967 
8968 	/* Send unmask irq only for interrupts not classified as MSG */
8969 	if (!gaudi2_irq_map_table[event_type].msg)
8970 		hl_fw_unmask_irq(hdev, event_type);
8971 
8972 	if (event_mask)
8973 		hl_notifier_event_send_all(hdev, event_mask);
8974 
8975 	return;
8976 
8977 reset_device:
8978 	if (hdev->hard_reset_on_fw_events) {
8979 		hl_device_reset(hdev, reset_flags);
8980 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
8981 	} else {
8982 		if (!gaudi2_irq_map_table[event_type].msg)
8983 			hl_fw_unmask_irq(hdev, event_type);
8984 	}
8985 
8986 	if (event_mask)
8987 		hl_notifier_event_send_all(hdev, event_mask);
8988 }
8989 
8990 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
8991 {
8992 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8993 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
8994 	u32 chunk_size, busy, dcore, edma_idx, sob_offset, sob_addr, comp_val, edma_commit;
8995 	u32 old_mmubp, mmubp;
8996 	int rc = 0;
8997 
8998 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
8999 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9000 	comp_addr = CFG_BASE + sob_addr;
9001 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9002 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9003 
9004 	edma_commit = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
9005 			FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1) |
9006 			FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
9007 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9008 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9009 
9010 	if (prop->edma_enabled_mask == 0) {
9011 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
9012 		return -EIO;
9013 	}
9014 
9015 	/*
9016 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
9017 	 * only the first one's value to restore later.
9018 	 */
9019 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9020 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9021 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9022 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9023 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9024 
9025 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9026 				continue;
9027 
9028 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9029 					edma_offset, mmubp);
9030 		}
9031 	}
9032 
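	/*
	 * Scrub the range in up-to-2GB chunks: every enabled EDMA engine is handed
	 * one chunk, and its write-completion logic writes comp_val to the SOB at
	 * comp_addr when the transfer finishes. The loop below then polls that SOB
	 * until it equals the number of engines that were kicked off (dma_num).
	 */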
9033 	while (cur_addr < end_addr) {
9034 		int dma_num = 0;
9035 
9036 		WREG32(sob_addr, 0);
9037 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9038 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9039 				u32 edma_offset = dcore * DCORE_OFFSET +
9040 					edma_idx * DCORE_EDMA_OFFSET;
9041 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9042 
9043 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9044 					continue;
9045 
9046 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9047 
9048 				WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_LO + edma_offset,
9049 						lower_32_bits(val));
9050 				WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_HI + edma_offset,
9051 						upper_32_bits(val));
9052 
9053 				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_LO + edma_offset,
9054 						lower_32_bits(cur_addr));
9055 				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_HI + edma_offset,
9056 						upper_32_bits(cur_addr));
9057 
9058 				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9059 						lower_32_bits(comp_addr));
9060 				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9061 						upper_32_bits(comp_addr));
9062 				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9063 						comp_val);
9064 
9065 				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_TSIZE_0 + edma_offset,
9066 						chunk_size);
9067 				WREG32(mmDCORE0_EDMA0_CORE_CTX_COMMIT + edma_offset, edma_commit);
9068 
9069 				dma_num++;
9070 
9071 				cur_addr += chunk_size;
9072 
9073 				if (cur_addr == end_addr)
9074 					goto poll;
9075 			}
9076 		}
9077 poll:
9078 		rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9079 		if (rc) {
9080 			dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9081 			goto end;
9082 		}
9083 	}
9084 end:
9085 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9086 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9087 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9088 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9089 
9090 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9091 				continue;
9092 
9093 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9094 		}
9095 	}
9096 
9097 	WREG32(sob_addr, 0);
9098 	return rc;
9099 }
9100 
9101 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9102 {
9103 	int rc;
9104 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9105 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
9106 
9107 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9108 
9109 	if (rc)
9110 		dev_err(hdev->dev, "Failed to scrub DRAM, address: 0x%llx size: %llu\n",
9111 				prop->dram_user_base_address, size);
9112 	return rc;
9113 }
9114 
9115 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9116 {
9117 	int rc;
9118 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9119 	u64 val = hdev->memory_scrub_val;
9120 	u64 addr, size;
9121 
9122 	if (!hdev->memory_scrub)
9123 		return 0;
9124 
9125 	/* scrub SRAM */
9126 	addr = prop->sram_user_base_address;
9127 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9128 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9129 			addr, addr + size, val);
9130 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9131 	if (rc) {
9132 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9133 		return rc;
9134 	}
9135 
9136 	/* scrub DRAM */
9137 	rc = gaudi2_scrub_device_dram(hdev, val);
9138 	if (rc) {
9139 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9140 		return rc;
9141 	}
9142 	return 0;
9143 }
9144 
9145 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9146 {
9147 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9148 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9149 	u32 val, size, offset;
9150 	int dcore_id;
9151 
9152 	offset = hdev->asic_prop.first_available_cq[0] * 4;
9153 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9154 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9155 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9156 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9157 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9158 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9159 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9160 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9161 
9162 	/* memset dcore0 CQ registers */
9163 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9164 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9165 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9166 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9167 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9168 
9169 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9170 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9171 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9172 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9173 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9174 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9175 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9176 
9177 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9178 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9179 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9180 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9181 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9182 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9183 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9184 
9185 		cq_lbw_l_addr += DCORE_OFFSET;
9186 		cq_lbw_h_addr += DCORE_OFFSET;
9187 		cq_lbw_data_addr += DCORE_OFFSET;
9188 		cq_base_l_addr += DCORE_OFFSET;
9189 		cq_base_h_addr += DCORE_OFFSET;
9190 		cq_size_addr += DCORE_OFFSET;
9191 	}
9192 
9193 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9194 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9195 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9196 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9197 
9198 	/* memset dcore0 monitors */
9199 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9200 
9201 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9202 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
9203 
9204 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9205 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9206 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9207 
9208 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9209 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9210 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9211 		mon_sts_addr += DCORE_OFFSET;
9212 		mon_cfg_addr += DCORE_OFFSET;
9213 	}
9214 
9215 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9216 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9217 	val = 0;
9218 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9219 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9220 
9221 	/* memset dcore0 sobs */
9222 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9223 
9224 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9225 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9226 
9227 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9228 		gaudi2_memset_device_lbw(hdev, addr, size, val);
9229 		addr += DCORE_OFFSET;
9230 	}
9231 
9232 	/* Flush all WREG to prevent race */
9233 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9234 }
9235 
9236 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9237 {
9238 	u32 reg_base, hw_queue_id;
9239 
9240 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9241 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9242 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9243 			continue;
9244 
9245 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9246 
9247 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9248 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9249 	}
9250 
9251 	/* Flush all WREG to prevent race */
9252 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9253 }
9254 
9255 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9256 {
9257 	u32 reg_base, hw_queue_id;
9258 
9259 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9260 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9261 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9262 			continue;
9263 
9264 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9265 
9266 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9267 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9268 	}
9269 
9270 	/* Flush all WREG to prevent race */
9271 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9272 }
9273 
9274 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9275 {
9276 	return 0;
9277 }
9278 
9279 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9280 {
9281 }
9282 
9283 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9284 						struct dup_block_ctx *cfg_ctx)
9285 {
9286 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9287 	u8 seq;
9288 	int i;
9289 
9290 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
9291 		seq = block_idx * cfg_ctx->instances + i;
9292 
9293 		/* skip disabled instance */
9294 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9295 			continue;
9296 
9297 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9298 					cfg_ctx->data);
9299 	}
9300 }
9301 
9302 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9303 						u64 mask)
9304 {
9305 	int i;
9306 
9307 	cfg_ctx->enabled_mask = mask;
9308 
9309 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
9310 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
9311 }
9312 
9313 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9314 {
9315 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9316 }
9317 
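/*
 * Illustrative sketch (not part of the driver flow): how a caller might drive the
 * duplicated-block helpers above. The per-instance callback and the chosen block
 * base are hypothetical placeholders; only the dup_block_ctx fields that
 * gaudi2_init_block_instances() actually reads are filled in, and the callback
 * signature mirrors the way that function invokes instance_cfg_fn().
 */
static void example_cfg_block_instance(struct hl_device *hdev, u64 base, void *data)
{
	/* Hypothetical per-instance configuration: write one value to this instance */
	WREG32(lower_32_bits(base), *(u32 *) data);
}

static void __maybe_unused example_init_duplicated_blocks(struct hl_device *hdev)
{
	u32 init_val = 1;
	struct dup_block_ctx cfg_ctx = {
		.base = mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP, /* any replicated block base */
		.block_off = DCORE_OFFSET,		/* stride between blocks (dcores) */
		.instance_off = DCORE_EDMA_OFFSET,	/* stride between instances in a block */
		.blocks = NUM_OF_DCORES,
		.instances = NUM_OF_EDMA_PER_DCORE,
		.instance_cfg_fn = example_cfg_block_instance,
		.data = &init_val,
	};

	/* gaudi2_init_blocks() uses a U64_MAX mask - every instance of every block */
	gaudi2_init_blocks(hdev, &cfg_ctx);
}
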
9318 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9319 {
9320 	void *host_mem_virtual_addr;
9321 	dma_addr_t host_mem_dma_addr;
9322 	u64 reserved_va_base;
9323 	u32 pos, size_left, size_to_dma;
9324 	struct hl_ctx *ctx;
9325 	int rc = 0;
9326 
9327 	/* Fetch the ctx */
9328 	ctx = hl_get_compute_ctx(hdev);
9329 	if (!ctx) {
9330 		dev_err(hdev->dev, "No ctx available\n");
9331 		return -EINVAL;
9332 	}
9333 
9334 	/* Allocate buffers for read and for poll */
9335 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9336 								GFP_KERNEL | __GFP_ZERO);
9337 	if (host_mem_virtual_addr == NULL) {
9338 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9339 		rc = -ENOMEM;
9340 		goto put_ctx;
9341 	}
9342 
9343 	/* Reserve VM region on asic side */
9344 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9345 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9346 	if (!reserved_va_base) {
9347 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9348 		rc = -ENOMEM;
9349 		goto free_data_buffer;
9350 	}
9351 
9352 	/* Create mapping on asic side */
9353 	mutex_lock(&hdev->mmu_lock);
9354 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9355 	hl_mmu_invalidate_cache_range(hdev, false,
9356 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9357 				      ctx->asid, reserved_va_base, SZ_2M);
9358 	mutex_unlock(&hdev->mmu_lock);
9359 	if (rc) {
9360 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9361 		goto unreserve_va;
9362 	}
9363 
9364 	/* Enable MMU on KDMA */
9365 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9366 
9367 	pos = 0;
9368 	size_left = size;
9369 	size_to_dma = SZ_2M;
9370 
9371 	while (size_left > 0) {
9372 		if (size_left < SZ_2M)
9373 			size_to_dma = size_left;
9374 
9375 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9376 		if (rc)
9377 			break;
9378 
9379 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9380 
9381 		if (size_left <= SZ_2M)
9382 			break;
9383 
9384 		pos += SZ_2M;
9385 		addr += SZ_2M;
9386 		size_left -= SZ_2M;
9387 	}
9388 
9389 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9390 
9391 	mutex_lock(&hdev->mmu_lock);
9392 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9393 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9394 				      ctx->asid, reserved_va_base, SZ_2M);
9395 	mutex_unlock(&hdev->mmu_lock);
9396 unreserve_va:
9397 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9398 free_data_buffer:
9399 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9400 put_ctx:
9401 	hl_ctx_put(ctx);
9402 
9403 	return rc;
9404 }
9405 
9406 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9407 {
9408 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9409 	int min_alloc_order, rc;
9410 
9411 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9412 		return 0;
9413 
9414 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9415 								HOST_SPACE_INTERNAL_CB_SZ,
9416 								&hdev->internal_cb_pool_dma_addr,
9417 								GFP_KERNEL | __GFP_ZERO);
9418 
9419 	if (!hdev->internal_cb_pool_virt_addr)
9420 		return -ENOMEM;
9421 
9422 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9423 					gaudi2_get_wait_cb_size(hdev)));
9424 
9425 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9426 	if (!hdev->internal_cb_pool) {
9427 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
9428 		rc = -ENOMEM;
9429 		goto free_internal_cb_pool;
9430 	}
9431 
9432 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9433 				HOST_SPACE_INTERNAL_CB_SZ, -1);
9434 	if (rc) {
9435 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9436 		rc = -EFAULT;
9437 		goto destroy_internal_cb_pool;
9438 	}
9439 
9440 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9441 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9442 
9443 	if (!hdev->internal_cb_va_base) {
9444 		rc = -ENOMEM;
9445 		goto destroy_internal_cb_pool;
9446 	}
9447 
9448 	mutex_lock(&hdev->mmu_lock);
9449 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9450 					HOST_SPACE_INTERNAL_CB_SZ);
9451 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9452 	mutex_unlock(&hdev->mmu_lock);
9453 
9454 	if (rc)
9455 		goto unreserve_internal_cb_pool;
9456 
9457 	return 0;
9458 
9459 unreserve_internal_cb_pool:
9460 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9461 destroy_internal_cb_pool:
9462 	gen_pool_destroy(hdev->internal_cb_pool);
9463 free_internal_cb_pool:
9464 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9465 					hdev->internal_cb_pool_dma_addr);
9466 
9467 	return rc;
9468 }
9469 
9470 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9471 {
9472 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9473 
9474 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9475 		return;
9476 
9477 	mutex_lock(&hdev->mmu_lock);
9478 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9479 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9480 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9481 	mutex_unlock(&hdev->mmu_lock);
9482 
9483 	gen_pool_destroy(hdev->internal_cb_pool);
9484 
9485 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9486 					hdev->internal_cb_pool_dma_addr);
9487 }
9488 
9489 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9490 {
9491 	gaudi2_restore_user_sm_registers(hdev);
9492 	gaudi2_restore_user_qm_registers(hdev);
9493 }
9494 
9495 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9496 {
9497 	struct hl_device *hdev = ctx->hdev;
9498 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9499 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9500 	int rc;
9501 
9502 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9503 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9504 	if (rc)
9505 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9506 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9507 
9508 	return rc;
9509 }
9510 
9511 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9512 {
9513 	struct hl_device *hdev = ctx->hdev;
9514 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9515 	int rc;
9516 
9517 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9518 				prop->pmmu.page_size, true);
9519 	if (rc)
9520 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9521 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9522 }
9523 
9524 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9525 {
9526 	int rc;
9527 
9528 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9529 	if (rc)
9530 		return rc;
9531 
9532 	/* No need to clear user registers if the device has just
9533 	 * performed a reset; restore only the NIC QM registers
9534 	 */
9535 	if (ctx->hdev->reset_upon_device_release)
9536 		gaudi2_restore_nic_qm_registers(ctx->hdev);
9537 	else
9538 		gaudi2_restore_user_registers(ctx->hdev);
9539 
9540 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9541 	if (rc)
9542 		return rc;
9543 
9544 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9545 	if (rc)
9546 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9547 
9548 	return rc;
9549 }
9550 
9551 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9552 {
9553 	if (ctx->asid == HL_KERNEL_ASID_ID)
9554 		return;
9555 
9556 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9557 
9558 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9559 }
9560 
9561 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9562 {
9563 	struct hl_device *hdev = cs->ctx->hdev;
9564 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
9565 	u32 mon_payload, sob_id, mon_id;
9566 
9567 	if (!cs_needs_completion(cs))
9568 		return 0;
9569 
9570 	/*
9571 	 * The first 64 SOB/MON pairs are reserved for the driver's QMAN auto-completion
9572 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
9573 	 * cyclic index. The SOB value is incremented as each of the CS jobs
9574 	 * completes. When the SOB reaches the number of CS jobs, the monitor
9575 	 * generates an MSI-X interrupt.
9576 	 */
9577 
9578 	sob_id = mon_id = index;
9579 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9580 				(1 << CQ_ENTRY_READY_SHIFT) | index;
9581 
9582 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9583 				cs->jobs_cnt);
9584 
9585 	return 0;
9586 }
9587 
9588 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9589 {
9590 	return HL_INVALID_QUEUE;
9591 }
9592 
9593 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9594 {
9595 	struct hl_cb *cb = data;
9596 	struct packet_msg_short *pkt;
9597 	u32 value, ctl, pkt_size = sizeof(*pkt);
9598 
9599 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9600 	memset(pkt, 0, pkt_size);
9601 
9602 	/* Inc by 1, Mode ADD */
9603 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9604 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9605 
9606 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9607 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9608 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9609 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9610 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9611 
9612 	pkt->value = cpu_to_le32(value);
9613 	pkt->ctl = cpu_to_le32(ctl);
9614 
9615 	return size + pkt_size;
9616 }
9617 
9618 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9619 {
9620 	u32 ctl, pkt_size = sizeof(*pkt);
9621 
9622 	memset(pkt, 0, pkt_size);
9623 
9624 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9625 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
9626 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9627 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9628 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9629 
9630 	pkt->value = cpu_to_le32(value);
9631 	pkt->ctl = cpu_to_le32(ctl);
9632 
9633 	return pkt_size;
9634 }
9635 
9636 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9637 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
9638 {
9639 	u32 ctl, value, pkt_size = sizeof(*pkt);
9640 	u8 mask;
9641 
9642 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
9643 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
9644 		return 0;
9645 	}
9646 
9647 	memset(pkt, 0, pkt_size);
9648 
9649 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
9650 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
9651 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
9652 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
9653 
9654 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9655 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
9656 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9657 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9658 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9659 
9660 	pkt->value = cpu_to_le32(value);
9661 	pkt->ctl = cpu_to_le32(ctl);
9662 
9663 	return pkt_size;
9664 }
9665 
9666 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
9667 {
9668 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
9669 
9670 	memset(pkt, 0, pkt_size);
9671 
9672 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
9673 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
9674 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
9675 
9676 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
9677 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9678 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9679 
9680 	pkt->cfg = cpu_to_le32(cfg);
9681 	pkt->ctl = cpu_to_le32(ctl);
9682 
9683 	return pkt_size;
9684 }
9685 
9686 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
9687 {
9688 	struct hl_cb *cb = prop->data;
9689 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
9690 
9691 	u64 monitor_base, fence_addr = 0;
9692 	u32 stream_index, size = prop->size;
9693 	u16 msg_addr_offset;
9694 
9695 	stream_index = prop->q_idx % 4;
9696 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
9697 			QM_FENCE2_OFFSET + stream_index * 4;
9698 
9699 	/*
9700 	 * monitor_base should be the content of the base0 address registers,
9701 	 * so it will be added to the msg short offsets
9702 	 */
9703 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9704 
9705 	/* First monitor config packet: low address of the sync */
9706 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
9707 				monitor_base;
9708 
9709 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
9710 
9711 	/* Second monitor config packet: high address of the sync */
9712 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
9713 				monitor_base;
9714 
9715 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
9716 
9717 	/*
9718 	 * Third monitor config packet: the payload, i.e. what to write when the
9719 	 * sync triggers
9720 	 */
9721 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
9722 				monitor_base;
9723 
9724 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9725 
9726 	/* Fourth monitor config packet: bind the monitor to a sync object */
9727 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
9728 
9729 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
9730 						prop->sob_val, msg_addr_offset);
9731 
9732 	/* Fence packet */
9733 	size += gaudi2_add_fence_pkt(buf + size);
9734 
9735 	return size;
9736 }
9737 
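/*
 * Illustrative sketch (not how the common submission code drives this): building a
 * wait CB with gaudi2_gen_wait_cb(). The CB, the SOB index and the monitor index
 * below are hypothetical; the hl_gen_wait_properties fields shown are exactly the
 * ones the helper consumes. The resulting CB holds three monitor-config MSG_SHORT
 * packets, one monitor-arm packet and one fence packet.
 */
static u32 __maybe_unused example_build_wait_cb(struct hl_device *hdev, struct hl_cb *cb)
{
	struct hl_gen_wait_properties prop = {
		.data = cb,				/* CB to append the packets to */
		.size = 0,				/* current write offset inside the CB */
		.q_idx = GAUDI2_QUEUE_ID_PDMA_0_0,	/* queue whose FENCE2 register gets the payload */
		.mon_id = 64,				/* hypothetical monitor index */
		.sob_base = 64,				/* hypothetical SOB index */
		.sob_mask = 0x1,			/* single SOB in the group */
		.sob_val = 1,				/* wait until the SOB reaches 1 */
	};

	/* Returns the CB size after the wait packets were appended */
	return gaudi2_gen_wait_cb(hdev, &prop);
}
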
9738 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
9739 {
9740 	struct hl_hw_sob *hw_sob = data;
9741 
9742 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
9743 
9744 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
9745 
9746 	kref_init(&hw_sob->kref);
9747 }
9748 
9749 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
9750 {
9751 }
9752 
9753 static u64 gaudi2_get_device_time(struct hl_device *hdev)
9754 {
9755 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9756 
9757 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9758 }
9759 
9760 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
9761 {
9762 	return 0;
9763 }
9764 
9765 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
9766 					struct hl_cs *cs, u32 wait_queue_id,
9767 					u32 collective_engine_id, u32 encaps_signal_offset)
9768 {
9769 	return -EINVAL;
9770 }
9771 
9772 /*
9773  * gaudi2_mmu_scramble_addr - converts a DRAM (non-power-of-2) page-size aligned
9774  *                            address to a DMMU page-size (64MB) address before
9775  *                            mapping it in the MMU.
9776  * The operation is performed on both the virtual and physical addresses.
9777  * For a device with 6 HBMs the scramble is:
9778  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
9779  *
9780  * Example:
9781  * =============================================================================
9782  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
9783  * Phys address                                                     in MMU last
9784  *                                                                    HOP
9785  * =============================================================================
9786  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
9787  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
9788  * =============================================================================
9789  */
9790 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
9791 {
9792 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9793 	u32 divisor, mod_va;
9794 	u64 div_va;
9795 
9796 	/* accept any address in the DRAM address space */
9797 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
9798 									VA_HBM_SPACE_END)) {
9799 
9800 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
9801 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
9802 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
9803 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
9804 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
9805 	}
9806 
9807 	return raw_addr;
9808 }
9809 
9810 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
9811 {
9812 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9813 	u32 divisor, mod_va;
9814 	u64 div_va;
9815 
9816 	/* accept any address in the DRAM address space */
9817 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
9818 									VA_HBM_SPACE_END)) {
9819 
9820 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
9821 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
9822 					PAGE_SIZE_64MB, &mod_va);
9823 
9824 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
9825 					(div_va * divisor + mod_va));
9826 	}
9827 
9828 	return scrambled_addr;
9829 }
9830 
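/*
 * Minimal standalone sketch of the scramble arithmetic documented above; it is not
 * called anywhere in the driver. The 48MB divisor assumes the 6-HBM case from the
 * formula; real callers should use gaudi2_mmu_scramble_addr(), which derives the
 * divisor from the number of functional HBMs.
 */
static void __maybe_unused example_hbm_scramble_arithmetic(struct hl_device *hdev)
{
	u64 addr = 0x9C000000ull, div_va, scrambled;
	u32 divisor = 48 * SZ_1M;	/* 6 functional HBMs */
	u32 mod_va;

	/* scrambled = (addr / 48M) * 64M + addr % 48M, upper address bits preserved */
	div_va = div_u64_rem(addr, divisor, &mod_va);
	scrambled = div_va * PAGE_SIZE_64MB + mod_va;

	/* Matches the SVA1 row in the table above: 0x9C000000 -> 0xD0000000 */
	dev_dbg(hdev->dev, "scrambled %#llx -> %#llx\n", addr, scrambled);
}
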
9831 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
9832 {
9833 	u32 base = 0, dcore_id, dec_id;
9834 
9835 	if (core_id >= NUMBER_OF_DEC) {
9836 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
9837 		goto out;
9838 	}
9839 
9840 	if (core_id < 8) {
9841 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
9842 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
9843 
9844 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
9845 				dec_id * DCORE_VDEC_OFFSET;
9846 	} else {
9847 		/* PCIe Shared Decoder */
9848 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
9849 	}
9850 out:
9851 	return base;
9852 }
9853 
9854 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9855 				u32 *block_size, u32 *block_id)
9856 {
9857 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9858 	int i;
9859 
9860 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
9861 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
9862 			*block_id = i;
9863 			if (block_size)
9864 				*block_size = gaudi2->mapped_blocks[i].size;
9865 			return 0;
9866 		}
9867 	}
9868 
9869 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
9870 
9871 	return -EINVAL;
9872 }
9873 
9874 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
9875 			u32 block_id, u32 block_size)
9876 {
9877 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9878 	u64 offset_in_bar;
9879 	u64 address;
9880 	int rc;
9881 
9882 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
9883 		dev_err(hdev->dev, "Invalid block id %u", block_id);
9884 		return -EINVAL;
9885 	}
9886 
9887 	/* we allow mapping only an entire block */
9888 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
9889 		dev_err(hdev->dev, "Invalid block size %u", block_size);
9890 		return -EINVAL;
9891 	}
9892 
9893 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
9894 
9895 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
9896 
9897 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
9898 			VM_DONTCOPY | VM_NORESERVE;
9899 
9900 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
9901 			block_size, vma->vm_page_prot);
9902 	if (rc)
9903 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
9904 
9905 	return rc;
9906 }
9907 
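/*
 * gaudi2_enable_events_from_fw - signal the firmware, through the GIC host
 * interrupts register, that the driver is ready to receive events. Skipped if
 * the CPU queue is not initialized.
 */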
static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}

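/*
 * gaudi2_get_mmu_base - translate an HW_CAP_* MMU capability bit into the
 * register base address of the corresponding HMMU/PMMU block.
 */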
static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

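/*
 * gaudi2_ack_mmu_error - handle and acknowledge page-fault and access-error
 * indications on a single MMU, provided that MMU is initialized.
 */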
static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

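/*
 * gaudi2_ack_mmu_page_fault_or_access_error - iterate over all HMMUs and the
 * PMMU and acknowledge pending errors on every MMU whose capability bit is set
 * in @mmu_cap_mask.
 */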
static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}

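/*
 * gaudi2_map_pll_idx_to_fw_idx - translate a user-facing HL_GAUDI2_*_PLL index
 * into the corresponding firmware PLL index, or -EINVAL for an unknown index.
 */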
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

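/*
 * gaudi2_mmu_get_real_page_size - derive the page size that is actually
 * programmed into the MMU page tables. Host mappings use the MMU page size,
 * while DRAM mappings use the DRAM page size and rely on the scrambling
 * routine to bridge the gap to the larger DMMU page size.
 */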
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
					u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page is
	 * greater than the DRAM page). For this reason, work with the DRAM page size and let the
	 * MMU scrambling routine handle the mismatch when calculating the address to place in the
	 * MMU page table (the check above also makes sure that dram_page_size is not greater than
	 * the MMU page size).
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
							page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

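/*
 * gaudi2_send_device_activity - report device open/close activity to the
 * firmware. Silently skipped when the CPU queue is down or the firmware is too
 * old to support the message.
 */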
int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
		return 0;

	/* TODO: add check for FW version using minor ver once it's known */
	return hl_fw_send_device_activity(hdev, open);
}

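/* Gaudi2 implementation of the ASIC-specific callbacks used by the common habanalabs driver code */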
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.send_device_activity = gaudi2_send_device_activity,
};

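/*
 * gaudi2_set_asic_funcs - hook the Gaudi2 function table into the device
 * structure.
 */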
void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}