1 // SPDX-License-Identifier: GPL-2.0+
2 /* Copyright (c) 2016-2017 Hisilicon Limited. */
3
4 #include "hclge_err.h"
5
6 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
7 {
8 .int_msk = BIT(1),
9 .msg = "imp_itcm0_ecc_mbit_err",
10 .reset_level = HNAE3_NONE_RESET
11 }, {
12 .int_msk = BIT(3),
13 .msg = "imp_itcm1_ecc_mbit_err",
14 .reset_level = HNAE3_NONE_RESET
15 }, {
16 .int_msk = BIT(5),
17 .msg = "imp_itcm2_ecc_mbit_err",
18 .reset_level = HNAE3_NONE_RESET
19 }, {
20 .int_msk = BIT(7),
21 .msg = "imp_itcm3_ecc_mbit_err",
22 .reset_level = HNAE3_NONE_RESET
23 }, {
24 .int_msk = BIT(9),
25 .msg = "imp_dtcm0_mem0_ecc_mbit_err",
26 .reset_level = HNAE3_NONE_RESET
27 }, {
28 .int_msk = BIT(11),
29 .msg = "imp_dtcm0_mem1_ecc_mbit_err",
30 .reset_level = HNAE3_NONE_RESET
31 }, {
32 .int_msk = BIT(13),
33 .msg = "imp_dtcm1_mem0_ecc_mbit_err",
34 .reset_level = HNAE3_NONE_RESET
35 }, {
36 .int_msk = BIT(15),
37 .msg = "imp_dtcm1_mem1_ecc_mbit_err",
38 .reset_level = HNAE3_NONE_RESET
39 }, {
40 .int_msk = BIT(17),
41 .msg = "imp_itcm4_ecc_mbit_err",
42 .reset_level = HNAE3_NONE_RESET
43 }, {
44 /* sentinel */
45 }
46 };
47
48 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
49 {
50 .int_msk = BIT(1),
51 .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
52 .reset_level = HNAE3_NONE_RESET
53 }, {
54 .int_msk = BIT(3),
55 .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
56 .reset_level = HNAE3_NONE_RESET
57 }, {
58 .int_msk = BIT(5),
59 .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
60 .reset_level = HNAE3_NONE_RESET
61 }, {
62 .int_msk = BIT(7),
63 .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
64 .reset_level = HNAE3_NONE_RESET
65 }, {
66 .int_msk = BIT(9),
67 .msg = "cmdq_nic_rx_head_ecc_mbit_err",
68 .reset_level = HNAE3_NONE_RESET
69 }, {
70 .int_msk = BIT(11),
71 .msg = "cmdq_nic_tx_head_ecc_mbit_err",
72 .reset_level = HNAE3_NONE_RESET
73 }, {
74 .int_msk = BIT(13),
75 .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
76 .reset_level = HNAE3_NONE_RESET
77 }, {
78 .int_msk = BIT(15),
79 .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
80 .reset_level = HNAE3_NONE_RESET
81 }, {
82 .int_msk = BIT(17),
83 .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
84 .reset_level = HNAE3_NONE_RESET
85 }, {
86 .int_msk = BIT(19),
87 .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
88 .reset_level = HNAE3_NONE_RESET
89 }, {
90 .int_msk = BIT(21),
91 .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
92 .reset_level = HNAE3_NONE_RESET
93 }, {
94 .int_msk = BIT(23),
95 .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
96 .reset_level = HNAE3_NONE_RESET
97 }, {
98 .int_msk = BIT(25),
99 .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
100 .reset_level = HNAE3_NONE_RESET
101 }, {
102 .int_msk = BIT(27),
103 .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
104 .reset_level = HNAE3_NONE_RESET
105 }, {
106 .int_msk = BIT(29),
107 .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
108 .reset_level = HNAE3_NONE_RESET
109 }, {
110 .int_msk = BIT(31),
111 .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
112 .reset_level = HNAE3_NONE_RESET
113 }, {
114 /* sentinel */
115 }
116 };
117
118 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
119 {
120 .int_msk = BIT(6),
121 .msg = "tqp_int_cfg_even_ecc_mbit_err",
122 .reset_level = HNAE3_NONE_RESET
123 }, {
124 .int_msk = BIT(7),
125 .msg = "tqp_int_cfg_odd_ecc_mbit_err",
126 .reset_level = HNAE3_NONE_RESET
127 }, {
128 .int_msk = BIT(8),
129 .msg = "tqp_int_ctrl_even_ecc_mbit_err",
130 .reset_level = HNAE3_NONE_RESET
131 }, {
132 .int_msk = BIT(9),
133 .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
134 .reset_level = HNAE3_NONE_RESET
135 }, {
136 .int_msk = BIT(10),
137 .msg = "tx_que_scan_int_ecc_mbit_err",
138 .reset_level = HNAE3_NONE_RESET
139 }, {
140 .int_msk = BIT(11),
141 .msg = "rx_que_scan_int_ecc_mbit_err",
142 .reset_level = HNAE3_NONE_RESET
143 }, {
144 /* sentinel */
145 }
146 };
147
148 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
149 {
150 .int_msk = BIT(1),
151 .msg = "msix_nic_ecc_mbit_err",
152 .reset_level = HNAE3_NONE_RESET
153 }, {
154 .int_msk = BIT(3),
155 .msg = "msix_rocee_ecc_mbit_err",
156 .reset_level = HNAE3_NONE_RESET
157 }, {
158 /* sentinel */
159 }
160 };
161
162 static const struct hclge_hw_error hclge_igu_int[] = {
163 {
164 .int_msk = BIT(0),
165 .msg = "igu_rx_buf0_ecc_mbit_err",
166 .reset_level = HNAE3_GLOBAL_RESET
167 }, {
168 .int_msk = BIT(2),
169 .msg = "igu_rx_buf1_ecc_mbit_err",
170 .reset_level = HNAE3_GLOBAL_RESET
171 }, {
172 /* sentinel */
173 }
174 };
175
176 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
177 {
178 .int_msk = BIT(0),
179 .msg = "rx_buf_overflow",
180 .reset_level = HNAE3_GLOBAL_RESET
181 }, {
182 .int_msk = BIT(1),
183 .msg = "rx_stp_fifo_overflow",
184 .reset_level = HNAE3_GLOBAL_RESET
185 }, {
186 .int_msk = BIT(2),
187 .msg = "rx_stp_fifo_underflow",
188 .reset_level = HNAE3_GLOBAL_RESET
189 }, {
190 .int_msk = BIT(3),
191 .msg = "tx_buf_overflow",
192 .reset_level = HNAE3_GLOBAL_RESET
193 }, {
194 .int_msk = BIT(4),
195 .msg = "tx_buf_underrun",
196 .reset_level = HNAE3_GLOBAL_RESET
197 }, {
198 .int_msk = BIT(5),
199 .msg = "rx_stp_buf_overflow",
200 .reset_level = HNAE3_GLOBAL_RESET
201 }, {
202 /* sentinel */
203 }
204 };
205
206 static const struct hclge_hw_error hclge_ncsi_err_int[] = {
207 {
208 .int_msk = BIT(1),
209 .msg = "ncsi_tx_ecc_mbit_err",
210 .reset_level = HNAE3_NONE_RESET
211 }, {
212 /* sentinel */
213 }
214 };
215
216 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
217 {
218 .int_msk = BIT(0),
219 .msg = "vf_vlan_ad_mem_ecc_mbit_err",
220 .reset_level = HNAE3_GLOBAL_RESET
221 }, {
222 .int_msk = BIT(1),
223 .msg = "umv_mcast_group_mem_ecc_mbit_err",
224 .reset_level = HNAE3_GLOBAL_RESET
225 }, {
226 .int_msk = BIT(2),
227 .msg = "umv_key_mem0_ecc_mbit_err",
228 .reset_level = HNAE3_GLOBAL_RESET
229 }, {
230 .int_msk = BIT(3),
231 .msg = "umv_key_mem1_ecc_mbit_err",
232 .reset_level = HNAE3_GLOBAL_RESET
233 }, {
234 .int_msk = BIT(4),
235 .msg = "umv_key_mem2_ecc_mbit_err",
236 .reset_level = HNAE3_GLOBAL_RESET
237 }, {
238 .int_msk = BIT(5),
239 .msg = "umv_key_mem3_ecc_mbit_err",
240 .reset_level = HNAE3_GLOBAL_RESET
241 }, {
242 .int_msk = BIT(6),
243 .msg = "umv_ad_mem_ecc_mbit_err",
244 .reset_level = HNAE3_GLOBAL_RESET
245 }, {
246 .int_msk = BIT(7),
247 .msg = "rss_tc_mode_mem_ecc_mbit_err",
248 .reset_level = HNAE3_GLOBAL_RESET
249 }, {
250 .int_msk = BIT(8),
251 .msg = "rss_idt_mem0_ecc_mbit_err",
252 .reset_level = HNAE3_GLOBAL_RESET
253 }, {
254 .int_msk = BIT(9),
255 .msg = "rss_idt_mem1_ecc_mbit_err",
256 .reset_level = HNAE3_GLOBAL_RESET
257 }, {
258 .int_msk = BIT(10),
259 .msg = "rss_idt_mem2_ecc_mbit_err",
260 .reset_level = HNAE3_GLOBAL_RESET
261 }, {
262 .int_msk = BIT(11),
263 .msg = "rss_idt_mem3_ecc_mbit_err",
264 .reset_level = HNAE3_GLOBAL_RESET
265 }, {
266 .int_msk = BIT(12),
267 .msg = "rss_idt_mem4_ecc_mbit_err",
268 .reset_level = HNAE3_GLOBAL_RESET
269 }, {
270 .int_msk = BIT(13),
271 .msg = "rss_idt_mem5_ecc_mbit_err",
272 .reset_level = HNAE3_GLOBAL_RESET
273 }, {
274 .int_msk = BIT(14),
275 .msg = "rss_idt_mem6_ecc_mbit_err",
276 .reset_level = HNAE3_GLOBAL_RESET
277 }, {
278 .int_msk = BIT(15),
279 .msg = "rss_idt_mem7_ecc_mbit_err",
280 .reset_level = HNAE3_GLOBAL_RESET
281 }, {
282 .int_msk = BIT(16),
283 .msg = "rss_idt_mem8_ecc_mbit_err",
284 .reset_level = HNAE3_GLOBAL_RESET
285 }, {
286 .int_msk = BIT(17),
287 .msg = "rss_idt_mem9_ecc_mbit_err",
288 .reset_level = HNAE3_GLOBAL_RESET
289 }, {
290 .int_msk = BIT(18),
291 .msg = "rss_idt_mem10_ecc_mbit_err",
292 .reset_level = HNAE3_GLOBAL_RESET
293 }, {
294 .int_msk = BIT(19),
295 .msg = "rss_idt_mem11_ecc_mbit_err",
296 .reset_level = HNAE3_GLOBAL_RESET
297 }, {
298 .int_msk = BIT(20),
299 .msg = "rss_idt_mem12_ecc_mbit_err",
300 .reset_level = HNAE3_GLOBAL_RESET
301 }, {
302 .int_msk = BIT(21),
303 .msg = "rss_idt_mem13_ecc_mbit_err",
304 .reset_level = HNAE3_GLOBAL_RESET
305 }, {
306 .int_msk = BIT(22),
307 .msg = "rss_idt_mem14_ecc_mbit_err",
308 .reset_level = HNAE3_GLOBAL_RESET
309 }, {
310 .int_msk = BIT(23),
311 .msg = "rss_idt_mem15_ecc_mbit_err",
312 .reset_level = HNAE3_GLOBAL_RESET
313 }, {
314 .int_msk = BIT(24),
315 .msg = "port_vlan_mem_ecc_mbit_err",
316 .reset_level = HNAE3_GLOBAL_RESET
317 }, {
318 .int_msk = BIT(25),
319 .msg = "mcast_linear_table_mem_ecc_mbit_err",
320 .reset_level = HNAE3_GLOBAL_RESET
321 }, {
322 .int_msk = BIT(26),
323 .msg = "mcast_result_mem_ecc_mbit_err",
324 .reset_level = HNAE3_GLOBAL_RESET
325 }, {
326 .int_msk = BIT(27),
327 .msg = "flow_director_ad_mem0_ecc_mbit_err",
328 .reset_level = HNAE3_GLOBAL_RESET
329 }, {
330 .int_msk = BIT(28),
331 .msg = "flow_director_ad_mem1_ecc_mbit_err",
332 .reset_level = HNAE3_GLOBAL_RESET
333 }, {
334 .int_msk = BIT(29),
335 .msg = "rx_vlan_tag_memory_ecc_mbit_err",
336 .reset_level = HNAE3_GLOBAL_RESET
337 }, {
338 .int_msk = BIT(30),
339 .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
340 .reset_level = HNAE3_GLOBAL_RESET
341 }, {
342 /* sentinel */
343 }
344 };
345
346 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
347 {
348 .int_msk = BIT(0),
349 .msg = "tx_vlan_tag_err",
350 .reset_level = HNAE3_NONE_RESET
351 }, {
352 .int_msk = BIT(1),
353 .msg = "rss_list_tc_unassigned_queue_err",
354 .reset_level = HNAE3_NONE_RESET
355 }, {
356 /* sentinel */
357 }
358 };
359
360 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
361 {
362 .int_msk = BIT(0),
363 .msg = "hfs_fifo_mem_ecc_mbit_err",
364 .reset_level = HNAE3_GLOBAL_RESET
365 }, {
366 .int_msk = BIT(1),
367 .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
368 .reset_level = HNAE3_GLOBAL_RESET
369 }, {
370 .int_msk = BIT(2),
371 .msg = "tx_vlan_tag_mem_ecc_mbit_err",
372 .reset_level = HNAE3_GLOBAL_RESET
373 }, {
374 .int_msk = BIT(3),
375 .msg = "FD_CN0_memory_ecc_mbit_err",
376 .reset_level = HNAE3_GLOBAL_RESET
377 }, {
378 .int_msk = BIT(4),
379 .msg = "FD_CN1_memory_ecc_mbit_err",
380 .reset_level = HNAE3_GLOBAL_RESET
381 }, {
382 .int_msk = BIT(5),
383 .msg = "GRO_AD_memory_ecc_mbit_err",
384 .reset_level = HNAE3_GLOBAL_RESET
385 }, {
386 /* sentinel */
387 }
388 };
389
390 static const struct hclge_hw_error hclge_tm_sch_rint[] = {
391 {
392 .int_msk = BIT(1),
393 .msg = "tm_sch_ecc_mbit_err",
394 .reset_level = HNAE3_GLOBAL_RESET
395 }, {
396 .int_msk = BIT(2),
397 .msg = "tm_sch_port_shap_sub_fifo_wr_err",
398 .reset_level = HNAE3_GLOBAL_RESET
399 }, {
400 .int_msk = BIT(3),
401 .msg = "tm_sch_port_shap_sub_fifo_rd_err",
402 .reset_level = HNAE3_GLOBAL_RESET
403 }, {
404 .int_msk = BIT(4),
405 .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
406 .reset_level = HNAE3_GLOBAL_RESET
407 }, {
408 .int_msk = BIT(5),
409 .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
410 .reset_level = HNAE3_GLOBAL_RESET
411 }, {
412 .int_msk = BIT(6),
413 .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
414 .reset_level = HNAE3_GLOBAL_RESET
415 }, {
416 .int_msk = BIT(7),
417 .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
418 .reset_level = HNAE3_GLOBAL_RESET
419 }, {
420 .int_msk = BIT(8),
421 .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
422 .reset_level = HNAE3_GLOBAL_RESET
423 }, {
424 .int_msk = BIT(9),
425 .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
426 .reset_level = HNAE3_GLOBAL_RESET
427 }, {
428 .int_msk = BIT(10),
429 .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
430 .reset_level = HNAE3_GLOBAL_RESET
431 }, {
432 .int_msk = BIT(11),
433 .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
434 .reset_level = HNAE3_GLOBAL_RESET
435 }, {
436 .int_msk = BIT(12),
437 .msg = "tm_sch_port_shap_offset_fifo_wr_err",
438 .reset_level = HNAE3_GLOBAL_RESET
439 }, {
440 .int_msk = BIT(13),
441 .msg = "tm_sch_port_shap_offset_fifo_rd_err",
442 .reset_level = HNAE3_GLOBAL_RESET
443 }, {
444 .int_msk = BIT(14),
445 .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
446 .reset_level = HNAE3_GLOBAL_RESET
447 }, {
448 .int_msk = BIT(15),
449 .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
450 .reset_level = HNAE3_GLOBAL_RESET
451 }, {
452 .int_msk = BIT(16),
453 .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
454 .reset_level = HNAE3_GLOBAL_RESET
455 }, {
456 .int_msk = BIT(17),
457 .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
458 .reset_level = HNAE3_GLOBAL_RESET
459 }, {
460 .int_msk = BIT(18),
461 .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
462 .reset_level = HNAE3_GLOBAL_RESET
463 }, {
464 .int_msk = BIT(19),
465 .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
466 .reset_level = HNAE3_GLOBAL_RESET
467 }, {
468 .int_msk = BIT(20),
469 .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
470 .reset_level = HNAE3_GLOBAL_RESET
471 }, {
472 .int_msk = BIT(21),
473 .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
474 .reset_level = HNAE3_GLOBAL_RESET
475 }, {
476 .int_msk = BIT(22),
477 .msg = "tm_sch_rq_fifo_wr_err",
478 .reset_level = HNAE3_GLOBAL_RESET
479 }, {
480 .int_msk = BIT(23),
481 .msg = "tm_sch_rq_fifo_rd_err",
482 .reset_level = HNAE3_GLOBAL_RESET
483 }, {
484 .int_msk = BIT(24),
485 .msg = "tm_sch_nq_fifo_wr_err",
486 .reset_level = HNAE3_GLOBAL_RESET
487 }, {
488 .int_msk = BIT(25),
489 .msg = "tm_sch_nq_fifo_rd_err",
490 .reset_level = HNAE3_GLOBAL_RESET
491 }, {
492 .int_msk = BIT(26),
493 .msg = "tm_sch_roce_up_fifo_wr_err",
494 .reset_level = HNAE3_GLOBAL_RESET
495 }, {
496 .int_msk = BIT(27),
497 .msg = "tm_sch_roce_up_fifo_rd_err",
498 .reset_level = HNAE3_GLOBAL_RESET
499 }, {
500 .int_msk = BIT(28),
501 .msg = "tm_sch_rcb_byte_fifo_wr_err",
502 .reset_level = HNAE3_GLOBAL_RESET
503 }, {
504 .int_msk = BIT(29),
505 .msg = "tm_sch_rcb_byte_fifo_rd_err",
506 .reset_level = HNAE3_GLOBAL_RESET
507 }, {
508 .int_msk = BIT(30),
509 .msg = "tm_sch_ssu_byte_fifo_wr_err",
510 .reset_level = HNAE3_GLOBAL_RESET
511 }, {
512 .int_msk = BIT(31),
513 .msg = "tm_sch_ssu_byte_fifo_rd_err",
514 .reset_level = HNAE3_GLOBAL_RESET
515 }, {
516 /* sentinel */
517 }
518 };
519
520 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
521 {
522 .int_msk = BIT(0),
523 .msg = "qcn_shap_gp0_sch_fifo_rd_err",
524 .reset_level = HNAE3_GLOBAL_RESET
525 }, {
526 .int_msk = BIT(1),
527 .msg = "qcn_shap_gp0_sch_fifo_wr_err",
528 .reset_level = HNAE3_GLOBAL_RESET
529 }, {
530 .int_msk = BIT(2),
531 .msg = "qcn_shap_gp1_sch_fifo_rd_err",
532 .reset_level = HNAE3_GLOBAL_RESET
533 }, {
534 .int_msk = BIT(3),
535 .msg = "qcn_shap_gp1_sch_fifo_wr_err",
536 .reset_level = HNAE3_GLOBAL_RESET
537 }, {
538 .int_msk = BIT(4),
539 .msg = "qcn_shap_gp2_sch_fifo_rd_err",
540 .reset_level = HNAE3_GLOBAL_RESET
541 }, {
542 .int_msk = BIT(5),
543 .msg = "qcn_shap_gp2_sch_fifo_wr_err",
544 .reset_level = HNAE3_GLOBAL_RESET
545 }, {
546 .int_msk = BIT(6),
547 .msg = "qcn_shap_gp3_sch_fifo_rd_err",
548 .reset_level = HNAE3_GLOBAL_RESET
549 }, {
550 .int_msk = BIT(7),
551 .msg = "qcn_shap_gp3_sch_fifo_wr_err",
552 .reset_level = HNAE3_GLOBAL_RESET
553 }, {
554 .int_msk = BIT(8),
555 .msg = "qcn_shap_gp0_offset_fifo_rd_err",
556 .reset_level = HNAE3_GLOBAL_RESET
557 }, {
558 .int_msk = BIT(9),
559 .msg = "qcn_shap_gp0_offset_fifo_wr_err",
560 .reset_level = HNAE3_GLOBAL_RESET
561 }, {
562 .int_msk = BIT(10),
563 .msg = "qcn_shap_gp1_offset_fifo_rd_err",
564 .reset_level = HNAE3_GLOBAL_RESET
565 }, {
566 .int_msk = BIT(11),
567 .msg = "qcn_shap_gp1_offset_fifo_wr_err",
568 .reset_level = HNAE3_GLOBAL_RESET
569 }, {
570 .int_msk = BIT(12),
571 .msg = "qcn_shap_gp2_offset_fifo_rd_err",
572 .reset_level = HNAE3_GLOBAL_RESET
573 }, {
574 .int_msk = BIT(13),
575 .msg = "qcn_shap_gp2_offset_fifo_wr_err",
576 .reset_level = HNAE3_GLOBAL_RESET
577 }, {
578 .int_msk = BIT(14),
579 .msg = "qcn_shap_gp3_offset_fifo_rd_err",
580 .reset_level = HNAE3_GLOBAL_RESET
581 }, {
582 .int_msk = BIT(15),
583 .msg = "qcn_shap_gp3_offset_fifo_wr_err",
584 .reset_level = HNAE3_GLOBAL_RESET
585 }, {
586 .int_msk = BIT(16),
587 .msg = "qcn_byte_info_fifo_rd_err",
588 .reset_level = HNAE3_GLOBAL_RESET
589 }, {
590 .int_msk = BIT(17),
591 .msg = "qcn_byte_info_fifo_wr_err",
592 .reset_level = HNAE3_GLOBAL_RESET
593 }, {
594 /* sentinel */
595 }
596 };
597
598 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
599 {
600 .int_msk = BIT(1),
601 .msg = "qcn_byte_mem_ecc_mbit_err",
602 .reset_level = HNAE3_GLOBAL_RESET
603 }, {
604 .int_msk = BIT(3),
605 .msg = "qcn_time_mem_ecc_mbit_err",
606 .reset_level = HNAE3_GLOBAL_RESET
607 }, {
608 .int_msk = BIT(5),
609 .msg = "qcn_fb_mem_ecc_mbit_err",
610 .reset_level = HNAE3_GLOBAL_RESET
611 }, {
612 .int_msk = BIT(7),
613 .msg = "qcn_link_mem_ecc_mbit_err",
614 .reset_level = HNAE3_GLOBAL_RESET
615 }, {
616 .int_msk = BIT(9),
617 .msg = "qcn_rate_mem_ecc_mbit_err",
618 .reset_level = HNAE3_GLOBAL_RESET
619 }, {
620 .int_msk = BIT(11),
621 .msg = "qcn_tmplt_mem_ecc_mbit_err",
622 .reset_level = HNAE3_GLOBAL_RESET
623 }, {
624 .int_msk = BIT(13),
625 .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
626 .reset_level = HNAE3_GLOBAL_RESET
627 }, {
628 .int_msk = BIT(15),
629 .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
630 .reset_level = HNAE3_GLOBAL_RESET
631 }, {
632 .int_msk = BIT(17),
633 .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
634 .reset_level = HNAE3_GLOBAL_RESET
635 }, {
636 .int_msk = BIT(19),
637 .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
638 .reset_level = HNAE3_GLOBAL_RESET
639 }, {
640 .int_msk = BIT(21),
641 .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
642 .reset_level = HNAE3_GLOBAL_RESET
643 }, {
644 /* sentinel */
645 }
646 };
647
648 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
649 {
650 .int_msk = BIT(0),
651 .msg = "egu_cge_afifo_ecc_1bit_err",
652 .reset_level = HNAE3_NONE_RESET
653 }, {
654 .int_msk = BIT(1),
655 .msg = "egu_cge_afifo_ecc_mbit_err",
656 .reset_level = HNAE3_GLOBAL_RESET
657 }, {
658 .int_msk = BIT(2),
659 .msg = "egu_lge_afifo_ecc_1bit_err",
660 .reset_level = HNAE3_NONE_RESET
661 }, {
662 .int_msk = BIT(3),
663 .msg = "egu_lge_afifo_ecc_mbit_err",
664 .reset_level = HNAE3_GLOBAL_RESET
665 }, {
666 .int_msk = BIT(4),
667 .msg = "cge_igu_afifo_ecc_1bit_err",
668 .reset_level = HNAE3_NONE_RESET
669 }, {
670 .int_msk = BIT(5),
671 .msg = "cge_igu_afifo_ecc_mbit_err",
672 .reset_level = HNAE3_GLOBAL_RESET
673 }, {
674 .int_msk = BIT(6),
675 .msg = "lge_igu_afifo_ecc_1bit_err",
676 .reset_level = HNAE3_NONE_RESET
677 }, {
678 .int_msk = BIT(7),
679 .msg = "lge_igu_afifo_ecc_mbit_err",
680 .reset_level = HNAE3_GLOBAL_RESET
681 }, {
682 .int_msk = BIT(8),
683 .msg = "cge_igu_afifo_overflow_err",
684 .reset_level = HNAE3_GLOBAL_RESET
685 }, {
686 .int_msk = BIT(9),
687 .msg = "lge_igu_afifo_overflow_err",
688 .reset_level = HNAE3_GLOBAL_RESET
689 }, {
690 .int_msk = BIT(10),
691 .msg = "egu_cge_afifo_underrun_err",
692 .reset_level = HNAE3_GLOBAL_RESET
693 }, {
694 .int_msk = BIT(11),
695 .msg = "egu_lge_afifo_underrun_err",
696 .reset_level = HNAE3_GLOBAL_RESET
697 }, {
698 .int_msk = BIT(12),
699 .msg = "egu_ge_afifo_underrun_err",
700 .reset_level = HNAE3_GLOBAL_RESET
701 }, {
702 .int_msk = BIT(13),
703 .msg = "ge_igu_afifo_overflow_err",
704 .reset_level = HNAE3_GLOBAL_RESET
705 }, {
706 /* sentinel */
707 }
708 };
709
710 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
711 {
712 .int_msk = BIT(13),
713 .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
714 .reset_level = HNAE3_GLOBAL_RESET
715 }, {
716 .int_msk = BIT(14),
717 .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
718 .reset_level = HNAE3_GLOBAL_RESET
719 }, {
720 .int_msk = BIT(15),
721 .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
722 .reset_level = HNAE3_GLOBAL_RESET
723 }, {
724 .int_msk = BIT(16),
725 .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
726 .reset_level = HNAE3_GLOBAL_RESET
727 }, {
728 .int_msk = BIT(17),
729 .msg = "rcb_tx_ring_ecc_mbit_err",
730 .reset_level = HNAE3_GLOBAL_RESET
731 }, {
732 .int_msk = BIT(18),
733 .msg = "rcb_rx_ring_ecc_mbit_err",
734 .reset_level = HNAE3_GLOBAL_RESET
735 }, {
736 .int_msk = BIT(19),
737 .msg = "rcb_tx_fbd_ecc_mbit_err",
738 .reset_level = HNAE3_GLOBAL_RESET
739 }, {
740 .int_msk = BIT(20),
741 .msg = "rcb_rx_ebd_ecc_mbit_err",
742 .reset_level = HNAE3_GLOBAL_RESET
743 }, {
744 .int_msk = BIT(21),
745 .msg = "rcb_tso_info_ecc_mbit_err",
746 .reset_level = HNAE3_GLOBAL_RESET
747 }, {
748 .int_msk = BIT(22),
749 .msg = "rcb_tx_int_info_ecc_mbit_err",
750 .reset_level = HNAE3_GLOBAL_RESET
751 }, {
752 .int_msk = BIT(23),
753 .msg = "rcb_rx_int_info_ecc_mbit_err",
754 .reset_level = HNAE3_GLOBAL_RESET
755 }, {
756 .int_msk = BIT(24),
757 .msg = "tpu_tx_pkt_0_ecc_mbit_err",
758 .reset_level = HNAE3_GLOBAL_RESET
759 }, {
760 .int_msk = BIT(25),
761 .msg = "tpu_tx_pkt_1_ecc_mbit_err",
762 .reset_level = HNAE3_GLOBAL_RESET
763 }, {
764 .int_msk = BIT(26),
765 .msg = "rd_bus_err",
766 .reset_level = HNAE3_GLOBAL_RESET
767 }, {
768 .int_msk = BIT(27),
769 .msg = "wr_bus_err",
770 .reset_level = HNAE3_GLOBAL_RESET
771 }, {
772 .int_msk = BIT(28),
773 .msg = "reg_search_miss",
774 .reset_level = HNAE3_GLOBAL_RESET
775 }, {
776 .int_msk = BIT(29),
777 .msg = "rx_q_search_miss",
778 .reset_level = HNAE3_NONE_RESET
779 }, {
780 .int_msk = BIT(30),
781 .msg = "ooo_ecc_err_detect",
782 .reset_level = HNAE3_NONE_RESET
783 }, {
784 .int_msk = BIT(31),
785 .msg = "ooo_ecc_err_multpl",
786 .reset_level = HNAE3_GLOBAL_RESET
787 }, {
788 /* sentinel */
789 }
790 };
791
792 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
793 {
794 .int_msk = BIT(4),
795 .msg = "gro_bd_ecc_mbit_err",
796 .reset_level = HNAE3_GLOBAL_RESET
797 }, {
798 .int_msk = BIT(5),
799 .msg = "gro_context_ecc_mbit_err",
800 .reset_level = HNAE3_GLOBAL_RESET
801 }, {
802 .int_msk = BIT(6),
803 .msg = "rx_stash_cfg_ecc_mbit_err",
804 .reset_level = HNAE3_GLOBAL_RESET
805 }, {
806 .int_msk = BIT(7),
807 .msg = "axi_rd_fbd_ecc_mbit_err",
808 .reset_level = HNAE3_GLOBAL_RESET
809 }, {
810 /* sentinel */
811 }
812 };
813
814 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
815 {
816 .int_msk = BIT(0),
817 .msg = "over_8bd_no_fe",
818 .reset_level = HNAE3_FUNC_RESET
819 }, {
820 .int_msk = BIT(1),
821 .msg = "tso_mss_cmp_min_err",
822 .reset_level = HNAE3_NONE_RESET
823 }, {
824 .int_msk = BIT(2),
825 .msg = "tso_mss_cmp_max_err",
826 .reset_level = HNAE3_NONE_RESET
827 }, {
828 .int_msk = BIT(3),
829 .msg = "tx_rd_fbd_poison",
830 .reset_level = HNAE3_FUNC_RESET
831 }, {
832 .int_msk = BIT(4),
833 .msg = "rx_rd_ebd_poison",
834 .reset_level = HNAE3_FUNC_RESET
835 }, {
836 .int_msk = BIT(5),
837 .msg = "buf_wait_timeout",
838 .reset_level = HNAE3_NONE_RESET
839 }, {
840 /* sentinel */
841 }
842 };
843
844 static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
845 {
846 .int_msk = BIT(0),
847 .msg = "buf_sum_err",
848 .reset_level = HNAE3_NONE_RESET
849 }, {
850 .int_msk = BIT(1),
851 .msg = "ppp_mb_num_err",
852 .reset_level = HNAE3_NONE_RESET
853 }, {
854 .int_msk = BIT(2),
855 .msg = "ppp_mbid_err",
856 .reset_level = HNAE3_GLOBAL_RESET
857 }, {
858 .int_msk = BIT(3),
859 .msg = "ppp_rlt_mac_err",
860 .reset_level = HNAE3_GLOBAL_RESET
861 }, {
862 .int_msk = BIT(4),
863 .msg = "ppp_rlt_host_err",
864 .reset_level = HNAE3_GLOBAL_RESET
865 }, {
866 .int_msk = BIT(5),
867 .msg = "cks_edit_position_err",
868 .reset_level = HNAE3_GLOBAL_RESET
869 }, {
870 .int_msk = BIT(6),
871 .msg = "cks_edit_condition_err",
872 .reset_level = HNAE3_GLOBAL_RESET
873 }, {
874 .int_msk = BIT(7),
875 .msg = "vlan_edit_condition_err",
876 .reset_level = HNAE3_GLOBAL_RESET
877 }, {
878 .int_msk = BIT(8),
879 .msg = "vlan_num_ot_err",
880 .reset_level = HNAE3_GLOBAL_RESET
881 }, {
882 .int_msk = BIT(9),
883 .msg = "vlan_num_in_err",
884 .reset_level = HNAE3_GLOBAL_RESET
885 }, {
886 /* sentinel */
887 }
888 };
889
/* Expand to one initializer for index x: the mask is BIT(x), the message
 * is "ssu_mem<x>_ecc_mbit_err" (x is stringized into the literal), and
 * the reset level is HNAE3_GLOBAL_RESET.
 */
#define HCLGE_SSU_MEM_ECC_ERR(x) \
	{ .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
	  .reset_level = HNAE3_GLOBAL_RESET }
896
897 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
898 HCLGE_SSU_MEM_ECC_ERR(0),
899 HCLGE_SSU_MEM_ECC_ERR(1),
900 HCLGE_SSU_MEM_ECC_ERR(2),
901 HCLGE_SSU_MEM_ECC_ERR(3),
902 HCLGE_SSU_MEM_ECC_ERR(4),
903 HCLGE_SSU_MEM_ECC_ERR(5),
904 HCLGE_SSU_MEM_ECC_ERR(6),
905 HCLGE_SSU_MEM_ECC_ERR(7),
906 HCLGE_SSU_MEM_ECC_ERR(8),
907 HCLGE_SSU_MEM_ECC_ERR(9),
908 HCLGE_SSU_MEM_ECC_ERR(10),
909 HCLGE_SSU_MEM_ECC_ERR(11),
910 HCLGE_SSU_MEM_ECC_ERR(12),
911 HCLGE_SSU_MEM_ECC_ERR(13),
912 HCLGE_SSU_MEM_ECC_ERR(14),
913 HCLGE_SSU_MEM_ECC_ERR(15),
914 HCLGE_SSU_MEM_ECC_ERR(16),
915 HCLGE_SSU_MEM_ECC_ERR(17),
916 HCLGE_SSU_MEM_ECC_ERR(18),
917 HCLGE_SSU_MEM_ECC_ERR(19),
918 HCLGE_SSU_MEM_ECC_ERR(20),
919 HCLGE_SSU_MEM_ECC_ERR(21),
920 HCLGE_SSU_MEM_ECC_ERR(22),
921 HCLGE_SSU_MEM_ECC_ERR(23),
922 HCLGE_SSU_MEM_ECC_ERR(24),
923 HCLGE_SSU_MEM_ECC_ERR(25),
924 HCLGE_SSU_MEM_ECC_ERR(26),
925 HCLGE_SSU_MEM_ECC_ERR(27),
926 HCLGE_SSU_MEM_ECC_ERR(28),
927 HCLGE_SSU_MEM_ECC_ERR(29),
928 HCLGE_SSU_MEM_ECC_ERR(30),
929 HCLGE_SSU_MEM_ECC_ERR(31),
930 { /* sentinel */ }
931 };
932
933 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
934 {
935 .int_msk = BIT(0),
936 .msg = "roc_pkt_without_key_port",
937 .reset_level = HNAE3_FUNC_RESET
938 }, {
939 .int_msk = BIT(1),
940 .msg = "tpu_pkt_without_key_port",
941 .reset_level = HNAE3_GLOBAL_RESET
942 }, {
943 .int_msk = BIT(2),
944 .msg = "igu_pkt_without_key_port",
945 .reset_level = HNAE3_GLOBAL_RESET
946 }, {
947 .int_msk = BIT(3),
948 .msg = "roc_eof_mis_match_port",
949 .reset_level = HNAE3_GLOBAL_RESET
950 }, {
951 .int_msk = BIT(4),
952 .msg = "tpu_eof_mis_match_port",
953 .reset_level = HNAE3_GLOBAL_RESET
954 }, {
955 .int_msk = BIT(5),
956 .msg = "igu_eof_mis_match_port",
957 .reset_level = HNAE3_GLOBAL_RESET
958 }, {
959 .int_msk = BIT(6),
960 .msg = "roc_sof_mis_match_port",
961 .reset_level = HNAE3_GLOBAL_RESET
962 }, {
963 .int_msk = BIT(7),
964 .msg = "tpu_sof_mis_match_port",
965 .reset_level = HNAE3_GLOBAL_RESET
966 }, {
967 .int_msk = BIT(8),
968 .msg = "igu_sof_mis_match_port",
969 .reset_level = HNAE3_GLOBAL_RESET
970 }, {
971 .int_msk = BIT(11),
972 .msg = "ets_rd_int_rx_port",
973 .reset_level = HNAE3_GLOBAL_RESET
974 }, {
975 .int_msk = BIT(12),
976 .msg = "ets_wr_int_rx_port",
977 .reset_level = HNAE3_GLOBAL_RESET
978 }, {
979 .int_msk = BIT(13),
980 .msg = "ets_rd_int_tx_port",
981 .reset_level = HNAE3_GLOBAL_RESET
982 }, {
983 .int_msk = BIT(14),
984 .msg = "ets_wr_int_tx_port",
985 .reset_level = HNAE3_GLOBAL_RESET
986 }, {
987 /* sentinel */
988 }
989 };
990
991 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
992 {
993 .int_msk = BIT(0),
994 .msg = "ig_mac_inf_int",
995 .reset_level = HNAE3_GLOBAL_RESET
996 }, {
997 .int_msk = BIT(1),
998 .msg = "ig_host_inf_int",
999 .reset_level = HNAE3_GLOBAL_RESET
1000 }, {
1001 .int_msk = BIT(2),
1002 .msg = "ig_roc_buf_int",
1003 .reset_level = HNAE3_GLOBAL_RESET
1004 }, {
1005 .int_msk = BIT(3),
1006 .msg = "ig_host_data_fifo_int",
1007 .reset_level = HNAE3_GLOBAL_RESET
1008 }, {
1009 .int_msk = BIT(4),
1010 .msg = "ig_host_key_fifo_int",
1011 .reset_level = HNAE3_GLOBAL_RESET
1012 }, {
1013 .int_msk = BIT(5),
1014 .msg = "tx_qcn_fifo_int",
1015 .reset_level = HNAE3_GLOBAL_RESET
1016 }, {
1017 .int_msk = BIT(6),
1018 .msg = "rx_qcn_fifo_int",
1019 .reset_level = HNAE3_GLOBAL_RESET
1020 }, {
1021 .int_msk = BIT(7),
1022 .msg = "tx_pf_rd_fifo_int",
1023 .reset_level = HNAE3_GLOBAL_RESET
1024 }, {
1025 .int_msk = BIT(8),
1026 .msg = "rx_pf_rd_fifo_int",
1027 .reset_level = HNAE3_GLOBAL_RESET
1028 }, {
1029 .int_msk = BIT(9),
1030 .msg = "qm_eof_fifo_int",
1031 .reset_level = HNAE3_GLOBAL_RESET
1032 }, {
1033 .int_msk = BIT(10),
1034 .msg = "mb_rlt_fifo_int",
1035 .reset_level = HNAE3_GLOBAL_RESET
1036 }, {
1037 .int_msk = BIT(11),
1038 .msg = "dup_uncopy_fifo_int",
1039 .reset_level = HNAE3_GLOBAL_RESET
1040 }, {
1041 .int_msk = BIT(12),
1042 .msg = "dup_cnt_rd_fifo_int",
1043 .reset_level = HNAE3_GLOBAL_RESET
1044 }, {
1045 .int_msk = BIT(13),
1046 .msg = "dup_cnt_drop_fifo_int",
1047 .reset_level = HNAE3_GLOBAL_RESET
1048 }, {
1049 .int_msk = BIT(14),
1050 .msg = "dup_cnt_wrb_fifo_int",
1051 .reset_level = HNAE3_GLOBAL_RESET
1052 }, {
1053 .int_msk = BIT(15),
1054 .msg = "host_cmd_fifo_int",
1055 .reset_level = HNAE3_GLOBAL_RESET
1056 }, {
1057 .int_msk = BIT(16),
1058 .msg = "mac_cmd_fifo_int",
1059 .reset_level = HNAE3_GLOBAL_RESET
1060 }, {
1061 .int_msk = BIT(17),
1062 .msg = "host_cmd_bitmap_empty_int",
1063 .reset_level = HNAE3_GLOBAL_RESET
1064 }, {
1065 .int_msk = BIT(18),
1066 .msg = "mac_cmd_bitmap_empty_int",
1067 .reset_level = HNAE3_GLOBAL_RESET
1068 }, {
1069 .int_msk = BIT(19),
1070 .msg = "dup_bitmap_empty_int",
1071 .reset_level = HNAE3_GLOBAL_RESET
1072 }, {
1073 .int_msk = BIT(20),
1074 .msg = "out_queue_bitmap_empty_int",
1075 .reset_level = HNAE3_GLOBAL_RESET
1076 }, {
1077 .int_msk = BIT(21),
1078 .msg = "bank2_bitmap_empty_int",
1079 .reset_level = HNAE3_GLOBAL_RESET
1080 }, {
1081 .int_msk = BIT(22),
1082 .msg = "bank1_bitmap_empty_int",
1083 .reset_level = HNAE3_GLOBAL_RESET
1084 }, {
1085 .int_msk = BIT(23),
1086 .msg = "bank0_bitmap_empty_int",
1087 .reset_level = HNAE3_GLOBAL_RESET
1088 }, {
1089 /* sentinel */
1090 }
1091 };
1092
1093 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
1094 {
1095 .int_msk = BIT(0),
1096 .msg = "ets_rd_int_rx_tcg",
1097 .reset_level = HNAE3_GLOBAL_RESET
1098 }, {
1099 .int_msk = BIT(1),
1100 .msg = "ets_wr_int_rx_tcg",
1101 .reset_level = HNAE3_GLOBAL_RESET
1102 }, {
1103 .int_msk = BIT(2),
1104 .msg = "ets_rd_int_tx_tcg",
1105 .reset_level = HNAE3_GLOBAL_RESET
1106 }, {
1107 .int_msk = BIT(3),
1108 .msg = "ets_wr_int_tx_tcg",
1109 .reset_level = HNAE3_GLOBAL_RESET
1110 }, {
1111 /* sentinel */
1112 }
1113 };
1114
1115 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
1116 {
1117 .int_msk = BIT(0),
1118 .msg = "roc_pkt_without_key_port",
1119 .reset_level = HNAE3_FUNC_RESET
1120 }, {
1121 .int_msk = BIT(9),
1122 .msg = "low_water_line_err_port",
1123 .reset_level = HNAE3_NONE_RESET
1124 }, {
1125 .int_msk = BIT(10),
1126 .msg = "hi_water_line_err_port",
1127 .reset_level = HNAE3_GLOBAL_RESET
1128 }, {
1129 /* sentinel */
1130 }
1131 };
1132
1133 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
1134 {
1135 .int_msk = 0,
1136 .msg = "rocee qmm ovf: sgid invalid err"
1137 }, {
1138 .int_msk = 0x4,
1139 .msg = "rocee qmm ovf: sgid ovf err"
1140 }, {
1141 .int_msk = 0x8,
1142 .msg = "rocee qmm ovf: smac invalid err"
1143 }, {
1144 .int_msk = 0xC,
1145 .msg = "rocee qmm ovf: smac ovf err"
1146 }, {
1147 .int_msk = 0x10,
1148 .msg = "rocee qmm ovf: cqc invalid err"
1149 }, {
1150 .int_msk = 0x11,
1151 .msg = "rocee qmm ovf: cqc ovf err"
1152 }, {
1153 .int_msk = 0x12,
1154 .msg = "rocee qmm ovf: cqc hopnum err"
1155 }, {
1156 .int_msk = 0x13,
1157 .msg = "rocee qmm ovf: cqc ba0 err"
1158 }, {
1159 .int_msk = 0x14,
1160 .msg = "rocee qmm ovf: srqc invalid err"
1161 }, {
1162 .int_msk = 0x15,
1163 .msg = "rocee qmm ovf: srqc ovf err"
1164 }, {
1165 .int_msk = 0x16,
1166 .msg = "rocee qmm ovf: srqc hopnum err"
1167 }, {
1168 .int_msk = 0x17,
1169 .msg = "rocee qmm ovf: srqc ba0 err"
1170 }, {
1171 .int_msk = 0x18,
1172 .msg = "rocee qmm ovf: mpt invalid err"
1173 }, {
1174 .int_msk = 0x19,
1175 .msg = "rocee qmm ovf: mpt ovf err"
1176 }, {
1177 .int_msk = 0x1A,
1178 .msg = "rocee qmm ovf: mpt hopnum err"
1179 }, {
1180 .int_msk = 0x1B,
1181 .msg = "rocee qmm ovf: mpt ba0 err"
1182 }, {
1183 .int_msk = 0x1C,
1184 .msg = "rocee qmm ovf: qpc invalid err"
1185 }, {
1186 .int_msk = 0x1D,
1187 .msg = "rocee qmm ovf: qpc ovf err"
1188 }, {
1189 .int_msk = 0x1E,
1190 .msg = "rocee qmm ovf: qpc hopnum err"
1191 }, {
1192 .int_msk = 0x1F,
1193 .msg = "rocee qmm ovf: qpc ba0 err"
1194 }, {
1195 /* sentinel */
1196 }
1197 };
1198
/* Printable names for the hardware module identifiers.
 *
 * Each entry pairs a MODULE_* id with the string used to report it.
 * The table has no sentinel entry.
 * NOTE(review): presumably walked with ARRAY_SIZE() by a lookup outside
 * this part of the file - confirm before adding a terminator.
 */
static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
	{
		.module_id = MODULE_NONE,
		.msg = "MODULE_NONE"
	}, {
		.module_id = MODULE_BIOS_COMMON,
		.msg = "MODULE_BIOS_COMMON"
	}, {
		.module_id = MODULE_GE,
		.msg = "MODULE_GE"
	}, {
		.module_id = MODULE_IGU_EGU,
		.msg = "MODULE_IGU_EGU"
	}, {
		.module_id = MODULE_LGE,
		.msg = "MODULE_LGE"
	}, {
		.module_id = MODULE_NCSI,
		.msg = "MODULE_NCSI"
	}, {
		.module_id = MODULE_PPP,
		.msg = "MODULE_PPP"
	}, {
		.module_id = MODULE_QCN,
		.msg = "MODULE_QCN"
	}, {
		.module_id = MODULE_RCB_RX,
		.msg = "MODULE_RCB_RX"
	}, {
		.module_id = MODULE_RTC,
		.msg = "MODULE_RTC"
	}, {
		.module_id = MODULE_SSU,
		.msg = "MODULE_SSU"
	}, {
		.module_id = MODULE_TM,
		.msg = "MODULE_TM"
	}, {
		.module_id = MODULE_RCB_TX,
		.msg = "MODULE_RCB_TX"
	}, {
		.module_id = MODULE_TXDMA,
		.msg = "MODULE_TXDMA"
	}, {
		.module_id = MODULE_MASTER,
		.msg = "MODULE_MASTER"
	}, {
		.module_id = MODULE_HIMAC,
		.msg = "MODULE_HIMAC"
	}, {
		.module_id = MODULE_ROCEE_TOP,
		.msg = "MODULE_ROCEE_TOP"
	}, {
		.module_id = MODULE_ROCEE_TIMER,
		.msg = "MODULE_ROCEE_TIMER"
	}, {
		.module_id = MODULE_ROCEE_MDB,
		.msg = "MODULE_ROCEE_MDB"
	}, {
		.module_id = MODULE_ROCEE_TSP,
		.msg = "MODULE_ROCEE_TSP"
	}, {
		.module_id = MODULE_ROCEE_TRP,
		.msg = "MODULE_ROCEE_TRP"
	}, {
		.module_id = MODULE_ROCEE_SCC,
		.msg = "MODULE_ROCEE_SCC"
	}, {
		.module_id = MODULE_ROCEE_CAEP,
		.msg = "MODULE_ROCEE_CAEP"
	}, {
		.module_id = MODULE_ROCEE_GEN_AC,
		.msg = "MODULE_ROCEE_GEN_AC"
	}, {
		.module_id = MODULE_ROCEE_QMM,
		.msg = "MODULE_ROCEE_QMM"
	}, {
		.module_id = MODULE_ROCEE_LSAN,
		.msg = "MODULE_ROCEE_LSAN"
	}
};
1280
/* Printable names for the hardware error type identifiers.
 *
 * Each entry pairs an error type id with the string used to report it.
 * The table has no sentinel entry.
 * NOTE(review): presumably walked with ARRAY_SIZE() by a lookup outside
 * this part of the file - confirm before adding a terminator.
 */
static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
	{
		.type_id = NONE_ERROR,
		.msg = "none_error"
	}, {
		.type_id = FIFO_ERROR,
		.msg = "fifo_error"
	}, {
		.type_id = MEMORY_ERROR,
		.msg = "memory_error"
	}, {
		.type_id = POISON_ERROR,
		.msg = "poison_error"
	}, {
		.type_id = MSIX_ECC_ERROR,
		.msg = "msix_ecc_error"
	}, {
		.type_id = TQP_INT_ECC_ERROR,
		.msg = "tqp_int_ecc_error"
	}, {
		.type_id = PF_ABNORMAL_INT_ERROR,
		.msg = "pf_abnormal_int_error"
	}, {
		.type_id = MPF_ABNORMAL_INT_ERROR,
		.msg = "mpf_abnormal_int_error"
	}, {
		.type_id = COMMON_ERROR,
		.msg = "common_error"
	}, {
		.type_id = PORT_ERROR,
		.msg = "port_error"
	}, {
		.type_id = ETS_ERROR,
		.msg = "ets_error"
	}, {
		.type_id = NCSI_ERROR,
		.msg = "ncsi_error"
	}, {
		.type_id = GLB_ERROR,
		.msg = "glb_error"
	}, {
		.type_id = LINK_ERROR,
		.msg = "link_error"
	}, {
		.type_id = PTP_ERROR,
		.msg = "ptp_error"
	}, {
		.type_id = ROCEE_NORMAL_ERR,
		.msg = "rocee_normal_error"
	}, {
		.type_id = ROCEE_OVF_ERR,
		.msg = "rocee_ovf_error"
	}, {
		.type_id = ROCEE_BUS_ERR,
		.msg = "rocee_bus_error"
	},
};
1338
/* hclge_log_error: log the errors of a table that are set in a status word
 * @dev: device used for logging
 * @reg: name of the status register, printed in front of each message
 * @err: error table, terminated by an entry whose msg is NULL
 * @err_sts: error status word to decode
 * @reset_requests: bitmap collecting the reset levels to request
 *
 * Every entry whose int_msk overlaps @err_sts is reported with dev_err().
 * If such an entry carries a reset level other than HNAE3_NONE_RESET, the
 * bit for that level is set in @reset_requests.
 */
static void hclge_log_error(struct device *dev, char *reg,
			    const struct hclge_hw_error *err,
			    u32 err_sts, unsigned long *reset_requests)
{
	const struct hclge_hw_error *entry;

	for (entry = err; entry->msg; entry++) {
		if (!(entry->int_msk & err_sts))
			continue;

		dev_err(dev, "%s %s found [error status=0x%x]\n",
			reg, entry->msg, err_sts);

		/* entries without a reset level do not request a reset */
		if (!entry->reset_level ||
		    entry->reset_level == HNAE3_NONE_RESET)
			continue;

		set_bit(entry->reset_level, reset_requests);
	}
}
1354
1355 /* hclge_cmd_query_error: read the error information
1356 * @hdev: pointer to struct hclge_dev
1357 * @desc: descriptor for describing the command
1358 * @cmd: command opcode
1359 * @flag: flag for extended command structure
1360 *
1361 * This function query the error info from hw register/s using command
1362 */
hclge_cmd_query_error(struct hclge_dev * hdev,struct hclge_desc * desc,u32 cmd,u16 flag)1363 static int hclge_cmd_query_error(struct hclge_dev *hdev,
1364 struct hclge_desc *desc, u32 cmd, u16 flag)
1365 {
1366 struct device *dev = &hdev->pdev->dev;
1367 int desc_num = 1;
1368 int ret;
1369
1370 hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
1371 if (flag) {
1372 desc[0].flag |= cpu_to_le16(flag);
1373 hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
1374 desc_num = 2;
1375 }
1376
1377 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
1378 if (ret)
1379 dev_err(dev, "query error cmd failed (%d)\n", ret);
1380
1381 return ret;
1382 }
1383
hclge_clear_mac_tnl_int(struct hclge_dev * hdev)1384 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
1385 {
1386 struct hclge_desc desc;
1387
1388 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
1389 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);
1390
1391 return hclge_cmd_send(&hdev->hw, &desc, 1);
1392 }
1393
hclge_config_common_hw_err_int(struct hclge_dev * hdev,bool en)1394 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
1395 {
1396 struct device *dev = &hdev->pdev->dev;
1397 struct hclge_desc desc[2];
1398 int ret;
1399
1400 /* configure common error interrupts */
1401 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
1402 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1403 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
1404
1405 if (en) {
1406 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
1407 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
1408 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
1409 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
1410 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
1411 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
1412 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
1413 }
1414
1415 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
1416 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
1417 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
1418 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
1419 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
1420 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
1421 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
1422
1423 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1424 if (ret)
1425 dev_err(dev,
1426 "fail(%d) to configure common err interrupts\n", ret);
1427
1428 return ret;
1429 }
1430
hclge_config_ncsi_hw_err_int(struct hclge_dev * hdev,bool en)1431 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
1432 {
1433 struct device *dev = &hdev->pdev->dev;
1434 struct hclge_desc desc;
1435 int ret;
1436
1437 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
1438 return 0;
1439
1440 /* configure NCSI error interrupts */
1441 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
1442 if (en)
1443 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);
1444
1445 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1446 if (ret)
1447 dev_err(dev,
1448 "fail(%d) to configure NCSI error interrupts\n", ret);
1449
1450 return ret;
1451 }
1452
hclge_config_igu_egu_hw_err_int(struct hclge_dev * hdev,bool en)1453 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
1454 {
1455 struct device *dev = &hdev->pdev->dev;
1456 struct hclge_desc desc;
1457 int ret;
1458
1459 /* configure IGU,EGU error interrupts */
1460 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
1461 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
1462 if (en)
1463 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
1464
1465 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
1466
1467 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1468 if (ret) {
1469 dev_err(dev,
1470 "fail(%d) to configure IGU common interrupts\n", ret);
1471 return ret;
1472 }
1473
1474 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
1475 if (en)
1476 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);
1477
1478 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);
1479
1480 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1481 if (ret) {
1482 dev_err(dev,
1483 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
1484 return ret;
1485 }
1486
1487 ret = hclge_config_ncsi_hw_err_int(hdev, en);
1488
1489 return ret;
1490 }
1491
hclge_config_ppp_error_interrupt(struct hclge_dev * hdev,u32 cmd,bool en)1492 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
1493 bool en)
1494 {
1495 struct device *dev = &hdev->pdev->dev;
1496 struct hclge_desc desc[2];
1497 int ret;
1498
1499 /* configure PPP error interrupts */
1500 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
1501 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1502 hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
1503
1504 if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
1505 if (en) {
1506 desc[0].data[0] =
1507 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
1508 desc[0].data[1] =
1509 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
1510 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
1511 }
1512
1513 desc[1].data[0] =
1514 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
1515 desc[1].data[1] =
1516 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
1517 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
1518 desc[1].data[2] =
1519 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
1520 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
1521 if (en) {
1522 desc[0].data[0] =
1523 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
1524 desc[0].data[1] =
1525 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
1526 }
1527
1528 desc[1].data[0] =
1529 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
1530 desc[1].data[1] =
1531 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
1532 }
1533
1534 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1535 if (ret)
1536 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);
1537
1538 return ret;
1539 }
1540
hclge_config_ppp_hw_err_int(struct hclge_dev * hdev,bool en)1541 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
1542 {
1543 int ret;
1544
1545 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
1546 en);
1547 if (ret)
1548 return ret;
1549
1550 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
1551 en);
1552
1553 return ret;
1554 }
1555
hclge_config_tm_hw_err_int(struct hclge_dev * hdev,bool en)1556 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
1557 {
1558 struct device *dev = &hdev->pdev->dev;
1559 struct hclge_desc desc;
1560 int ret;
1561
1562 /* configure TM SCH hw errors */
1563 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
1564 if (en)
1565 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);
1566
1567 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1568 if (ret) {
1569 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
1570 return ret;
1571 }
1572
1573 /* configure TM QCN hw errors */
1574 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false);
1575 desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE);
1576 if (en) {
1577 desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN);
1578 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);
1579 }
1580
1581 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1582 if (ret)
1583 dev_err(dev,
1584 "fail(%d) to configure TM QCN mem errors\n", ret);
1585
1586 return ret;
1587 }
1588
hclge_config_mac_err_int(struct hclge_dev * hdev,bool en)1589 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
1590 {
1591 struct device *dev = &hdev->pdev->dev;
1592 struct hclge_desc desc;
1593 int ret;
1594
1595 /* configure MAC common error interrupts */
1596 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
1597 if (en)
1598 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);
1599
1600 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);
1601
1602 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
1603 if (ret)
1604 dev_err(dev,
1605 "fail(%d) to configure MAC COMMON error intr\n", ret);
1606
1607 return ret;
1608 }
1609
/* hclge_config_mac_tnl_int: enable or disable the MAC TNL interrupt
 * @hdev: pointer to struct hclge_dev
 * @en: true to write HCLGE_MAC_TNL_INT_EN, false to write 0
 *
 * The mask value is written regardless of @en.
 *
 * Return: result of hclge_cmd_send().
 */
int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
{
	struct hclge_desc tnl_desc;

	hclge_cmd_setup_basic_desc(&tnl_desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
	tnl_desc.data[0] = en ? cpu_to_le32(HCLGE_MAC_TNL_INT_EN) : 0;
	tnl_desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);

	return hclge_cmd_send(&hdev->hw, &tnl_desc, 1);
}
1624
hclge_config_ppu_error_interrupts(struct hclge_dev * hdev,u32 cmd,bool en)1625 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
1626 bool en)
1627 {
1628 struct device *dev = &hdev->pdev->dev;
1629 struct hclge_desc desc[2];
1630 int desc_num = 1;
1631 int ret;
1632
1633 /* configure PPU error interrupts */
1634 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
1635 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
1636 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1637 hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
1638 if (en) {
1639 desc[0].data[0] =
1640 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
1641 desc[0].data[1] =
1642 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
1643 desc[1].data[3] =
1644 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
1645 desc[1].data[4] =
1646 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
1647 }
1648
1649 desc[1].data[0] =
1650 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
1651 desc[1].data[1] =
1652 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
1653 desc[1].data[2] =
1654 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
1655 desc[1].data[3] |=
1656 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
1657 desc_num = 2;
1658 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
1659 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
1660 if (en)
1661 desc[0].data[0] =
1662 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);
1663
1664 desc[0].data[2] =
1665 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
1666 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
1667 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
1668 if (en)
1669 desc[0].data[0] =
1670 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);
1671
1672 desc[0].data[2] =
1673 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
1674 } else {
1675 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
1676 return -EINVAL;
1677 }
1678
1679 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);
1680
1681 return ret;
1682 }
1683
hclge_config_ppu_hw_err_int(struct hclge_dev * hdev,bool en)1684 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
1685 {
1686 struct device *dev = &hdev->pdev->dev;
1687 int ret;
1688
1689 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
1690 en);
1691 if (ret) {
1692 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
1693 ret);
1694 return ret;
1695 }
1696
1697 ret = hclge_config_ppu_error_interrupts(hdev,
1698 HCLGE_PPU_MPF_OTHER_INT_CMD,
1699 en);
1700 if (ret) {
1701 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
1702 return ret;
1703 }
1704
1705 ret = hclge_config_ppu_error_interrupts(hdev,
1706 HCLGE_PPU_PF_OTHER_INT_CMD, en);
1707 if (ret)
1708 dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
1709 ret);
1710 return ret;
1711 }
1712
hclge_config_ssu_hw_err_int(struct hclge_dev * hdev,bool en)1713 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
1714 {
1715 struct device *dev = &hdev->pdev->dev;
1716 struct hclge_desc desc[2];
1717 int ret;
1718
1719 /* configure SSU ecc error interrupts */
1720 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
1721 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1722 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
1723 if (en) {
1724 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
1725 desc[0].data[1] =
1726 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
1727 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
1728 }
1729
1730 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
1731 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
1732 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);
1733
1734 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1735 if (ret) {
1736 dev_err(dev,
1737 "fail(%d) to configure SSU ECC error interrupt\n", ret);
1738 return ret;
1739 }
1740
1741 /* configure SSU common error interrupts */
1742 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
1743 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
1744 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);
1745
1746 if (en) {
1747 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
1748 desc[0].data[0] =
1749 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
1750 else
1751 desc[0].data[0] =
1752 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
1753 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
1754 desc[0].data[2] =
1755 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
1756 }
1757
1758 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
1759 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
1760 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);
1761
1762 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
1763 if (ret)
1764 dev_err(dev,
1765 "fail(%d) to configure SSU COMMON error intr\n", ret);
1766
1767 return ret;
1768 }
1769
1770 /* hclge_query_bd_num: query number of buffer descriptors
1771 * @hdev: pointer to struct hclge_dev
1772 * @is_ras: true for ras, false for msix
1773 * @mpf_bd_num: number of main PF interrupt buffer descriptors
1774 * @pf_bd_num: number of not main PF interrupt buffer descriptors
1775 *
1776 * This function querys number of mpf and pf buffer descriptors.
1777 */
hclge_query_bd_num(struct hclge_dev * hdev,bool is_ras,u32 * mpf_bd_num,u32 * pf_bd_num)1778 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
1779 u32 *mpf_bd_num, u32 *pf_bd_num)
1780 {
1781 struct device *dev = &hdev->pdev->dev;
1782 u32 mpf_min_bd_num, pf_min_bd_num;
1783 enum hclge_opcode_type opcode;
1784 struct hclge_desc desc_bd;
1785 int ret;
1786
1787 if (is_ras) {
1788 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
1789 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
1790 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
1791 } else {
1792 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
1793 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
1794 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
1795 }
1796
1797 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
1798 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
1799 if (ret) {
1800 dev_err(dev, "fail(%d) to query msix int status bd num\n",
1801 ret);
1802 return ret;
1803 }
1804
1805 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
1806 *pf_bd_num = le32_to_cpu(desc_bd.data[1]);
1807 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
1808 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n",
1809 *mpf_bd_num, *pf_bd_num);
1810 return -EINVAL;
1811 }
1812
1813 return 0;
1814 }
1815
1816 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors
1817 * @hdev: pointer to struct hclge_dev
1818 * @desc: descriptor for describing the command
1819 * @num: number of extended command structures
1820 *
1821 * This function handles all the main PF RAS errors in the
1822 * hw register/s using command.
1823 */
hclge_handle_mpf_ras_error(struct hclge_dev * hdev,struct hclge_desc * desc,int num)1824 static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
1825 struct hclge_desc *desc,
1826 int num)
1827 {
1828 struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
1829 struct device *dev = &hdev->pdev->dev;
1830 __le32 *desc_data;
1831 u32 status;
1832 int ret;
1833
1834 /* query all main PF RAS errors */
1835 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
1836 true);
1837 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
1838 if (ret) {
1839 dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
1840 return ret;
1841 }
1842
1843 /* log HNS common errors */
1844 status = le32_to_cpu(desc[0].data[0]);
1845 if (status)
1846 hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
1847 &hclge_imp_tcm_ecc_int[0], status,
1848 &ae_dev->hw_err_reset_req);
1849
1850 status = le32_to_cpu(desc[0].data[1]);
1851 if (status)
1852 hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
1853 &hclge_cmdq_nic_mem_ecc_int[0], status,
1854 &ae_dev->hw_err_reset_req);
1855
1856 if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
1857 dev_warn(dev, "imp_rd_data_poison_err found\n");
1858
1859 status = le32_to_cpu(desc[0].data[3]);
1860 if (status)
1861 hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
1862 &hclge_tqp_int_ecc_int[0], status,
1863 &ae_dev->hw_err_reset_req);
1864
1865 status = le32_to_cpu(desc[0].data[4]);
1866 if (status)
1867 hclge_log_error(dev, "MSIX_ECC_INT_STS",
1868 &hclge_msix_sram_ecc_int[0], status,
1869 &ae_dev->hw_err_reset_req);
1870
1871 /* log SSU(Storage Switch Unit) errors */
1872 desc_data = (__le32 *)&desc[2];
1873 status = le32_to_cpu(*(desc_data + 2));
1874 if (status)
1875 hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
1876 &hclge_ssu_mem_ecc_err_int[0], status,
1877 &ae_dev->hw_err_reset_req);
1878
1879 status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
1880 if (status) {
1881 dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
1882 status);
1883 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
1884 }
1885
1886 status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
1887 if (status)
1888 hclge_log_error(dev, "SSU_COMMON_ERR_INT",
1889 &hclge_ssu_com_err_int[0], status,
1890 &ae_dev->hw_err_reset_req);
1891
1892 /* log IGU(Ingress Unit) errors */
1893 desc_data = (__le32 *)&desc[3];
1894 status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
1895 if (status)
1896 hclge_log_error(dev, "IGU_INT_STS",
1897 &hclge_igu_int[0], status,
1898 &ae_dev->hw_err_reset_req);
1899
1900 /* log PPP(Programmable Packet Process) errors */
1901 desc_data = (__le32 *)&desc[4];
1902 status = le32_to_cpu(*(desc_data + 1));
1903 if (status)
1904 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
1905 &hclge_ppp_mpf_abnormal_int_st1[0], status,
1906 &ae_dev->hw_err_reset_req);
1907
1908 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
1909 if (status)
1910 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
1911 &hclge_ppp_mpf_abnormal_int_st3[0], status,
1912 &ae_dev->hw_err_reset_req);
1913
1914 /* log PPU(RCB) errors */
1915 desc_data = (__le32 *)&desc[5];
1916 status = le32_to_cpu(*(desc_data + 1));
1917 if (status) {
1918 dev_err(dev,
1919 "PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
1920 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
1921 }
1922
1923 status = le32_to_cpu(*(desc_data + 2));
1924 if (status)
1925 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
1926 &hclge_ppu_mpf_abnormal_int_st2[0], status,
1927 &ae_dev->hw_err_reset_req);
1928
1929 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
1930 if (status)
1931 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
1932 &hclge_ppu_mpf_abnormal_int_st3[0], status,
1933 &ae_dev->hw_err_reset_req);
1934
1935 /* log TM(Traffic Manager) errors */
1936 desc_data = (__le32 *)&desc[6];
1937 status = le32_to_cpu(*desc_data);
1938 if (status)
1939 hclge_log_error(dev, "TM_SCH_RINT",
1940 &hclge_tm_sch_rint[0], status,
1941 &ae_dev->hw_err_reset_req);
1942
1943 /* log QCN(Quantized Congestion Control) errors */
1944 desc_data = (__le32 *)&desc[7];
1945 status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
1946 if (status)
1947 hclge_log_error(dev, "QCN_FIFO_RINT",
1948 &hclge_qcn_fifo_rint[0], status,
1949 &ae_dev->hw_err_reset_req);
1950
1951 status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
1952 if (status)
1953 hclge_log_error(dev, "QCN_ECC_RINT",
1954 &hclge_qcn_ecc_rint[0], status,
1955 &ae_dev->hw_err_reset_req);
1956
1957 /* log NCSI errors */
1958 desc_data = (__le32 *)&desc[9];
1959 status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
1960 if (status)
1961 hclge_log_error(dev, "NCSI_ECC_INT_RPT",
1962 &hclge_ncsi_err_int[0], status,
1963 &ae_dev->hw_err_reset_req);
1964
1965 /* clear all main PF RAS errors */
1966 hclge_comm_cmd_reuse_desc(&desc[0], false);
1967 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
1968 if (ret)
1969 dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
1970
1971 return ret;
1972 }
1973
1974 /* hclge_handle_pf_ras_error: handle all PF RAS errors
1975 * @hdev: pointer to struct hclge_dev
1976 * @desc: descriptor for describing the command
1977 * @num: number of extended command structures
1978 *
1979 * This function handles all the PF RAS errors in the
1980 * hw registers using command.
1981 */
hclge_handle_pf_ras_error(struct hclge_dev * hdev,struct hclge_desc * desc,int num)1982 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
1983 struct hclge_desc *desc,
1984 int num)
1985 {
1986 struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
1987 struct device *dev = &hdev->pdev->dev;
1988 __le32 *desc_data;
1989 u32 status;
1990 int ret;
1991
1992 /* query all PF RAS errors */
1993 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
1994 true);
1995 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
1996 if (ret) {
1997 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
1998 return ret;
1999 }
2000
2001 /* log SSU(Storage Switch Unit) errors */
2002 status = le32_to_cpu(desc[0].data[0]);
2003 if (status)
2004 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
2005 &hclge_ssu_port_based_err_int[0], status,
2006 &ae_dev->hw_err_reset_req);
2007
2008 status = le32_to_cpu(desc[0].data[1]);
2009 if (status)
2010 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
2011 &hclge_ssu_fifo_overflow_int[0], status,
2012 &ae_dev->hw_err_reset_req);
2013
2014 status = le32_to_cpu(desc[0].data[2]);
2015 if (status)
2016 hclge_log_error(dev, "SSU_ETS_TCG_INT",
2017 &hclge_ssu_ets_tcg_int[0], status,
2018 &ae_dev->hw_err_reset_req);
2019
2020 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
2021 desc_data = (__le32 *)&desc[1];
2022 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
2023 if (status)
2024 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
2025 &hclge_igu_egu_tnl_int[0], status,
2026 &ae_dev->hw_err_reset_req);
2027
2028 /* log PPU(RCB) errors */
2029 desc_data = (__le32 *)&desc[3];
2030 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
2031 if (status) {
2032 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
2033 &hclge_ppu_pf_abnormal_int[0], status,
2034 &ae_dev->hw_err_reset_req);
2035 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
2036 }
2037
2038 /* clear all PF RAS errors */
2039 hclge_comm_cmd_reuse_desc(&desc[0], false);
2040 ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
2041 if (ret)
2042 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
2043
2044 return ret;
2045 }
2046
hclge_handle_all_ras_errors(struct hclge_dev * hdev)2047 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
2048 {
2049 u32 mpf_bd_num, pf_bd_num, bd_num;
2050 struct hclge_desc *desc;
2051 int ret;
2052
2053 /* query the number of registers in the RAS int status */
2054 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
2055 if (ret)
2056 return ret;
2057
2058 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
2059 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
2060 if (!desc)
2061 return -ENOMEM;
2062
2063 /* handle all main PF RAS errors */
2064 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
2065 if (ret) {
2066 kfree(desc);
2067 return ret;
2068 }
2069 memset(desc, 0, bd_num * sizeof(struct hclge_desc));
2070
2071 /* handle all PF RAS errors */
2072 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
2073 kfree(desc);
2074
2075 return ret;
2076 }
2077
hclge_log_rocee_axi_error(struct hclge_dev * hdev)2078 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
2079 {
2080 struct device *dev = &hdev->pdev->dev;
2081 struct hclge_desc desc[3];
2082 int ret;
2083
2084 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2085 true);
2086 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2087 true);
2088 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
2089 true);
2090 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2091 desc[1].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
2092
2093 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
2094 if (ret) {
2095 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
2096 return ret;
2097 }
2098
2099 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
2100 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
2101 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
2102 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
2103 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
2104 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
2105 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
2106 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
2107 dev_err(dev, "AXI3: %08X %08X %08X %08X\n",
2108 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
2109 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));
2110
2111 return 0;
2112 }
2113
hclge_log_rocee_ecc_error(struct hclge_dev * hdev)2114 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
2115 {
2116 struct device *dev = &hdev->pdev->dev;
2117 struct hclge_desc desc[2];
2118 int ret;
2119
2120 ret = hclge_cmd_query_error(hdev, &desc[0],
2121 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
2122 HCLGE_COMM_CMD_FLAG_NEXT);
2123 if (ret) {
2124 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
2125 return ret;
2126 }
2127
2128 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
2129 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
2130 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
2131 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
2132 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
2133 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));
2134
2135 return 0;
2136 }
2137
hclge_log_rocee_ovf_error(struct hclge_dev * hdev)2138 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
2139 {
2140 struct device *dev = &hdev->pdev->dev;
2141 struct hclge_desc desc[2];
2142 int ret;
2143
2144 /* read overflow error status */
2145 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD,
2146 0);
2147 if (ret) {
2148 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
2149 return ret;
2150 }
2151
2152 /* log overflow error */
2153 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2154 const struct hclge_hw_error *err;
2155 u32 err_sts;
2156
2157 err = &hclge_rocee_qmm_ovf_err_int[0];
2158 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
2159 le32_to_cpu(desc[0].data[0]);
2160 while (err->msg) {
2161 if (err->int_msk == err_sts) {
2162 dev_err(dev, "%s [error status=0x%x] found\n",
2163 err->msg,
2164 le32_to_cpu(desc[0].data[0]));
2165 break;
2166 }
2167 err++;
2168 }
2169 }
2170
2171 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2172 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
2173 le32_to_cpu(desc[0].data[1]));
2174 }
2175
2176 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
2177 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
2178 le32_to_cpu(desc[0].data[2]));
2179 }
2180
2181 return 0;
2182 }
2183
2184 static enum hnae3_reset_type
hclge_log_and_clear_rocee_ras_error(struct hclge_dev * hdev)2185 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
2186 {
2187 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET;
2188 struct device *dev = &hdev->pdev->dev;
2189 struct hclge_desc desc[2];
2190 unsigned int status;
2191 int ret;
2192
2193 /* read RAS error interrupt status */
2194 ret = hclge_cmd_query_error(hdev, &desc[0],
2195 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0);
2196 if (ret) {
2197 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
2198 /* reset everything for now */
2199 return HNAE3_GLOBAL_RESET;
2200 }
2201
2202 status = le32_to_cpu(desc[0].data[0]);
2203 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
2204 if (status & HCLGE_ROCEE_RERR_INT_MASK)
2205 dev_err(dev, "ROCEE RAS AXI rresp error\n");
2206
2207 if (status & HCLGE_ROCEE_BERR_INT_MASK)
2208 dev_err(dev, "ROCEE RAS AXI bresp error\n");
2209
2210 reset_type = HNAE3_FUNC_RESET;
2211
2212 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR);
2213
2214 ret = hclge_log_rocee_axi_error(hdev);
2215 if (ret)
2216 return HNAE3_GLOBAL_RESET;
2217 }
2218
2219 if (status & HCLGE_ROCEE_ECC_INT_MASK) {
2220 dev_err(dev, "ROCEE RAS 2bit ECC error\n");
2221 reset_type = HNAE3_GLOBAL_RESET;
2222
2223 ret = hclge_log_rocee_ecc_error(hdev);
2224 if (ret)
2225 return HNAE3_GLOBAL_RESET;
2226 }
2227
2228 if (status & HCLGE_ROCEE_OVF_INT_MASK) {
2229 ret = hclge_log_rocee_ovf_error(hdev);
2230 if (ret) {
2231 dev_err(dev, "failed(%d) to process ovf error\n", ret);
2232 /* reset everything for now */
2233 return HNAE3_GLOBAL_RESET;
2234 }
2235 }
2236
2237 /* clear error status */
2238 hclge_comm_cmd_reuse_desc(&desc[0], false);
2239 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
2240 if (ret) {
2241 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
2242 /* reset everything for now */
2243 return HNAE3_GLOBAL_RESET;
2244 }
2245
2246 return reset_type;
2247 }
2248
/* hclge_config_rocee_ras_interrupt: enable or disable ROCEE RAS interrupts
 * @hdev: pointer to struct hclge_dev
 * @en: true to enable the interrupts, false to disable them
 *
 * Does nothing when the device version is below HNAE3_DEVICE_VERSION_V2 or
 * RoCE is not supported. When enabling, pending ROCEE RAS errors are logged
 * and cleared before the configuration command is sent.
 *
 * Return: 0 on success or when skipped, otherwise the command error code.
 */
int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (!(hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
	      hnae3_dev_roce_supported(hdev)))
		return 0;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);

	if (en) {
		/* enable ROCEE hw error interrupts */
		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);

		hclge_log_and_clear_rocee_ras_error(hdev);
	}

	/* the enable masks are written for both enable and disable */
	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);

	return ret;
}
2276
hclge_handle_rocee_ras_error(struct hnae3_ae_dev * ae_dev)2277 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
2278 {
2279 struct hclge_dev *hdev = ae_dev->priv;
2280 enum hnae3_reset_type reset_type;
2281
2282 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
2283 return;
2284
2285 reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
2286 if (reset_type != HNAE3_NONE_RESET)
2287 set_bit(reset_type, &ae_dev->hw_err_reset_req);
2288 }
2289
2290 static const struct hclge_hw_blk hw_blk[] = {
2291 {
2292 .msk = BIT(0),
2293 .name = "IGU_EGU",
2294 .config_err_int = hclge_config_igu_egu_hw_err_int,
2295 }, {
2296 .msk = BIT(1),
2297 .name = "PPP",
2298 .config_err_int = hclge_config_ppp_hw_err_int,
2299 }, {
2300 .msk = BIT(2),
2301 .name = "SSU",
2302 .config_err_int = hclge_config_ssu_hw_err_int,
2303 }, {
2304 .msk = BIT(3),
2305 .name = "PPU",
2306 .config_err_int = hclge_config_ppu_hw_err_int,
2307 }, {
2308 .msk = BIT(4),
2309 .name = "TM",
2310 .config_err_int = hclge_config_tm_hw_err_int,
2311 }, {
2312 .msk = BIT(5),
2313 .name = "COMMON",
2314 .config_err_int = hclge_config_common_hw_err_int,
2315 }, {
2316 .msk = BIT(8),
2317 .name = "MAC",
2318 .config_err_int = hclge_config_mac_err_int,
2319 }, {
2320 /* sentinel */
2321 }
2322 };
2323
hclge_config_all_msix_error(struct hclge_dev * hdev,bool enable)2324 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
2325 {
2326 u32 reg_val;
2327
2328 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
2329
2330 if (enable)
2331 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
2332 else
2333 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
2334
2335 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
2336 }
2337
hclge_config_nic_hw_error(struct hclge_dev * hdev,bool state)2338 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
2339 {
2340 const struct hclge_hw_blk *module = hw_blk;
2341 int ret = 0;
2342
2343 hclge_config_all_msix_error(hdev, state);
2344
2345 while (module->name) {
2346 if (module->config_err_int) {
2347 ret = module->config_err_int(hdev, state);
2348 if (ret)
2349 return ret;
2350 }
2351 module++;
2352 }
2353
2354 return ret;
2355 }
2356
hclge_handle_hw_ras_error(struct hnae3_ae_dev * ae_dev)2357 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
2358 {
2359 struct hclge_dev *hdev = ae_dev->priv;
2360 struct device *dev = &hdev->pdev->dev;
2361 u32 status;
2362
2363 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
2364 dev_err(dev,
2365 "Can't recover - RAS error reported during dev init\n");
2366 return PCI_ERS_RESULT_NONE;
2367 }
2368
2369 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
2370 if (status & HCLGE_RAS_REG_NFE_MASK ||
2371 status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
2372 ae_dev->hw_err_reset_req = 0;
2373 else
2374 goto out;
2375
2376 /* Handling Non-fatal HNS RAS errors */
2377 if (status & HCLGE_RAS_REG_NFE_MASK) {
2378 dev_err(dev,
2379 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
2380 status);
2381 hclge_handle_all_ras_errors(hdev);
2382 }
2383
2384 /* Handling Non-fatal Rocee RAS errors */
2385 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
2386 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
2387 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
2388 hclge_handle_rocee_ras_error(ae_dev);
2389 }
2390
2391 if (ae_dev->hw_err_reset_req)
2392 return PCI_ERS_RESULT_NEED_RESET;
2393
2394 out:
2395 return PCI_ERS_RESULT_RECOVERED;
2396 }
2397
/* hclge_clear_hw_msix_error: send the clear command for MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor array; only the opcode and flag of desc[0] are set here
 * @is_mpf: true to use the main PF opcode, false for the PF opcode
 * @bd_num: number of descriptors to send
 *
 * Return: result of hclge_cmd_send().
 */
static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc, bool is_mpf,
				     u32 bd_num)
{
	u16 opcode = is_mpf ? HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT :
			      HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT;

	desc[0].opcode = cpu_to_le16(opcode);
	desc[0].flag = cpu_to_le16(HCLGE_COMM_CMD_FLAG_NO_INTR |
				   HCLGE_COMM_CMD_FLAG_IN);

	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
}
2413
2414 /* hclge_query_8bd_info: query information about over_8bd_nfe_err
2415 * @hdev: pointer to struct hclge_dev
2416 * @vf_id: Index of the virtual function with error
2417 * @q_id: Physical index of the queue with error
2418 *
2419 * This function get specific index of queue and function which causes
2420 * over_8bd_nfe_err by using command. If vf_id is 0, it means error is
2421 * caused by PF instead of VF.
2422 */
hclge_query_over_8bd_err_info(struct hclge_dev * hdev,u16 * vf_id,u16 * q_id)2423 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
2424 u16 *q_id)
2425 {
2426 struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
2427 struct hclge_desc desc;
2428 int ret;
2429
2430 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
2431 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2432 if (ret)
2433 return ret;
2434
2435 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
2436 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
2437 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid);
2438
2439 return 0;
2440 }
2441
2442 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
2443 * @hdev: pointer to struct hclge_dev
2444 * @reset_requests: reset level that we need to trigger later
2445 *
2446 * over_8bd_nfe_err is a special MSI-X because it may caused by a VF, in
2447 * that case, we need to trigger VF reset. Otherwise, a PF reset is needed.
2448 */
hclge_handle_over_8bd_err(struct hclge_dev * hdev,unsigned long * reset_requests)2449 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
2450 unsigned long *reset_requests)
2451 {
2452 struct device *dev = &hdev->pdev->dev;
2453 u16 vf_id;
2454 u16 q_id;
2455 int ret;
2456
2457 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
2458 if (ret) {
2459 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
2460 ret);
2461 return;
2462 }
2463
2464 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n",
2465 vf_id, q_id);
2466
2467 if (vf_id) {
2468 if (vf_id >= hdev->num_alloc_vport) {
2469 dev_err(dev, "invalid vport(%u)\n", vf_id);
2470 return;
2471 }
2472
2473 /* If we need to trigger other reset whose level is higher
2474 * than HNAE3_VF_FUNC_RESET, no need to trigger a VF reset
2475 * here.
2476 */
2477 if (*reset_requests != 0)
2478 return;
2479
2480 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
2481 if (ret)
2482 dev_err(dev, "inform reset to vport(%u) failed %d!\n",
2483 vf_id, ret);
2484 } else {
2485 set_bit(HNAE3_FUNC_RESET, reset_requests);
2486 }
2487 }
2488
2489 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
2490 * @hdev: pointer to struct hclge_dev
2491 * @desc: descriptor for describing the command
2492 * @mpf_bd_num: number of extended command structures
2493 * @reset_requests: record of the reset level that we need
2494 *
2495 * This function handles all the main PF MSI-X errors in the hw register/s
2496 * using command.
2497 */
hclge_handle_mpf_msix_error(struct hclge_dev * hdev,struct hclge_desc * desc,int mpf_bd_num,unsigned long * reset_requests)2498 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
2499 struct hclge_desc *desc,
2500 int mpf_bd_num,
2501 unsigned long *reset_requests)
2502 {
2503 struct device *dev = &hdev->pdev->dev;
2504 __le32 *desc_data;
2505 u32 status;
2506 int ret;
2507 /* query all main PF MSIx errors */
2508 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
2509 true);
2510 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
2511 if (ret) {
2512 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
2513 return ret;
2514 }
2515
2516 /* log MAC errors */
2517 desc_data = (__le32 *)&desc[1];
2518 status = le32_to_cpu(*desc_data);
2519 if (status)
2520 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
2521 &hclge_mac_afifo_tnl_int[0], status,
2522 reset_requests);
2523
2524 /* log PPU(RCB) MPF errors */
2525 desc_data = (__le32 *)&desc[5];
2526 status = le32_to_cpu(*(desc_data + 2)) &
2527 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
2528 if (status)
2529 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]",
2530 status);
2531
2532 /* clear all main PF MSIx errors */
2533 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
2534 if (ret)
2535 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);
2536
2537 return ret;
2538 }
2539
2540 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors
2541 * @hdev: pointer to struct hclge_dev
2542 * @desc: descriptor for describing the command
2543 * @mpf_bd_num: number of extended command structures
2544 * @reset_requests: record of the reset level that we need
2545 *
2546 * This function handles all the PF MSI-X errors in the hw register/s using
2547 * command.
2548 */
hclge_handle_pf_msix_error(struct hclge_dev * hdev,struct hclge_desc * desc,int pf_bd_num,unsigned long * reset_requests)2549 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
2550 struct hclge_desc *desc,
2551 int pf_bd_num,
2552 unsigned long *reset_requests)
2553 {
2554 struct device *dev = &hdev->pdev->dev;
2555 __le32 *desc_data;
2556 u32 status;
2557 int ret;
2558
2559 /* query all PF MSIx errors */
2560 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
2561 true);
2562 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
2563 if (ret) {
2564 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
2565 return ret;
2566 }
2567
2568 /* log SSU PF errors */
2569 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
2570 if (status)
2571 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
2572 &hclge_ssu_port_based_pf_int[0],
2573 status, reset_requests);
2574
2575 /* read and log PPP PF errors */
2576 desc_data = (__le32 *)&desc[2];
2577 status = le32_to_cpu(*desc_data);
2578 if (status)
2579 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
2580 &hclge_ppp_pf_abnormal_int[0],
2581 status, reset_requests);
2582
2583 /* log PPU(RCB) PF errors */
2584 desc_data = (__le32 *)&desc[3];
2585 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
2586 if (status)
2587 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
2588 &hclge_ppu_pf_abnormal_int[0],
2589 status, reset_requests);
2590
2591 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
2592 if (status)
2593 hclge_handle_over_8bd_err(hdev, reset_requests);
2594
2595 /* clear all PF MSIx errors */
2596 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
2597 if (ret)
2598 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);
2599
2600 return ret;
2601 }
2602
hclge_handle_all_hw_msix_error(struct hclge_dev * hdev,unsigned long * reset_requests)2603 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
2604 unsigned long *reset_requests)
2605 {
2606 u32 mpf_bd_num, pf_bd_num, bd_num;
2607 struct hclge_desc *desc;
2608 int ret;
2609
2610 /* query the number of bds for the MSIx int status */
2611 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
2612 if (ret)
2613 goto out;
2614
2615 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
2616 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
2617 if (!desc)
2618 return -ENOMEM;
2619
2620 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
2621 reset_requests);
2622 if (ret)
2623 goto msi_error;
2624
2625 memset(desc, 0, bd_num * sizeof(struct hclge_desc));
2626 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
2627 if (ret)
2628 goto msi_error;
2629
2630 ret = hclge_handle_mac_tnl(hdev);
2631
2632 msi_error:
2633 kfree(desc);
2634 out:
2635 return ret;
2636 }
2637
hclge_handle_hw_msix_error(struct hclge_dev * hdev,unsigned long * reset_requests)2638 int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
2639 unsigned long *reset_requests)
2640 {
2641 struct device *dev = &hdev->pdev->dev;
2642
2643 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
2644 dev_err(dev,
2645 "failed to handle msix error during dev init\n");
2646 return -EAGAIN;
2647 }
2648
2649 return hclge_handle_all_hw_msix_error(hdev, reset_requests);
2650 }
2651
hclge_handle_mac_tnl(struct hclge_dev * hdev)2652 int hclge_handle_mac_tnl(struct hclge_dev *hdev)
2653 {
2654 struct hclge_mac_tnl_stats mac_tnl_stats;
2655 struct device *dev = &hdev->pdev->dev;
2656 struct hclge_desc desc;
2657 u32 status;
2658 int ret;
2659
2660 /* query and clear mac tnl interruptions */
2661 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
2662 ret = hclge_cmd_send(&hdev->hw, &desc, 1);
2663 if (ret) {
2664 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
2665 return ret;
2666 }
2667
2668 status = le32_to_cpu(desc.data[0]);
2669 if (status) {
2670 /* When mac tnl interrupt occurs, we record current time and
2671 * register status here in a fifo, then clear the status. So
2672 * that if link status changes suddenly at some time, we can
2673 * query them by debugfs.
2674 */
2675 mac_tnl_stats.time = local_clock();
2676 mac_tnl_stats.status = status;
2677 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
2678 ret = hclge_clear_mac_tnl_int(hdev);
2679 if (ret)
2680 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
2681 ret);
2682 }
2683
2684 return ret;
2685 }
2686
hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev * ae_dev)2687 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
2688 {
2689 struct hclge_dev *hdev = ae_dev->priv;
2690 struct device *dev = &hdev->pdev->dev;
2691 u32 mpf_bd_num, pf_bd_num, bd_num;
2692 struct hclge_desc *desc;
2693 u32 status;
2694 int ret;
2695
2696 ae_dev->hw_err_reset_req = 0;
2697 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
2698
2699 /* query the number of bds for the MSIx int status */
2700 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
2701 if (ret)
2702 return;
2703
2704 bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
2705 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
2706 if (!desc)
2707 return;
2708
2709 /* Clear HNS hw errors reported through msix */
2710 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
2711 HCLGE_DESC_NO_DATA_LEN);
2712 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
2713 if (ret) {
2714 dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
2715 ret);
2716 goto msi_error;
2717 }
2718
2719 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
2720 HCLGE_DESC_NO_DATA_LEN);
2721 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
2722 if (ret) {
2723 dev_err(dev, "fail(%d) to clear pf msix int during init\n",
2724 ret);
2725 goto msi_error;
2726 }
2727
2728 /* Handle Non-fatal HNS RAS errors */
2729 if (status & HCLGE_RAS_REG_NFE_MASK) {
2730 dev_err(dev, "HNS hw error(RAS) identified during init\n");
2731 hclge_handle_all_ras_errors(hdev);
2732 }
2733
2734 msi_error:
2735 kfree(desc);
2736 }
2737
hclge_find_error_source(struct hclge_dev * hdev)2738 bool hclge_find_error_source(struct hclge_dev *hdev)
2739 {
2740 u32 msix_src_flag, hw_err_src_flag;
2741
2742 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
2743 HCLGE_VECTOR0_REG_MSIX_MASK;
2744
2745 hw_err_src_flag = hclge_read_dev(&hdev->hw,
2746 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
2747 HCLGE_RAS_REG_ERR_MASK;
2748
2749 return msix_src_flag || hw_err_src_flag;
2750 }
2751
hclge_handle_occurred_error(struct hclge_dev * hdev)2752 void hclge_handle_occurred_error(struct hclge_dev *hdev)
2753 {
2754 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
2755
2756 if (hclge_find_error_source(hdev))
2757 hclge_handle_error_info_log(ae_dev);
2758 }
2759
2760 static void
hclge_handle_error_type_reg_log(struct device * dev,struct hclge_mod_err_info * mod_info,struct hclge_type_reg_err_info * type_reg_info)2761 hclge_handle_error_type_reg_log(struct device *dev,
2762 struct hclge_mod_err_info *mod_info,
2763 struct hclge_type_reg_err_info *type_reg_info)
2764 {
2765 #define HCLGE_ERR_TYPE_MASK 0x7F
2766 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
2767
2768 u8 mod_id, total_module, type_id, total_type, i, is_ras;
2769 u8 index_module = MODULE_NONE;
2770 u8 index_type = NONE_ERROR;
2771
2772 mod_id = mod_info->mod_id;
2773 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
2774 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
2775
2776 total_module = ARRAY_SIZE(hclge_hw_module_id_st);
2777 total_type = ARRAY_SIZE(hclge_hw_type_id_st);
2778
2779 for (i = 0; i < total_module; i++) {
2780 if (mod_id == hclge_hw_module_id_st[i].module_id) {
2781 index_module = i;
2782 break;
2783 }
2784 }
2785
2786 for (i = 0; i < total_type; i++) {
2787 if (type_id == hclge_hw_type_id_st[i].type_id) {
2788 index_type = i;
2789 break;
2790 }
2791 }
2792
2793 if (index_module != MODULE_NONE && index_type != NONE_ERROR)
2794 dev_err(dev,
2795 "found %s %s, is %s error.\n",
2796 hclge_hw_module_id_st[index_module].msg,
2797 hclge_hw_type_id_st[index_type].msg,
2798 is_ras ? "ras" : "msix");
2799 else
2800 dev_err(dev,
2801 "unknown module[%u] or type[%u].\n", mod_id, type_id);
2802
2803 dev_err(dev, "reg_value:\n");
2804 for (i = 0; i < type_reg_info->reg_num; i++)
2805 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
2806 }
2807
/* hclge_handle_error_module_log: parse and log the error info buffer
 * @ae_dev: pointer to struct hnae3_ae_dev
 * @buf: error info as an array of CPU-endian 32-bit words
 * @buf_size: number of 32-bit words in @buf
 *
 * The first word is a summary record holding the requested reset type and
 * the number of module records. Each module record is one word and is
 * followed by err_num type records; each type record is one word followed
 * by reg_num register words. A reset type other than 0 and
 * HNAE3_NONE_RESET is recorded in ae_dev->hw_err_reset_req.
 *
 * Parsing stops with an error message as soon as a record or its register
 * words would lie outside of @buf.
 */
static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
					  const u32 *buf, u32 buf_size)
{
	struct hclge_type_reg_err_info *type_reg_info;
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_mod_err_info *mod_info;
	struct hclge_sum_err_info *sum_info;
	u8 mod_num, err_num, i;
	u32 offset = 0;

	sum_info = (struct hclge_sum_err_info *)&buf[offset++];
	if (sum_info->reset_type &&
	    sum_info->reset_type != HNAE3_NONE_RESET)
		set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
	mod_num = sum_info->mod_num;

	while (mod_num--) {
		if (offset >= buf_size) {
			dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
				offset, buf_size);
			return;
		}
		mod_info = (struct hclge_mod_err_info *)&buf[offset++];
		err_num = mod_info->err_num;

		for (i = 0; i < err_num; i++) {
			if (offset >= buf_size) {
				dev_err(dev,
					"The offset(%u) exceeds buf size(%u).\n",
					offset, buf_size);
				return;
			}

			type_reg_info = (struct hclge_type_reg_err_info *)
					    &buf[offset++];

			/* offset <= buf_size holds here, so the subtraction
			 * cannot wrap. Refuse to dump registers that would be
			 * read from beyond the end of the buffer.
			 */
			if (type_reg_info->reg_num > buf_size - offset) {
				dev_err(dev,
					"The reg_num(%u) exceeds remaining buf size(%u).\n",
					type_reg_info->reg_num,
					buf_size - offset);
				return;
			}

			hclge_handle_error_type_reg_log(dev, mod_info,
							type_reg_info);

			offset += type_reg_info->reg_num;
		}
	}
}
2851
/* hclge_query_all_err_bd_num: query the bd number of the error info
 * @hdev: pointer to struct hclge_dev
 * @bd_num: output, number of descriptors reported by the command
 *
 * Return: 0 on success, the command error code if the query fails, or
 * -EINVAL if the reported number is 0. @bd_num is written whenever the
 * command itself succeeds.
 */
static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc query;
	int err;

	hclge_cmd_setup_basic_desc(&query, HCLGE_QUERY_ALL_ERR_BD_NUM, true);

	err = hclge_cmd_send(&hdev->hw, &query, 1);
	if (err) {
		dev_err(dev, "failed to query error bd_num, ret = %d.\n", err);
		return err;
	}

	*bd_num = le32_to_cpu(query.data[0]);
	if (*bd_num == 0) {
		dev_err(dev, "The value of bd_num is 0!\n");
		return -EINVAL;
	}

	return 0;
}
2873
/* hclge_query_all_err_info: query the error info from the hardware
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor array receiving the reply
 * @bd_num: number of descriptors in @desc
 *
 * Return: 0 on success, otherwise the command error code (also logged).
 */
static int hclge_query_all_err_info(struct hclge_dev *hdev,
				    struct hclge_desc *desc, u32 bd_num)
{
	int err;

	hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);

	err = hclge_cmd_send(&hdev->hw, desc, bd_num);
	if (!err)
		return 0;

	dev_err(&hdev->pdev->dev,
		"failed to query error info, ret = %d.\n", err);

	return err;
}
2887
hclge_handle_error_info_log(struct hnae3_ae_dev * ae_dev)2888 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
2889 {
2890 u32 bd_num, desc_len, buf_len, buf_size, i;
2891 struct hclge_dev *hdev = ae_dev->priv;
2892 struct hclge_desc *desc;
2893 __le32 *desc_data;
2894 u32 *buf;
2895 int ret;
2896
2897 ret = hclge_query_all_err_bd_num(hdev, &bd_num);
2898 if (ret)
2899 goto out;
2900
2901 desc_len = bd_num * sizeof(struct hclge_desc);
2902 desc = kzalloc(desc_len, GFP_KERNEL);
2903 if (!desc) {
2904 ret = -ENOMEM;
2905 goto out;
2906 }
2907
2908 ret = hclge_query_all_err_info(hdev, desc, bd_num);
2909 if (ret)
2910 goto err_desc;
2911
2912 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
2913 buf_size = buf_len / sizeof(u32);
2914
2915 desc_data = kzalloc(buf_len, GFP_KERNEL);
2916 if (!desc_data) {
2917 ret = -ENOMEM;
2918 goto err_desc;
2919 }
2920
2921 buf = kzalloc(buf_len, GFP_KERNEL);
2922 if (!buf) {
2923 ret = -ENOMEM;
2924 goto err_buf_alloc;
2925 }
2926
2927 memcpy(desc_data, &desc[0].data[0], buf_len);
2928 for (i = 0; i < buf_size; i++)
2929 buf[i] = le32_to_cpu(desc_data[i]);
2930
2931 hclge_handle_error_module_log(ae_dev, buf, buf_size);
2932 kfree(buf);
2933
2934 err_buf_alloc:
2935 kfree(desc_data);
2936 err_desc:
2937 kfree(desc);
2938 out:
2939 return ret;
2940 }
2941