1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Mellanox Technologies.
3 
4 #include "health.h"
5 #include "params.h"
6 #include "txrx.h"
7 #include "devlink.h"
8 #include "ptp.h"
9 #include "lib/tout.h"
10 
/*
 * Human-readable names for the RQ software state bits, indexed by bit number.
 * Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h;
 * mlx5e_health_rq_put_sw_state() checks at build time that the array size
 * equals MLX5E_NUM_RQ_STATES.
 */
static const char * const rq_sw_state_type_name[] = {
	[MLX5E_RQ_STATE_ENABLED] = "enabled",
	[MLX5E_RQ_STATE_RECOVERING] = "recovering",
	[MLX5E_RQ_STATE_DIM] = "dim",
	[MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete",
	[MLX5E_RQ_STATE_CSUM_FULL] = "csum_full",
	[MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx",
	[MLX5E_RQ_STATE_SHAMPO] = "shampo",
	[MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced",
	[MLX5E_RQ_STATE_XSK] = "xsk",
};
23 
/*
 * Query the current hardware state of an RQ.
 *
 * @dev:   core device the RQ belongs to
 * @rqn:   number of the RQ to query
 * @state: receives the "state" field of the returned RQ context; left
 *         untouched when the query fails
 *
 * Return: 0 on success, -ENOMEM if the output buffer cannot be allocated,
 * otherwise the error returned by mlx5_core_query_rq().
 */
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
	void *query_out;
	void *rq_ctx;
	int ret;

	query_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rq_out), GFP_KERNEL);
	if (!query_out)
		return -ENOMEM;

	ret = mlx5_core_query_rq(dev, rqn, query_out);
	if (!ret) {
		/* Only parse the output buffer when the query succeeded. */
		rq_ctx = MLX5_ADDR_OF(query_rq_out, query_out, rq_context);
		*state = MLX5_GET(rqc, rq_ctx, state);
	}

	kvfree(query_out);
	return ret;
}
46 
mlx5e_wait_for_icosq_flush(struct mlx5e_icosq * icosq)47 static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq)
48 {
49 	struct mlx5_core_dev *dev = icosq->channel->mdev;
50 	unsigned long exp_time;
51 
52 	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
53 
54 	while (time_before(jiffies, exp_time)) {
55 		if (icosq->cc == icosq->pc)
56 			return 0;
57 
58 		msleep(20);
59 	}
60 
61 	netdev_err(icosq->channel->netdev,
62 		   "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n",
63 		   icosq->sqn, icosq->cc, icosq->pc);
64 
65 	return -ETIMEDOUT;
66 }
67 
mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq * icosq)68 static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
69 {
70 	WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n",
71 		  icosq->sqn, icosq->cc, icosq->pc);
72 	icosq->cc = 0;
73 	icosq->pc = 0;
74 }
75 
/*
 * Recovery callback for an error CQE reported on an ICOSQ.
 *
 * @ctx: the struct mlx5e_icosq to recover
 *
 * The whole sequence runs under the channel's icosq_recovery_lock. When the
 * queried SQ state is not MLX5_SQC_STATE_ERR, only the
 * MLX5E_SQ_STATE_RECOVERING bit is cleared. Otherwise the channel RQ (and the
 * XSK RQ, if it is enabled) are deactivated, the ICOSQ is flushed, moved back
 * to ready, its counters are reset, and everything is reactivated.
 *
 * Return: 0 on success or when the SQ is not in error state; otherwise the
 * error from the failing step.
 */
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
	struct mlx5e_rq *xskrq = NULL;
	struct mlx5_core_dev *mdev;
	struct mlx5e_icosq *icosq;
	struct net_device *dev;
	struct mlx5e_rq *rq;
	u8 state;
	int err;

	icosq = ctx;

	mutex_lock(&icosq->channel->icosq_recovery_lock);

	/* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
	rq = &icosq->channel->rq;
	/* The XSK RQ takes part in the recovery only while it is enabled. */
	if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
		xskrq = &icosq->channel->xskrq;
	mdev = icosq->channel->mdev;
	dev = icosq->channel->netdev;
	err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
	if (err) {
		netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n",
			   icosq->sqn, err);
		goto out;
	}

	/* Nothing to recover; err is 0 here, so the function returns success. */
	if (state != MLX5_SQC_STATE_ERR)
		goto out;

	mlx5e_deactivate_rq(rq);
	if (xskrq)
		mlx5e_deactivate_rq(xskrq);

	/*
	 * NOTE(review): on the error exits below the RQs deactivated above are
	 * not reactivated by this function - confirm that is intended.
	 */
	err = mlx5e_wait_for_icosq_flush(icosq);
	if (err)
		goto out;

	mlx5e_deactivate_icosq(icosq);

	/* At this point, both the rq and the icosq are disabled */

	err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn);
	if (err)
		goto out;

	mlx5e_reset_icosq_cc_pc(icosq);

	mlx5e_free_rx_missing_descs(rq);
	if (xskrq)
		mlx5e_free_rx_missing_descs(xskrq);

	/* Clear the recovering bit before the ICOSQ is activated again. */
	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
	mlx5e_activate_icosq(icosq);

	mlx5e_activate_rq(rq);
	rq->stats->recover++;

	if (xskrq) {
		mlx5e_activate_rq(xskrq);
		xskrq->stats->recover++;
	}

	mlx5e_trigger_napi_icosq(icosq->channel);

	mutex_unlock(&icosq->channel->icosq_recovery_lock);

	return 0;
out:
	/* Common exit for failures and for the "not in error state" case. */
	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
	mutex_unlock(&icosq->channel->icosq_recovery_lock);
	return err;
}
149 
mlx5e_rx_reporter_err_rq_cqe_recover(void * ctx)150 static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
151 {
152 	struct mlx5e_rq *rq = ctx;
153 	int err;
154 
155 	mlx5e_deactivate_rq(rq);
156 	err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
157 	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
158 	if (err)
159 		return err;
160 
161 	mlx5e_activate_rq(rq);
162 	rq->stats->recover++;
163 	if (rq->channel)
164 		mlx5e_trigger_napi_icosq(rq->channel);
165 	else
166 		mlx5e_trigger_napi_sched(rq->cq.napi);
167 	return 0;
168 }
169 
mlx5e_rx_reporter_timeout_recover(void * ctx)170 static int mlx5e_rx_reporter_timeout_recover(void *ctx)
171 {
172 	struct mlx5_eq_comp *eq;
173 	struct mlx5e_rq *rq;
174 	int err;
175 
176 	rq = ctx;
177 	eq = rq->cq.mcq.eq;
178 
179 	err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
180 	if (err && rq->icosq)
181 		clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
182 
183 	return err;
184 }
185 
mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx * err_ctx)186 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
187 {
188 	return err_ctx->recover(err_ctx->ctx);
189 }
190 
/*
 * devlink health .recover callback of the RX reporter.
 *
 * @reporter: the RX health reporter
 * @context:  optional struct mlx5e_err_ctx describing a specific error
 * @extack:   unused
 *
 * With an error context, the context's own recovery callback is run;
 * without one, mlx5e_health_recover_channels() is called.
 *
 * Return: the result of the recovery that was run.
 */
static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_rx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
201 
mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq * icosq,u8 hw_state,struct devlink_fmsg * fmsg)202 static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
203 					 struct devlink_fmsg *fmsg)
204 {
205 	int err;
206 
207 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
208 	if (err)
209 		return err;
210 
211 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn);
212 	if (err)
213 		return err;
214 
215 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
216 	if (err)
217 		return err;
218 
219 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc);
220 	if (err)
221 		return err;
222 
223 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc);
224 	if (err)
225 		return err;
226 
227 	err = devlink_fmsg_u32_pair_put(fmsg, "WQE size",
228 					mlx5_wq_cyc_get_size(&icosq->wq));
229 	if (err)
230 		return err;
231 
232 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
233 	if (err)
234 		return err;
235 
236 	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn);
237 	if (err)
238 		return err;
239 
240 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc);
241 	if (err)
242 		return err;
243 
244 	err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq));
245 	if (err)
246 		return err;
247 
248 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
249 	if (err)
250 		return err;
251 
252 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
253 }
254 
mlx5e_health_rq_put_sw_state(struct devlink_fmsg * fmsg,struct mlx5e_rq * rq)255 static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq)
256 {
257 	int err;
258 	int i;
259 
260 	BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES,
261 			 "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h");
262 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
263 	if (err)
264 		return err;
265 
266 	for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) {
267 		err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i],
268 						test_bit(i, &rq->state));
269 		if (err)
270 			return err;
271 	}
272 
273 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
274 }
275 
276 static int
mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq * rq,struct devlink_fmsg * fmsg)277 mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
278 						  struct devlink_fmsg *fmsg)
279 {
280 	u16 wqe_counter;
281 	int wqes_sz;
282 	u8 hw_state;
283 	u16 wq_head;
284 	int err;
285 
286 	err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state);
287 	if (err)
288 		return err;
289 
290 	wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
291 	wq_head = mlx5e_rqwq_get_head(rq);
292 	wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);
293 
294 	err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
295 	if (err)
296 		return err;
297 
298 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
299 	if (err)
300 		return err;
301 
302 	err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
303 	if (err)
304 		return err;
305 
306 	err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
307 	if (err)
308 		return err;
309 
310 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
311 	if (err)
312 		return err;
313 
314 	err = mlx5e_health_rq_put_sw_state(fmsg, rq);
315 	if (err)
316 		return err;
317 
318 	err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
319 	if (err)
320 		return err;
321 
322 	err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg);
323 	if (err)
324 		return err;
325 
326 	if (rq->icosq) {
327 		struct mlx5e_icosq *icosq = rq->icosq;
328 		u8 icosq_hw_state;
329 
330 		err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state);
331 		if (err)
332 			return err;
333 
334 		err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg);
335 		if (err)
336 			return err;
337 	}
338 
339 	return 0;
340 }
341 
mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq * rq,struct devlink_fmsg * fmsg)342 static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
343 						   struct devlink_fmsg *fmsg)
344 {
345 	int err;
346 
347 	err = devlink_fmsg_obj_nest_start(fmsg);
348 	if (err)
349 		return err;
350 
351 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
352 	if (err)
353 		return err;
354 
355 	err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
356 	if (err)
357 		return err;
358 
359 	return devlink_fmsg_obj_nest_end(fmsg);
360 }
361 
mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq * rq,struct devlink_fmsg * fmsg)362 static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
363 						 struct devlink_fmsg *fmsg)
364 {
365 	struct mlx5e_priv *priv = rq->priv;
366 	struct mlx5e_params *params;
367 	u32 rq_stride, rq_sz;
368 	bool real_time;
369 	int err;
370 
371 	params = &priv->channels.params;
372 	rq_sz = mlx5e_rqwq_get_size(rq);
373 	real_time =  mlx5_is_real_time_rq(priv->mdev);
374 	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
375 
376 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
377 	if (err)
378 		return err;
379 
380 	err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
381 	if (err)
382 		return err;
383 
384 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
385 	if (err)
386 		return err;
387 
388 	err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
389 	if (err)
390 		return err;
391 
392 	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
393 	if (err)
394 		return err;
395 
396 	err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
397 	if (err)
398 		return err;
399 
400 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
401 }
402 
403 static int
mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv * priv,struct mlx5e_ptp * ptp_ch,struct devlink_fmsg * fmsg)404 mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
405 					     struct devlink_fmsg *fmsg)
406 {
407 	int err;
408 
409 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
410 	if (err)
411 		return err;
412 
413 	err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter);
414 	if (err)
415 		return err;
416 
417 	err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
418 	if (err)
419 		return err;
420 
421 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
422 }
423 
424 static int
mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter * reporter,struct devlink_fmsg * fmsg)425 mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
426 					 struct devlink_fmsg *fmsg)
427 {
428 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
429 	struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
430 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
431 	int err;
432 
433 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
434 	if (err)
435 		return err;
436 
437 	err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
438 	if (err)
439 		return err;
440 
441 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
442 		err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
443 		if (err)
444 			return err;
445 	}
446 
447 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
448 }
449 
/*
 * Emit one anonymous object describing the PTP RQ: a "channel": "ptp" pair
 * followed by the common RQ diagnose fields.
 *
 * Return: 0 on success, otherwise the first error encountered.
 */
static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
							  struct devlink_fmsg *fmsg)
{
	int ret;

	ret = devlink_fmsg_obj_nest_start(fmsg);
	if (ret)
		return ret;

	ret = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
	if (ret)
		return ret;

	ret = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
	if (ret)
		return ret;

	return devlink_fmsg_obj_nest_end(fmsg);
}
473 
mlx5e_rx_reporter_diagnose(struct devlink_health_reporter * reporter,struct devlink_fmsg * fmsg,struct netlink_ext_ack * extack)474 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
475 				      struct devlink_fmsg *fmsg,
476 				      struct netlink_ext_ack *extack)
477 {
478 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
479 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
480 	int i, err = 0;
481 
482 	mutex_lock(&priv->state_lock);
483 
484 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
485 		goto unlock;
486 
487 	err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
488 	if (err)
489 		goto unlock;
490 
491 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
492 	if (err)
493 		goto unlock;
494 
495 	for (i = 0; i < priv->channels.num; i++) {
496 		struct mlx5e_channel *c = priv->channels.c[i];
497 		struct mlx5e_rq *rq;
498 
499 		rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ?
500 			&c->xskrq : &c->rq;
501 
502 		err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
503 		if (err)
504 			goto unlock;
505 	}
506 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
507 		err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
508 		if (err)
509 			goto unlock;
510 	}
511 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
512 unlock:
513 	mutex_unlock(&priv->state_lock);
514 	return err;
515 }
516 
mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg,void * ctx)517 static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
518 					void *ctx)
519 {
520 	struct mlx5e_txqsq *icosq = ctx;
521 	struct mlx5_rsc_key key = {};
522 	int err;
523 
524 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
525 		return 0;
526 
527 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
528 	if (err)
529 		return err;
530 
531 	key.size = PAGE_SIZE;
532 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
533 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
534 	if (err)
535 		return err;
536 
537 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
538 	if (err)
539 		return err;
540 
541 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ");
542 	if (err)
543 		return err;
544 
545 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
546 	if (err)
547 		return err;
548 
549 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
550 	key.index1 = icosq->sqn;
551 	key.num_of_obj1 = 1;
552 
553 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
554 	if (err)
555 		return err;
556 
557 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
558 	if (err)
559 		return err;
560 
561 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
562 	if (err)
563 		return err;
564 
565 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
566 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
567 
568 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
569 	if (err)
570 		return err;
571 
572 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
573 	if (err)
574 		return err;
575 
576 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
577 }
578 
mlx5e_rx_reporter_dump_rq(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg,void * ctx)579 static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
580 				     void *ctx)
581 {
582 	struct mlx5_rsc_key key = {};
583 	struct mlx5e_rq *rq = ctx;
584 	int err;
585 
586 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
587 		return 0;
588 
589 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
590 	if (err)
591 		return err;
592 
593 	key.size = PAGE_SIZE;
594 	key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
595 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
596 	if (err)
597 		return err;
598 
599 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
600 	if (err)
601 		return err;
602 
603 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
604 	if (err)
605 		return err;
606 
607 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
608 	if (err)
609 		return err;
610 
611 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
612 	key.index1 = rq->rqn;
613 	key.num_of_obj1 = 1;
614 
615 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
616 	if (err)
617 		return err;
618 
619 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
620 	if (err)
621 		return err;
622 
623 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff");
624 	if (err)
625 		return err;
626 
627 	key.rsc = MLX5_SGMT_TYPE_RCV_BUFF;
628 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
629 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
630 	if (err)
631 		return err;
632 
633 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
634 	if (err)
635 		return err;
636 
637 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
638 }
639 
mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg)640 static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
641 					  struct devlink_fmsg *fmsg)
642 {
643 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
644 	struct mlx5_rsc_key key = {};
645 	int i, err;
646 
647 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
648 		return 0;
649 
650 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice");
651 	if (err)
652 		return err;
653 
654 	key.size = PAGE_SIZE;
655 	key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL;
656 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
657 	if (err)
658 		return err;
659 
660 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
661 	if (err)
662 		return err;
663 
664 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
665 	if (err)
666 		return err;
667 
668 	for (i = 0; i < priv->channels.num; i++) {
669 		struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
670 
671 		err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ");
672 		if (err)
673 			return err;
674 	}
675 
676 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
677 		err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");
678 		if (err)
679 			return err;
680 	}
681 
682 	return devlink_fmsg_arr_pair_nest_end(fmsg);
683 }
684 
mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv * priv,struct mlx5e_err_ctx * err_ctx,struct devlink_fmsg * fmsg)685 static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
686 					   struct mlx5e_err_ctx *err_ctx,
687 					   struct devlink_fmsg *fmsg)
688 {
689 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
690 }
691 
/*
 * devlink health .dump callback of the RX reporter.
 *
 * @reporter: the RX health reporter
 * @fmsg:     message to fill
 * @context:  optional struct mlx5e_err_ctx describing a specific error
 * @extack:   unused
 *
 * With an error context, the context's own dump callback is run; without
 * one, all RQs are dumped.
 *
 * Return: the result of the dump that was run.
 */
static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg);

	return mlx5e_rx_reporter_dump_all_rqs(priv, fmsg);
}
702 
mlx5e_reporter_rx_timeout(struct mlx5e_rq * rq)703 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
704 {
705 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
706 	struct mlx5e_icosq *icosq = rq->icosq;
707 	struct mlx5e_priv *priv = rq->priv;
708 	struct mlx5e_err_ctx err_ctx = {};
709 	char icosq_str[32] = {};
710 
711 	err_ctx.ctx = rq;
712 	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
713 	err_ctx.dump = mlx5e_rx_reporter_dump_rq;
714 
715 	if (icosq)
716 		snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
717 	snprintf(err_str, sizeof(err_str),
718 		 "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
719 		 rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
720 
721 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
722 }
723 
mlx5e_reporter_rq_cqe_err(struct mlx5e_rq * rq)724 void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq)
725 {
726 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
727 	struct mlx5e_priv *priv = rq->priv;
728 	struct mlx5e_err_ctx err_ctx = {};
729 
730 	err_ctx.ctx = rq;
731 	err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover;
732 	err_ctx.dump = mlx5e_rx_reporter_dump_rq;
733 	snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn);
734 
735 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
736 }
737 
mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq * icosq)738 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
739 {
740 	struct mlx5e_priv *priv = icosq->channel->priv;
741 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
742 	struct mlx5e_err_ctx err_ctx = {};
743 
744 	err_ctx.ctx = icosq;
745 	err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover;
746 	err_ctx.dump = mlx5e_rx_reporter_dump_icosq;
747 	snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn);
748 
749 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
750 }
751 
mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel * c)752 void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
753 {
754 	mutex_lock(&c->icosq_recovery_lock);
755 }
756 
mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel * c)757 void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
758 {
759 	mutex_unlock(&c->icosq_recovery_lock);
760 }
761 
/*
 * Callbacks of the "rx" devlink health reporter; passed to
 * devlink_port_health_reporter_create() in mlx5e_reporter_rx_create().
 */
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
	.name = "rx",
	.recover = mlx5e_rx_reporter_recover,
	.diagnose = mlx5e_rx_reporter_diagnose,
	.dump = mlx5e_rx_reporter_dump,
};
768 
769 #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500
770 
mlx5e_reporter_rx_create(struct mlx5e_priv * priv)771 void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
772 {
773 	struct devlink_health_reporter *reporter;
774 
775 	reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
776 						       &mlx5_rx_reporter_ops,
777 						       MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
778 	if (IS_ERR(reporter)) {
779 		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
780 			    PTR_ERR(reporter));
781 		return;
782 	}
783 	priv->rx_reporter = reporter;
784 }
785 
mlx5e_reporter_rx_destroy(struct mlx5e_priv * priv)786 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
787 {
788 	if (!priv->rx_reporter)
789 		return;
790 
791 	devlink_health_reporter_destroy(priv->rx_reporter);
792 	priv->rx_reporter = NULL;
793 }
794