1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Mellanox Technologies.
3
4 #include "health.h"
5 #include "lib/eq.h"
6 #include "lib/mlx5.h"
7
/*
 * Open a named nested object in @fmsg: start a pair nest keyed by @name and
 * then start an object nest inside it.
 *
 * Returns 0 on success, otherwise the non-zero value returned by the first
 * devlink call that failed. The object nest is not started when the pair
 * nest fails.
 */
int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	int ret = devlink_fmsg_pair_nest_start(fmsg, name);

	if (ret)
		return ret;

	return devlink_fmsg_obj_nest_start(fmsg);
}
22
/*
 * Close a named nested object in @fmsg: end the object nest first, then the
 * enclosing pair nest.
 *
 * Returns 0 on success, otherwise the non-zero value returned by the first
 * devlink call that failed. The pair nest is not ended when ending the
 * object nest fails.
 */
int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	int ret = devlink_fmsg_obj_nest_end(fmsg);

	if (ret)
		return ret;

	return devlink_fmsg_pair_nest_end(fmsg);
}
37
mlx5e_health_cq_diag_fmsg(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)38 int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
39 {
40 u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
41 u8 hw_status;
42 void *cqc;
43 int err;
44
45 err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
46 if (err)
47 return err;
48
49 cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
50 hw_status = MLX5_GET(cqc, cqc, status);
51
52 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
53 if (err)
54 return err;
55
56 err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
57 if (err)
58 return err;
59
60 err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
61 if (err)
62 return err;
63
64 err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
65 if (err)
66 return err;
67
68 err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
69 if (err)
70 return err;
71
72 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
73 if (err)
74 return err;
75
76 return 0;
77 }
78
mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)79 int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
80 {
81 u8 cq_log_stride;
82 u32 cq_sz;
83 int err;
84
85 cq_sz = mlx5_cqwq_get_size(&cq->wq);
86 cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
87
88 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
89 if (err)
90 return err;
91
92 err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
93 if (err)
94 return err;
95
96 err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
97 if (err)
98 return err;
99
100 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
101 if (err)
102 return err;
103
104 return 0;
105 }
106
mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp * eq,struct devlink_fmsg * fmsg)107 int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
108 {
109 int err;
110
111 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
112 if (err)
113 return err;
114
115 err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
116 if (err)
117 return err;
118
119 err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
120 if (err)
121 return err;
122
123 err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
124 if (err)
125 return err;
126
127 err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
128 if (err)
129 return err;
130
131 err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
132 if (err)
133 return err;
134
135 return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
136 }
137
/*
 * Create the health reporters of @priv: the TX reporter first, then the RX
 * reporter. Nothing is returned to the caller.
 */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_tx_create(priv);

	mlx5e_reporter_rx_create(priv);
}
143
/*
 * Destroy the health reporters of @priv in the reverse of their creation
 * order: the RX reporter first, then the TX reporter.
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);

	mlx5e_reporter_tx_destroy(priv);
}
149
mlx5e_health_channels_update(struct mlx5e_priv * priv)150 void mlx5e_health_channels_update(struct mlx5e_priv *priv)
151 {
152 if (priv->tx_reporter)
153 devlink_health_reporter_state_update(priv->tx_reporter,
154 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
155 if (priv->rx_reporter)
156 devlink_health_reporter_state_update(priv->rx_reporter,
157 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
158 }
159
/*
 * Move send queue @sqn from the error state to ready in two
 * mlx5e_modify_sq() steps: ERR -> RST, then RST -> RDY. Each step uses a
 * parameter block that is zeroed apart from the two state fields.
 *
 * Returns 0 on success. If a step fails, the failure is logged on @dev and
 * the error from mlx5e_modify_sq() is returned; the second step is not
 * attempted when the first one fails.
 */
int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
{
	struct mlx5e_modify_sq_param to_reset = {
		.curr_state = MLX5_SQC_STATE_ERR,
		.next_state = MLX5_SQC_STATE_RST,
	};
	struct mlx5e_modify_sq_param to_ready = {
		.curr_state = MLX5_SQC_STATE_RST,
		.next_state = MLX5_SQC_STATE_RDY,
	};
	int ret;

	ret = mlx5e_modify_sq(mdev, sqn, &to_reset);
	if (ret) {
		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
		return ret;
	}

	ret = mlx5e_modify_sq(mdev, sqn, &to_ready);
	if (ret)
		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);

	return ret;
}
186
mlx5e_health_recover_channels(struct mlx5e_priv * priv)187 int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
188 {
189 int err = 0;
190
191 rtnl_lock();
192 mutex_lock(&priv->state_lock);
193
194 if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
195 goto out;
196
197 err = mlx5e_safe_reopen_channels(priv);
198
199 out:
200 mutex_unlock(&priv->state_lock);
201 rtnl_unlock();
202
203 return err;
204 }
205
mlx5e_health_channel_eq_recover(struct net_device * dev,struct mlx5_eq_comp * eq,struct mlx5e_ch_stats * stats)206 int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
207 struct mlx5e_ch_stats *stats)
208 {
209 u32 eqe_count;
210
211 netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
212 eq->core.eqn, eq->core.cons_index, eq->core.irqn);
213
214 eqe_count = mlx5_eq_poll_irq_disabled(eq);
215 if (!eqe_count)
216 return -EIO;
217
218 netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
219 eqe_count, eq->core.eqn);
220
221 stats->eq_rearm++;
222 return 0;
223 }
224
mlx5e_health_report(struct mlx5e_priv * priv,struct devlink_health_reporter * reporter,char * err_str,struct mlx5e_err_ctx * err_ctx)225 int mlx5e_health_report(struct mlx5e_priv *priv,
226 struct devlink_health_reporter *reporter, char *err_str,
227 struct mlx5e_err_ctx *err_ctx)
228 {
229 netdev_err(priv->netdev, "%s\n", err_str);
230
231 if (!reporter)
232 return err_ctx->recover(err_ctx->ctx);
233
234 return devlink_health_report(reporter, err_str, err_ctx);
235 }
236
237 #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg * fmsg,const void * value,u32 value_len)238 static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
239 const void *value, u32 value_len)
240
241 {
242 u32 data_size;
243 int err = 0;
244 u32 offset;
245
246 for (offset = 0; offset < value_len; offset += data_size) {
247 data_size = value_len - offset;
248 if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
249 data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
250 err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
251 if (err)
252 break;
253 }
254 return err;
255 }
256
mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv * priv,struct mlx5_rsc_key * key,struct devlink_fmsg * fmsg)257 int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
258 struct devlink_fmsg *fmsg)
259 {
260 struct mlx5_core_dev *mdev = priv->mdev;
261 struct mlx5_rsc_dump_cmd *cmd;
262 struct page *page;
263 int cmd_err, err;
264 int end_err;
265 int size;
266
267 if (IS_ERR_OR_NULL(mdev->rsc_dump))
268 return -EOPNOTSUPP;
269
270 page = alloc_page(GFP_KERNEL);
271 if (!page)
272 return -ENOMEM;
273
274 err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
275 if (err)
276 goto free_page;
277
278 cmd = mlx5_rsc_dump_cmd_create(mdev, key);
279 if (IS_ERR(cmd)) {
280 err = PTR_ERR(cmd);
281 goto free_page;
282 }
283
284 do {
285 cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
286 if (cmd_err < 0) {
287 err = cmd_err;
288 goto destroy_cmd;
289 }
290
291 err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
292 if (err)
293 goto destroy_cmd;
294
295 } while (cmd_err > 0);
296
297 destroy_cmd:
298 mlx5_rsc_dump_cmd_destroy(cmd);
299 end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
300 if (end_err)
301 err = end_err;
302 free_page:
303 __free_page(page);
304 return err;
305 }
306
mlx5e_health_queue_dump(struct mlx5e_priv * priv,struct devlink_fmsg * fmsg,int queue_idx,char * lbl)307 int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
308 int queue_idx, char *lbl)
309 {
310 struct mlx5_rsc_key key = {};
311 int err;
312
313 key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
314 key.index1 = queue_idx;
315 key.size = PAGE_SIZE;
316 key.num_of_obj1 = 1;
317
318 err = devlink_fmsg_obj_nest_start(fmsg);
319 if (err)
320 return err;
321
322 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
323 if (err)
324 return err;
325
326 err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
327 if (err)
328 return err;
329
330 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
331 if (err)
332 return err;
333
334 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
335 if (err)
336 return err;
337
338 return devlink_fmsg_obj_nest_end(fmsg);
339 }
340