1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd */
3
4 #include <linux/mlx5/driver.h>
5 #include <linux/mlx5/device.h>
6 #include "mlx5_core.h"
7 #include "dev.h"
8 #include "sf/vhca_event.h"
9 #include "sf/sf.h"
10 #include "sf/mlx5_ifc_vhca_event.h"
11 #include "ecpf.h"
12 #define CREATE_TRACE_POINTS
13 #include "diag/dev_tracepoint.h"
14
/* Per parent-device bookkeeping of SF (sub-function) auxiliary devices.
 * One instance hangs off dev->priv.sf_dev_table when SFs are supported.
 */
struct mlx5_sf_dev_table {
	struct xarray devices;		/* sf_index -> struct mlx5_sf_dev * */
	unsigned int max_sfs;		/* max SF devices this HCA supports */
	phys_addr_t base_address;	/* start of the SF BAR region (BAR 2) */
	u64 sf_bar_length;		/* per-SF slice of the BAR region */
	struct notifier_block nb;	/* vhca state change notifier */
	struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */
	struct workqueue_struct *active_wq;	/* probes externally created active SFs */
	struct work_struct work;		/* queued on active_wq */
	u8 stop_active_wq:1;	/* signals the active-SF scan to bail out early */
	struct mlx5_core_dev *dev;	/* parent PF/ECPF core device */
};
27
mlx5_sf_dev_supported(const struct mlx5_core_dev * dev)28 static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
29 {
30 return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
31 }
32
mlx5_sf_dev_allocated(const struct mlx5_core_dev * dev)33 bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
34 {
35 struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
36
37 return table && !xa_empty(&table->devices);
38 }
39
sfnum_show(struct device * dev,struct device_attribute * attr,char * buf)40 static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
41 {
42 struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
43 struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
44
45 return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
46 }
47 static DEVICE_ATTR_RO(sfnum);
48
/* sysfs attributes attached to every SF auxiliary device */
static struct attribute *sf_device_attrs[] = {
	&dev_attr_sfnum.attr,
	NULL,
};

static const struct attribute_group sf_attr_group = {
	.attrs = sf_device_attrs,
};

/* NULL-terminated group list assigned to adev.dev.groups in mlx5_sf_dev_add() */
static const struct attribute_group *sf_attr_groups[2] = {
	&sf_attr_group,
	NULL
};
62
mlx5_sf_dev_release(struct device * device)63 static void mlx5_sf_dev_release(struct device *device)
64 {
65 struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
66 struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
67
68 mlx5_adev_idx_free(adev->id);
69 kfree(sf_dev);
70 }
71
mlx5_sf_dev_remove(struct mlx5_core_dev * dev,struct mlx5_sf_dev * sf_dev)72 static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev)
73 {
74 int id;
75
76 id = sf_dev->adev.id;
77 trace_mlx5_sf_dev_del(dev, sf_dev, id);
78
79 auxiliary_device_delete(&sf_dev->adev);
80 auxiliary_device_uninit(&sf_dev->adev);
81 }
82
/* Create and register the auxiliary device for one SF.
 *
 * @dev:      parent core device
 * @sf_index: SF index relative to the SF function-id base (xarray key)
 * @fn_id:    absolute hardware function id of the SF
 * @sfnum:    software function number (exposed through sysfs)
 *
 * On any failure the error is logged and all partially acquired resources
 * are released; callers do not observe an error code.
 */
static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
{
	struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
	struct mlx5_sf_dev *sf_dev;
	struct pci_dev *pdev;
	int err;
	int id;

	/* Validate support up front, before allocating an adev index or the
	 * device object, so the error unwind stays a single goto ladder.
	 */
	if (!table->max_sfs) {
		err = -EOPNOTSUPP;
		goto add_err;
	}

	id = mlx5_adev_idx_alloc();
	if (id < 0) {
		err = id;
		goto add_err;
	}

	sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
	if (!sf_dev) {
		err = -ENOMEM;
		goto idx_err;
	}
	pdev = dev->pdev;
	sf_dev->adev.id = id;
	sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
	sf_dev->adev.dev.release = mlx5_sf_dev_release;
	sf_dev->adev.dev.parent = &pdev->dev;
	sf_dev->adev.dev.groups = sf_attr_groups;
	sf_dev->sfnum = sfnum;
	sf_dev->parent_mdev = dev;
	sf_dev->fn_id = fn_id;
	/* Each SF owns a fixed-size slice of the parent's SF BAR region. */
	sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);

	trace_mlx5_sf_dev_add(dev, sf_dev, id);

	err = auxiliary_device_init(&sf_dev->adev);
	if (err)
		goto init_err;

	err = auxiliary_device_add(&sf_dev->adev);
	if (err) {
		/* uninit drops the last ref; release() frees id and sf_dev */
		auxiliary_device_uninit(&sf_dev->adev);
		goto add_err;
	}

	err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
	if (err)
		goto xa_err;
	return;

xa_err:
	/* remove() tears down the adev; release() frees id and sf_dev */
	mlx5_sf_dev_remove(dev, sf_dev);
	goto add_err;
init_err:
	kfree(sf_dev);
idx_err:
	mlx5_adev_idx_free(id);
add_err:
	mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
		      sf_index, sfnum, err);
}
147
/* Drop the table entry for @sf_index, then tear down its auxiliary device. */
static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
{
	xa_erase(&dev->priv.sf_dev_table->devices, sf_index);
	mlx5_sf_dev_remove(dev, sf_dev);
}
155
/* vhca state change notifier callback.
 *
 * Creates or destroys the SF auxiliary device matching the function whose
 * state changed. Holds table->table_lock to serialize against the
 * active-SF probing work and concurrent events. Events for function ids
 * outside this device's SF range are ignored. Always returns 0.
 */
static int
mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
{
	struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
	const struct mlx5_vhca_state_event *event = data;
	struct mlx5_sf_dev *sf_dev;
	u16 max_functions;
	u16 sf_index;
	u16 base_id;

	max_functions = mlx5_sf_max_functions(table->dev);
	if (!max_functions)
		return 0;

	/* Only handle function ids inside [base_id, base_id + max_functions). */
	base_id = mlx5_sf_start_function_id(table->dev);
	if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
		return 0;

	sf_index = event->function_id - base_id;
	mutex_lock(&table->table_lock);
	sf_dev = xa_load(&table->devices, sf_index);
	switch (event->new_vhca_state) {
	case MLX5_VHCA_STATE_INVALID:
	case MLX5_VHCA_STATE_ALLOCATED:
		/* SF went back to a non-usable state; drop its device if any. */
		if (sf_dev)
			mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
		break;
	case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
		if (sf_dev)
			mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
		else
			mlx5_core_err(table->dev,
				      "SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n",
				      sf_index, event->sw_function_id);
		break;
	case MLX5_VHCA_STATE_ACTIVE:
		/* Create the device only once; a duplicate ACTIVE is a no-op. */
		if (!sf_dev)
			mlx5_sf_dev_add(table->dev, sf_index, event->function_id,
					event->sw_function_id);
		break;
	default:
		break;
	}
	mutex_unlock(&table->table_lock);
	return 0;
}
202
mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table * table)203 static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
204 {
205 struct mlx5_core_dev *dev = table->dev;
206 u16 max_functions;
207 u16 function_id;
208 int err = 0;
209 int i;
210
211 max_functions = mlx5_sf_max_functions(dev);
212 function_id = mlx5_sf_start_function_id(dev);
213 /* Arm the vhca context as the vhca event notifier */
214 for (i = 0; i < max_functions; i++) {
215 err = mlx5_vhca_event_arm(dev, function_id);
216 if (err)
217 return err;
218
219 function_id++;
220 }
221 return 0;
222 }
223
/* Work handler that scans all SF function ids and creates auxiliary
 * devices for those firmware reports as ACTIVE. Used when SFs are
 * created externally (this host is not the eswitch manager). Bails out
 * early when stop_active_wq is set during teardown.
 */
static void mlx5_sf_dev_add_active_work(struct work_struct *work)
{
	struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work);
	u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
	struct mlx5_core_dev *dev = table->dev;
	u16 max_functions;
	u16 function_id;
	u16 sw_func_id;
	int err = 0;
	u8 state;
	int i;

	max_functions = mlx5_sf_max_functions(dev);
	function_id = mlx5_sf_start_function_id(dev);
	for (i = 0; i < max_functions; i++, function_id++) {
		if (table->stop_active_wq)
			return;
		err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
		if (err)
			/* A failure of specific vhca doesn't mean others will
			 * fail as well.
			 */
			continue;
		state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
		if (state != MLX5_VHCA_STATE_ACTIVE)
			continue;

		sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
		/* table_lock serializes against the vhca event handler. */
		mutex_lock(&table->table_lock);
		/* Don't probe a device which is already probed */
		if (!xa_load(&table->devices, i))
			mlx5_sf_dev_add(dev, i, function_id, sw_func_id);
		/* There is a race where SF got inactive after the query
		 * above. e.g.: the query returns that the state of the
		 * SF is active, and after that the eswitch manager set it to
		 * inactive.
		 * This case cannot be managed in SW, since the probing of the
		 * SF is on one system, and the inactivation is on a different
		 * system.
		 * If the inactive is done after the SF perform init_hca(),
		 * the SF will fully probe and then removed. If it was
		 * done before init_hca(), the SF probe will fail.
		 */
		mutex_unlock(&table->table_lock);
	}
}
270
271 /* In case SFs are generated externally, probe active SFs */
mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table * table)272 static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table)
273 {
274 if (MLX5_CAP_GEN(table->dev, eswitch_manager))
275 return 0; /* the table is local */
276
277 /* Use a workqueue to probe active SFs, which are in large
278 * quantity and may take up to minutes to probe.
279 */
280 table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
281 if (!table->active_wq)
282 return -ENOMEM;
283 INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work);
284 queue_work(table->active_wq, &table->work);
285 return 0;
286 }
287
mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table * table)288 static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table)
289 {
290 if (table->active_wq) {
291 table->stop_active_wq = true;
292 destroy_workqueue(table->active_wq);
293 }
294 }
295
mlx5_sf_dev_table_create(struct mlx5_core_dev * dev)296 void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
297 {
298 struct mlx5_sf_dev_table *table;
299 unsigned int max_sfs;
300 int err;
301
302 if (!mlx5_sf_dev_supported(dev))
303 return;
304
305 table = kzalloc(sizeof(*table), GFP_KERNEL);
306 if (!table) {
307 err = -ENOMEM;
308 goto table_err;
309 }
310
311 table->nb.notifier_call = mlx5_sf_dev_state_change_handler;
312 table->dev = dev;
313 if (MLX5_CAP_GEN(dev, max_num_sf))
314 max_sfs = MLX5_CAP_GEN(dev, max_num_sf);
315 else
316 max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf);
317 table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
318 table->base_address = pci_resource_start(dev->pdev, 2);
319 table->max_sfs = max_sfs;
320 xa_init(&table->devices);
321 mutex_init(&table->table_lock);
322 dev->priv.sf_dev_table = table;
323
324 err = mlx5_vhca_event_notifier_register(dev, &table->nb);
325 if (err)
326 goto vhca_err;
327
328 err = mlx5_sf_dev_queue_active_work(table);
329 if (err)
330 goto add_active_err;
331
332 err = mlx5_sf_dev_vhca_arm_all(table);
333 if (err)
334 goto arm_err;
335 mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs);
336 return;
337
338 arm_err:
339 mlx5_sf_dev_destroy_active_work(table);
340 add_active_err:
341 mlx5_vhca_event_notifier_unregister(dev, &table->nb);
342 vhca_err:
343 table->max_sfs = 0;
344 kfree(table);
345 dev->priv.sf_dev_table = NULL;
346 table_err:
347 mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
348 }
349
mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table * table)350 static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
351 {
352 struct mlx5_sf_dev *sf_dev;
353 unsigned long index;
354
355 xa_for_each(&table->devices, index, sf_dev) {
356 xa_erase(&table->devices, index);
357 mlx5_sf_dev_remove(table->dev, sf_dev);
358 }
359 }
360
/* Tear down the SF device table created by mlx5_sf_dev_table_create().
 * Safe to call when the table was never created (NULL check below).
 */
void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

	if (!table)
		return;

	/* Stop the active-SF probing work and the vhca event handler first,
	 * so nothing can add or remove devices while we drain the table.
	 */
	mlx5_sf_dev_destroy_active_work(table);
	mlx5_vhca_event_notifier_unregister(dev, &table->nb);
	mutex_destroy(&table->table_lock);

	/* Now that event handler is not running, it is safe to destroy
	 * the sf device without race.
	 */
	mlx5_sf_dev_destroy_all(table);

	WARN_ON(!xa_empty(&table->devices));
	kfree(table);
	dev->priv.sf_dev_table = NULL;
}
381