1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/pci.h>
5 #include <linux/interrupt.h>
6 #include <linux/notifier.h>
7 #include <linux/mlx5/driver.h>
8 #include <linux/mlx5/vport.h>
9 #include "mlx5_core.h"
10 #include "mlx5_irq.h"
11 #include "pci_irq.h"
12 #include "lib/sf.h"
13 #include "lib/eq.h"
14 #ifdef CONFIG_RFS_ACCEL
15 #include <linux/cpu_rmap.h>
16 #endif
17
/* Divisor used by irq_pools_init() to derive the number of SF control IRQs
 * from the maximum number of SFs.
 */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Upper bound on the number of SF control IRQs */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

/* Min/max thresholds handed to irq_pool_alloc() for the completion (COMP)
 * and control (CTRL) pools; irq_pool_alloc() scales them by
 * MLX5_EQ_REFS_PER_IRQ.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP 8
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL UINT_MAX
#define MLX5_EQ_SHARE_IRQ_MIN_COMP 1
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL 4
27
28 struct mlx5_irq {
29 struct atomic_notifier_head nh;
30 cpumask_var_t mask;
31 char name[MLX5_MAX_IRQ_FORMATTED_NAME];
32 struct mlx5_irq_pool *pool;
33 int refcount;
34 struct msi_map map;
35 u32 pool_index;
36 };
37
/* Per-device set of IRQ pools. The SF pools are only created by
 * irq_pools_init() when SFs are supported and enough vectors exist.
 */
struct mlx5_irq_table {
	/* PCI function pool; also the fallback when an SF pool is missing */
	struct mlx5_irq_pool *pcif_pool;
	/* SF control IRQs */
	struct mlx5_irq_pool *sf_ctrl_pool;
	/* SF completion IRQs */
	struct mlx5_irq_pool *sf_comp_pool;
};
43
/* Map a function number to a vport number: regular functions map to
 * themselves, EC VF functions are offset from the EC VF vport base.
 */
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	return ec_vf_func ? mlx5_core_ec_vf_vport_base(dev) + func - 1 : func;
}
52
53 /**
54 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
55 * to be ssigned to each VF.
56 * @dev: PF to work on
57 * @num_vfs: Number of enabled VFs
58 */
mlx5_get_default_msix_vec_count(struct mlx5_core_dev * dev,int num_vfs)59 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
60 {
61 int num_vf_msix, min_msix, max_msix;
62
63 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
64 if (!num_vf_msix)
65 return 0;
66
67 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
68 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
69
70 /* Limit maximum number of MSI-X vectors so the default configuration
71 * has some available in the pool. This will allow the user to increase
72 * the number of vectors in a VF without having to first size-down other
73 * VFs.
74 */
75 return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
76 }
77
78 /**
79 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
80 * @dev: PF to work on
81 * @function_id: Internal PCI VF function IDd
82 * @msix_vec_count: Number of MSI-X vectors to set
83 */
mlx5_set_msix_vec_count(struct mlx5_core_dev * dev,int function_id,int msix_vec_count)84 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
85 int msix_vec_count)
86 {
87 int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
88 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
89 void *hca_cap = NULL, *query_cap = NULL, *cap;
90 int num_vf_msix, min_msix, max_msix;
91 bool ec_vf_function;
92 int vport;
93 int ret;
94
95 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
96 if (!num_vf_msix)
97 return 0;
98
99 if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
100 return -EOPNOTSUPP;
101
102 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
103 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
104
105 if (msix_vec_count < min_msix)
106 return -EINVAL;
107
108 if (msix_vec_count > max_msix)
109 return -EOVERFLOW;
110
111 query_cap = kvzalloc(query_sz, GFP_KERNEL);
112 hca_cap = kvzalloc(set_sz, GFP_KERNEL);
113 if (!hca_cap || !query_cap) {
114 ret = -ENOMEM;
115 goto out;
116 }
117
118 ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
119 vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
120 ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
121 if (ret)
122 goto out;
123
124 cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
125 memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
126 MLX5_UN_SZ_BYTES(hca_cap_union));
127 MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
128
129 MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
130 MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
131 MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
132 MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
133
134 MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
135 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
136 ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
137 out:
138 kvfree(hca_cap);
139 kvfree(query_cap);
140 return ret;
141 }
142
143 /* mlx5_system_free_irq - Free an IRQ
144 * @irq: IRQ to free
145 *
146 * Free the IRQ and other resources such as rmap from the system.
147 * BUT doesn't free or remove reference from mlx5.
148 * This function is very important for the shutdown flow, where we need to
149 * cleanup system resoruces but keep mlx5 objects alive,
150 * see mlx5_irq_table_free_irqs().
151 */
mlx5_system_free_irq(struct mlx5_irq * irq)152 static void mlx5_system_free_irq(struct mlx5_irq *irq)
153 {
154 struct mlx5_irq_pool *pool = irq->pool;
155 #ifdef CONFIG_RFS_ACCEL
156 struct cpu_rmap *rmap;
157 #endif
158
159 /* free_irq requires that affinity_hint and rmap will be cleared before
160 * calling it. To satisfy this requirement, we call
161 * irq_cpu_rmap_remove() to remove the notifier
162 */
163 irq_update_affinity_hint(irq->map.virq, NULL);
164 #ifdef CONFIG_RFS_ACCEL
165 rmap = mlx5_eq_table_get_rmap(pool->dev);
166 if (rmap)
167 irq_cpu_rmap_remove(rmap, irq->map.virq);
168 #endif
169
170 free_irq(irq->map.virq, &irq->nh);
171 if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
172 pci_msix_free_irq(pool->dev->pdev, irq->map);
173 }
174
irq_release(struct mlx5_irq * irq)175 static void irq_release(struct mlx5_irq *irq)
176 {
177 struct mlx5_irq_pool *pool = irq->pool;
178
179 xa_erase(&pool->irqs, irq->pool_index);
180 mlx5_system_free_irq(irq);
181 free_cpumask_var(irq->mask);
182 kfree(irq);
183 }
184
mlx5_irq_put(struct mlx5_irq * irq)185 int mlx5_irq_put(struct mlx5_irq *irq)
186 {
187 struct mlx5_irq_pool *pool = irq->pool;
188 int ret = 0;
189
190 mutex_lock(&pool->lock);
191 irq->refcount--;
192 if (!irq->refcount) {
193 irq_release(irq);
194 ret = 1;
195 }
196 mutex_unlock(&pool->lock);
197 return ret;
198 }
199
mlx5_irq_read_locked(struct mlx5_irq * irq)200 int mlx5_irq_read_locked(struct mlx5_irq *irq)
201 {
202 lockdep_assert_held(&irq->pool->lock);
203 return irq->refcount;
204 }
205
mlx5_irq_get_locked(struct mlx5_irq * irq)206 int mlx5_irq_get_locked(struct mlx5_irq *irq)
207 {
208 lockdep_assert_held(&irq->pool->lock);
209 if (WARN_ON_ONCE(!irq->refcount))
210 return 0;
211 irq->refcount++;
212 return 1;
213 }
214
irq_get(struct mlx5_irq * irq)215 static int irq_get(struct mlx5_irq *irq)
216 {
217 int err;
218
219 mutex_lock(&irq->pool->lock);
220 err = mlx5_irq_get_locked(irq);
221 mutex_unlock(&irq->pool->lock);
222 return err;
223 }
224
irq_int_handler(int irq,void * nh)225 static irqreturn_t irq_int_handler(int irq, void *nh)
226 {
227 atomic_notifier_call_chain(nh, 0, NULL);
228 return IRQ_HANDLED;
229 }
230
irq_sf_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)231 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
232 {
233 snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
234 }
235
irq_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)236 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
237 {
238 if (!pool->xa_num_irqs.max) {
239 /* in case we only have a single irq for the device */
240 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
241 return;
242 }
243
244 if (!vecidx) {
245 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
246 return;
247 }
248
249 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
250 }
251
mlx5_irq_alloc(struct mlx5_irq_pool * pool,int i,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)252 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
253 struct irq_affinity_desc *af_desc,
254 struct cpu_rmap **rmap)
255 {
256 struct mlx5_core_dev *dev = pool->dev;
257 char name[MLX5_MAX_IRQ_NAME];
258 struct mlx5_irq *irq;
259 int err;
260
261 irq = kzalloc(sizeof(*irq), GFP_KERNEL);
262 if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
263 kfree(irq);
264 return ERR_PTR(-ENOMEM);
265 }
266
267 if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
268 /* The vector at index 0 is always statically allocated. If
269 * dynamic irq is not supported all vectors are statically
270 * allocated. In both cases just get the irq number and set
271 * the index.
272 */
273 irq->map.virq = pci_irq_vector(dev->pdev, i);
274 irq->map.index = i;
275 } else {
276 irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
277 if (!irq->map.virq) {
278 err = irq->map.index;
279 goto err_alloc_irq;
280 }
281 }
282
283 if (i && rmap && *rmap) {
284 #ifdef CONFIG_RFS_ACCEL
285 err = irq_cpu_rmap_add(*rmap, irq->map.virq);
286 if (err)
287 goto err_irq_rmap;
288 #endif
289 }
290 if (!mlx5_irq_pool_is_sf_pool(pool))
291 irq_set_name(pool, name, i);
292 else
293 irq_sf_set_name(pool, name, i);
294 ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
295 snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
296 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
297 err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
298 &irq->nh);
299 if (err) {
300 mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
301 goto err_req_irq;
302 }
303
304 if (af_desc) {
305 cpumask_copy(irq->mask, &af_desc->mask);
306 irq_set_affinity_and_hint(irq->map.virq, irq->mask);
307 }
308 irq->pool = pool;
309 irq->refcount = 1;
310 irq->pool_index = i;
311 err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
312 if (err) {
313 mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
314 irq->pool_index, err);
315 goto err_xa;
316 }
317 return irq;
318 err_xa:
319 if (af_desc)
320 irq_update_affinity_hint(irq->map.virq, NULL);
321 free_irq(irq->map.virq, &irq->nh);
322 err_req_irq:
323 #ifdef CONFIG_RFS_ACCEL
324 if (i && rmap && *rmap) {
325 free_irq_cpu_rmap(*rmap);
326 *rmap = NULL;
327 }
328 err_irq_rmap:
329 #endif
330 if (i && pci_msix_can_alloc_dyn(dev->pdev))
331 pci_msix_free_irq(dev->pdev, irq->map);
332 err_alloc_irq:
333 free_cpumask_var(irq->mask);
334 kfree(irq);
335 return ERR_PTR(err);
336 }
337
mlx5_irq_attach_nb(struct mlx5_irq * irq,struct notifier_block * nb)338 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
339 {
340 int ret;
341
342 ret = irq_get(irq);
343 if (!ret)
344 /* Something very bad happens here, we are enabling EQ
345 * on non-existing IRQ.
346 */
347 return -ENOENT;
348 ret = atomic_notifier_chain_register(&irq->nh, nb);
349 if (ret)
350 mlx5_irq_put(irq);
351 return ret;
352 }
353
mlx5_irq_detach_nb(struct mlx5_irq * irq,struct notifier_block * nb)354 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
355 {
356 int err = 0;
357
358 err = atomic_notifier_chain_unregister(&irq->nh, nb);
359 mlx5_irq_put(irq);
360 return err;
361 }
362
mlx5_irq_get_affinity_mask(struct mlx5_irq * irq)363 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
364 {
365 return irq->mask;
366 }
367
mlx5_irq_get_index(struct mlx5_irq * irq)368 int mlx5_irq_get_index(struct mlx5_irq *irq)
369 {
370 return irq->map.index;
371 }
372
373 /* irq_pool API */
374
375 /* requesting an irq from a given pool according to given index */
376 static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool * pool,int vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)377 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
378 struct irq_affinity_desc *af_desc,
379 struct cpu_rmap **rmap)
380 {
381 struct mlx5_irq *irq;
382
383 mutex_lock(&pool->lock);
384 irq = xa_load(&pool->irqs, vecidx);
385 if (irq) {
386 mlx5_irq_get_locked(irq);
387 goto unlock;
388 }
389 irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
390 unlock:
391 mutex_unlock(&pool->lock);
392 return irq;
393 }
394
sf_ctrl_irq_pool_get(struct mlx5_irq_table * irq_table)395 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
396 {
397 return irq_table->sf_ctrl_pool;
398 }
399
sf_irq_pool_get(struct mlx5_irq_table * irq_table)400 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
401 {
402 return irq_table->sf_comp_pool;
403 }
404
mlx5_irq_pool_get(struct mlx5_core_dev * dev)405 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
406 {
407 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
408 struct mlx5_irq_pool *pool = NULL;
409
410 if (mlx5_core_is_sf(dev))
411 pool = sf_irq_pool_get(irq_table);
412
413 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
414 * the PF IRQs pool in case the SF pool doesn't exist.
415 */
416 return pool ? pool : irq_table->pcif_pool;
417 }
418
ctrl_irq_pool_get(struct mlx5_core_dev * dev)419 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
420 {
421 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
422 struct mlx5_irq_pool *pool = NULL;
423
424 if (mlx5_core_is_sf(dev))
425 pool = sf_ctrl_irq_pool_get(irq_table);
426
427 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
428 * the PF IRQs pool in case the SF pool doesn't exist.
429 */
430 return pool ? pool : irq_table->pcif_pool;
431 }
432
_mlx5_irq_release(struct mlx5_irq * irq)433 static void _mlx5_irq_release(struct mlx5_irq *irq)
434 {
435 synchronize_irq(irq->map.virq);
436 mlx5_irq_put(irq);
437 }
438
/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 *
 * Synchronizes with the IRQ's handlers and drops the caller's reference.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	struct mlx5_irq *irq = ctrl_irq;

	_mlx5_irq_release(irq);
}
447
448 /**
449 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
450 * @dev: mlx5 device that requesting the IRQ.
451 *
452 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
453 */
mlx5_ctrl_irq_request(struct mlx5_core_dev * dev)454 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
455 {
456 struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
457 struct irq_affinity_desc af_desc;
458 struct mlx5_irq *irq;
459
460 cpumask_copy(&af_desc.mask, cpu_online_mask);
461 af_desc.is_managed = false;
462 if (!mlx5_irq_pool_is_sf_pool(pool)) {
463 /* In case we are allocating a control IRQ from a pci device's pool.
464 * This can happen also for a SF if the SFs pool is empty.
465 */
466 if (!pool->xa_num_irqs.max) {
467 cpumask_clear(&af_desc.mask);
468 /* In case we only have a single IRQ for PF/VF */
469 cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
470 }
471 /* Allocate the IRQ in index 0. The vector was already allocated */
472 irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
473 } else {
474 irq = mlx5_irq_affinity_request(pool, &af_desc);
475 }
476
477 return irq;
478 }
479
480 /**
481 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
482 * @dev: mlx5 device that requesting the IRQ.
483 * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
484 * provided.
485 * @af_desc: affinity descriptor for this IRQ.
486 * @rmap: pointer to reverse map pointer for completion interrupts
487 *
488 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
489 */
mlx5_irq_request(struct mlx5_core_dev * dev,u16 vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)490 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
491 struct irq_affinity_desc *af_desc,
492 struct cpu_rmap **rmap)
493 {
494 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
495 struct mlx5_irq_pool *pool;
496 struct mlx5_irq *irq;
497
498 pool = irq_table->pcif_pool;
499 irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
500 if (IS_ERR(irq))
501 return irq;
502 mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
503 irq->map.virq, cpumask_pr_args(&af_desc->mask),
504 irq->refcount / MLX5_EQ_REFS_PER_IRQ);
505 return irq;
506 }
507
508 /**
509 * mlx5_msix_alloc - allocate msix interrupt
510 * @dev: mlx5 device from which to request
511 * @handler: interrupt handler
512 * @affdesc: affinity descriptor
513 * @name: interrupt name
514 *
515 * Returns: struct msi_map with result encoded.
516 * Note: the caller must make sure to release the irq by calling
517 * mlx5_msix_free() if shutdown was initiated.
518 */
mlx5_msix_alloc(struct mlx5_core_dev * dev,irqreturn_t (* handler)(int,void *),const struct irq_affinity_desc * affdesc,const char * name)519 struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
520 irqreturn_t (*handler)(int, void *),
521 const struct irq_affinity_desc *affdesc,
522 const char *name)
523 {
524 struct msi_map map;
525 int err;
526
527 if (!dev->pdev) {
528 map.virq = 0;
529 map.index = -EINVAL;
530 return map;
531 }
532
533 map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
534 if (!map.virq)
535 return map;
536
537 err = request_irq(map.virq, handler, 0, name, NULL);
538 if (err) {
539 mlx5_core_warn(dev, "err %d\n", err);
540 pci_msix_free_irq(dev->pdev, map);
541 map.virq = 0;
542 map.index = -ENOMEM;
543 }
544 return map;
545 }
546 EXPORT_SYMBOL(mlx5_msix_alloc);
547
548 /**
549 * mlx5_msix_free - free a previously allocated msix interrupt
550 * @dev: mlx5 device associated with interrupt
551 * @map: map previously returned by mlx5_msix_alloc()
552 */
mlx5_msix_free(struct mlx5_core_dev * dev,struct msi_map map)553 void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
554 {
555 free_irq(map.virq, NULL);
556 pci_msix_free_irq(dev->pdev, map);
557 }
558 EXPORT_SYMBOL(mlx5_msix_free);
559
/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 *
 * Synchronizes with the IRQ's handlers and drops the caller's reference.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	struct mlx5_irq *vec_irq = irq;

	_mlx5_irq_release(vec_irq);
}
568
569 /**
570 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
571 * @dev: mlx5 device that is requesting the IRQ.
572 * @cpu: CPU to bind the IRQ to.
573 * @vecidx: vector index to request an IRQ for.
574 * @rmap: pointer to reverse map pointer for completion interrupts
575 *
576 * Each IRQ is bound to at most 1 CPU.
577 * This function is requests one IRQ, for the given @vecidx.
578 *
579 * This function returns a pointer to the irq on success, or an error pointer
580 * in case of an error.
581 */
mlx5_irq_request_vector(struct mlx5_core_dev * dev,u16 cpu,u16 vecidx,struct cpu_rmap ** rmap)582 struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
583 u16 vecidx, struct cpu_rmap **rmap)
584 {
585 struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
586 struct mlx5_irq_pool *pool = table->pcif_pool;
587 struct irq_affinity_desc af_desc;
588 int offset = 1;
589
590 if (!pool->xa_num_irqs.max)
591 offset = 0;
592
593 af_desc.is_managed = false;
594 cpumask_clear(&af_desc.mask);
595 cpumask_set_cpu(cpu, &af_desc.mask);
596 return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
597 }
598
599 static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev * dev,int start,int size,char * name,u32 min_threshold,u32 max_threshold)600 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
601 u32 min_threshold, u32 max_threshold)
602 {
603 struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
604
605 if (!pool)
606 return ERR_PTR(-ENOMEM);
607 pool->dev = dev;
608 mutex_init(&pool->lock);
609 xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
610 pool->xa_num_irqs.min = start;
611 pool->xa_num_irqs.max = start + size - 1;
612 if (name)
613 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
614 "%s", name);
615 pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
616 pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
617 mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
618 name, size, start);
619 return pool;
620 }
621
irq_pool_free(struct mlx5_irq_pool * pool)622 static void irq_pool_free(struct mlx5_irq_pool *pool)
623 {
624 struct mlx5_irq *irq;
625 unsigned long index;
626
627 /* There are cases in which we are destrying the irq_table before
628 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
629 * which might not have been freed.
630 */
631 xa_for_each(&pool->irqs, index, irq)
632 irq_release(irq);
633 xa_destroy(&pool->irqs);
634 mutex_destroy(&pool->lock);
635 kfree(pool->irqs_per_cpu);
636 kvfree(pool);
637 }
638
irq_pools_init(struct mlx5_core_dev * dev,int sf_vec,int pcif_vec)639 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
640 {
641 struct mlx5_irq_table *table = dev->priv.irq_table;
642 int num_sf_ctrl_by_msix;
643 int num_sf_ctrl_by_sfs;
644 int num_sf_ctrl;
645 int err;
646
647 /* init pcif_pool */
648 table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
649 MLX5_EQ_SHARE_IRQ_MIN_COMP,
650 MLX5_EQ_SHARE_IRQ_MAX_COMP);
651 if (IS_ERR(table->pcif_pool))
652 return PTR_ERR(table->pcif_pool);
653 if (!mlx5_sf_max_functions(dev))
654 return 0;
655 if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
656 mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
657 return 0;
658 }
659
660 /* init sf_ctrl_pool */
661 num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
662 num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
663 MLX5_SFS_PER_CTRL_IRQ);
664 num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
665 num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
666 table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
667 "mlx5_sf_ctrl",
668 MLX5_EQ_SHARE_IRQ_MIN_CTRL,
669 MLX5_EQ_SHARE_IRQ_MAX_CTRL);
670 if (IS_ERR(table->sf_ctrl_pool)) {
671 err = PTR_ERR(table->sf_ctrl_pool);
672 goto err_pf;
673 }
674 /* init sf_comp_pool */
675 table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
676 sf_vec - num_sf_ctrl, "mlx5_sf_comp",
677 MLX5_EQ_SHARE_IRQ_MIN_COMP,
678 MLX5_EQ_SHARE_IRQ_MAX_COMP);
679 if (IS_ERR(table->sf_comp_pool)) {
680 err = PTR_ERR(table->sf_comp_pool);
681 goto err_sf_ctrl;
682 }
683
684 table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
685 if (!table->sf_comp_pool->irqs_per_cpu) {
686 err = -ENOMEM;
687 goto err_irqs_per_cpu;
688 }
689
690 return 0;
691
692 err_irqs_per_cpu:
693 irq_pool_free(table->sf_comp_pool);
694 err_sf_ctrl:
695 irq_pool_free(table->sf_ctrl_pool);
696 err_pf:
697 irq_pool_free(table->pcif_pool);
698 return err;
699 }
700
irq_pools_destroy(struct mlx5_irq_table * table)701 static void irq_pools_destroy(struct mlx5_irq_table *table)
702 {
703 if (table->sf_ctrl_pool) {
704 irq_pool_free(table->sf_comp_pool);
705 irq_pool_free(table->sf_ctrl_pool);
706 }
707 irq_pool_free(table->pcif_pool);
708 }
709
mlx5_irq_pool_free_irqs(struct mlx5_irq_pool * pool)710 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
711 {
712 struct mlx5_irq *irq;
713 unsigned long index;
714
715 xa_for_each(&pool->irqs, index, irq)
716 mlx5_system_free_irq(irq);
717
718 }
719
mlx5_irq_pools_free_irqs(struct mlx5_irq_table * table)720 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
721 {
722 if (table->sf_ctrl_pool) {
723 mlx5_irq_pool_free_irqs(table->sf_comp_pool);
724 mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
725 }
726 mlx5_irq_pool_free_irqs(table->pcif_pool);
727 }
728
729 /* irq_table API */
730
mlx5_irq_table_init(struct mlx5_core_dev * dev)731 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
732 {
733 struct mlx5_irq_table *irq_table;
734
735 if (mlx5_core_is_sf(dev))
736 return 0;
737
738 irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
739 dev->priv.numa_node);
740 if (!irq_table)
741 return -ENOMEM;
742
743 dev->priv.irq_table = irq_table;
744 return 0;
745 }
746
mlx5_irq_table_cleanup(struct mlx5_core_dev * dev)747 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
748 {
749 if (mlx5_core_is_sf(dev))
750 return;
751
752 kvfree(dev->priv.irq_table);
753 }
754
mlx5_irq_table_get_num_comp(struct mlx5_irq_table * table)755 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
756 {
757 if (!table->pcif_pool->xa_num_irqs.max)
758 return 1;
759 return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
760 }
761
mlx5_irq_table_create(struct mlx5_core_dev * dev)762 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
763 {
764 int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
765 MLX5_CAP_GEN(dev, max_num_eqs) :
766 1 << MLX5_CAP_GEN(dev, log_max_eq);
767 int total_vec;
768 int pcif_vec;
769 int req_vec;
770 int err;
771 int n;
772
773 if (mlx5_core_is_sf(dev))
774 return 0;
775
776 pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
777 pcif_vec = min_t(int, pcif_vec, num_eqs);
778
779 total_vec = pcif_vec;
780 if (mlx5_sf_max_functions(dev))
781 total_vec += MLX5_IRQ_CTRL_SF_MAX +
782 MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
783 total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
784 pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
785
786 req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
787 n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
788 if (n < 0)
789 return n;
790
791 err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
792 if (err)
793 pci_free_irq_vectors(dev->pdev);
794
795 return err;
796 }
797
mlx5_irq_table_destroy(struct mlx5_core_dev * dev)798 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
799 {
800 struct mlx5_irq_table *table = dev->priv.irq_table;
801
802 if (mlx5_core_is_sf(dev))
803 return;
804
805 /* There are cases where IRQs still will be in used when we reaching
806 * to here. Hence, making sure all the irqs are released.
807 */
808 irq_pools_destroy(table);
809 pci_free_irq_vectors(dev->pdev);
810 }
811
mlx5_irq_table_free_irqs(struct mlx5_core_dev * dev)812 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
813 {
814 struct mlx5_irq_table *table = dev->priv.irq_table;
815
816 if (mlx5_core_is_sf(dev))
817 return;
818
819 mlx5_irq_pools_free_irqs(table);
820 pci_free_irq_vectors(dev->pdev);
821 }
822
mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table * table)823 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
824 {
825 if (table->sf_comp_pool)
826 return min_t(int, num_online_cpus(),
827 table->sf_comp_pool->xa_num_irqs.max -
828 table->sf_comp_pool->xa_num_irqs.min + 1);
829 else
830 return mlx5_irq_table_get_num_comp(table);
831 }
832
mlx5_irq_table_get(struct mlx5_core_dev * dev)833 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
834 {
835 #ifdef CONFIG_MLX5_SF
836 if (mlx5_core_is_sf(dev))
837 return dev->priv.parent_mdev->priv.irq_table;
838 #endif
839 return dev->priv.irq_table;
840 }
841