1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Virtio PCI driver - modern (virtio 1.0) device support
4 *
5 * This module allows virtio devices to be used over a virtual PCI device.
6 * This can be used with QEMU based VMMs like KVM or Xen.
7 *
8 * Copyright IBM Corp. 2007
9 * Copyright Red Hat, Inc. 2014
10 *
11 * Authors:
12 * Anthony Liguori <aliguori@us.ibm.com>
13 * Rusty Russell <rusty@rustcorp.com.au>
14 * Michael S. Tsirkin <mst@redhat.com>
15 */
16
17 #include <linux/delay.h>
18 #define VIRTIO_PCI_NO_LEGACY
19 #define VIRTIO_RING_NO_LEGACY
20 #include "virtio_pci_common.h"
21
vp_get_features(struct virtio_device * vdev)22 static u64 vp_get_features(struct virtio_device *vdev)
23 {
24 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
25
26 return vp_modern_get_features(&vp_dev->mdev);
27 }
28
/*
 * Mark the transport features that virtio_pci itself handles on @vdev.
 *
 * @vdev:     virtio device whose feature bits are updated via
 *            __virtio_set_bit()
 * @features: feature bits to pick from
 *
 * VIRTIO_F_SR_IOV is set only when it is present in @features and the PCI
 * function exposes an SR-IOV extended capability.  VIRTIO_F_RING_RESET is
 * set whenever it is present in @features.
 */
static void vp_transport_features(struct virtio_device *vdev, u64 features)
{
	struct pci_dev *pci_dev = to_vp_device(vdev)->pci_dev;

	if (features & BIT_ULL(VIRTIO_F_SR_IOV)) {
		/* Config space is only probed when the bit was requested. */
		if (pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
			__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
	}

	if (features & BIT_ULL(VIRTIO_F_RING_RESET))
		__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
}
41
42 /* virtio config->finalize_features() implementation */
vp_finalize_features(struct virtio_device * vdev)43 static int vp_finalize_features(struct virtio_device *vdev)
44 {
45 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
46 u64 features = vdev->features;
47
48 /* Give virtio_ring a chance to accept features. */
49 vring_transport_features(vdev);
50
51 /* Give virtio_pci a chance to accept features. */
52 vp_transport_features(vdev, features);
53
54 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
55 dev_err(&vdev->dev, "virtio: device uses modern interface "
56 "but does not have VIRTIO_F_VERSION_1\n");
57 return -EINVAL;
58 }
59
60 vp_modern_set_features(&vp_dev->mdev, vdev->features);
61
62 return 0;
63 }
64
65 /* virtio config->get() implementation */
vp_get(struct virtio_device * vdev,unsigned int offset,void * buf,unsigned int len)66 static void vp_get(struct virtio_device *vdev, unsigned int offset,
67 void *buf, unsigned int len)
68 {
69 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
70 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
71 void __iomem *device = mdev->device;
72 u8 b;
73 __le16 w;
74 __le32 l;
75
76 BUG_ON(offset + len > mdev->device_len);
77
78 switch (len) {
79 case 1:
80 b = ioread8(device + offset);
81 memcpy(buf, &b, sizeof b);
82 break;
83 case 2:
84 w = cpu_to_le16(ioread16(device + offset));
85 memcpy(buf, &w, sizeof w);
86 break;
87 case 4:
88 l = cpu_to_le32(ioread32(device + offset));
89 memcpy(buf, &l, sizeof l);
90 break;
91 case 8:
92 l = cpu_to_le32(ioread32(device + offset));
93 memcpy(buf, &l, sizeof l);
94 l = cpu_to_le32(ioread32(device + offset + sizeof l));
95 memcpy(buf + sizeof l, &l, sizeof l);
96 break;
97 default:
98 BUG();
99 }
100 }
101
102 /* the config->set() implementation. it's symmetric to the config->get()
103 * implementation */
vp_set(struct virtio_device * vdev,unsigned int offset,const void * buf,unsigned int len)104 static void vp_set(struct virtio_device *vdev, unsigned int offset,
105 const void *buf, unsigned int len)
106 {
107 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
108 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
109 void __iomem *device = mdev->device;
110 u8 b;
111 __le16 w;
112 __le32 l;
113
114 BUG_ON(offset + len > mdev->device_len);
115
116 switch (len) {
117 case 1:
118 memcpy(&b, buf, sizeof b);
119 iowrite8(b, device + offset);
120 break;
121 case 2:
122 memcpy(&w, buf, sizeof w);
123 iowrite16(le16_to_cpu(w), device + offset);
124 break;
125 case 4:
126 memcpy(&l, buf, sizeof l);
127 iowrite32(le32_to_cpu(l), device + offset);
128 break;
129 case 8:
130 memcpy(&l, buf, sizeof l);
131 iowrite32(le32_to_cpu(l), device + offset);
132 memcpy(&l, buf + sizeof l, sizeof l);
133 iowrite32(le32_to_cpu(l), device + offset + sizeof l);
134 break;
135 default:
136 BUG();
137 }
138 }
139
vp_generation(struct virtio_device * vdev)140 static u32 vp_generation(struct virtio_device *vdev)
141 {
142 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
143
144 return vp_modern_generation(&vp_dev->mdev);
145 }
146
147 /* config->{get,set}_status() implementations */
vp_get_status(struct virtio_device * vdev)148 static u8 vp_get_status(struct virtio_device *vdev)
149 {
150 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
151
152 return vp_modern_get_status(&vp_dev->mdev);
153 }
154
/*
 * virtio config->set_status() implementation.
 *
 * Passes @status to vp_modern_set_status().  A @status of 0 is rejected
 * with BUG_ON(); vp_reset() is the function that writes 0.
 */
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	/* We should never be setting status to 0. */
	BUG_ON(!status);

	vp_modern_set_status(&to_vp_device(vdev)->mdev, status);
}
163
vp_reset(struct virtio_device * vdev)164 static void vp_reset(struct virtio_device *vdev)
165 {
166 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
167 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
168
169 /* 0 status means a reset. */
170 vp_modern_set_status(mdev, 0);
171 /* After writing 0 to device_status, the driver MUST wait for a read of
172 * device_status to return 0 before reinitializing the device.
173 * This will flush out the status write, and flush in device writes,
174 * including MSI-X interrupts, if any.
175 */
176 while (vp_modern_get_status(mdev))
177 msleep(1);
178 /* Flush pending VQ/configuration callbacks. */
179 vp_synchronize_vectors(vdev);
180 }
181
/*
 * Program the device-side registers for @vq: ring size, the three ring
 * addresses and, when @msix_vec is not VIRTIO_MSI_NO_VECTOR, the queue's
 * MSI-X vector.
 *
 * Returns 0 on success, or -EBUSY when vp_modern_queue_vector() hands back
 * VIRTIO_MSI_NO_VECTOR for the requested vector.  The queue is not enabled
 * here.
 */
static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	unsigned long qidx = vq->index;
	u16 assigned;

	/* activate the queue */
	vp_modern_set_queue_size(mdev, qidx, virtqueue_get_vring_size(vq));
	vp_modern_queue_address(mdev, qidx, virtqueue_get_desc_addr(vq),
				virtqueue_get_avail_addr(vq),
				virtqueue_get_used_addr(vq));

	/* Nothing more to do when the queue has no vector of its own. */
	if (msix_vec == VIRTIO_MSI_NO_VECTOR)
		return 0;

	assigned = vp_modern_queue_vector(mdev, qidx, msix_vec);
	if (assigned == VIRTIO_MSI_NO_VECTOR)
		return -EBUSY;

	return 0;
}
204
vp_modern_disable_vq_and_reset(struct virtqueue * vq)205 static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
206 {
207 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
208 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
209 struct virtio_pci_vq_info *info;
210 unsigned long flags;
211
212 if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
213 return -ENOENT;
214
215 vp_modern_set_queue_reset(mdev, vq->index);
216
217 info = vp_dev->vqs[vq->index];
218
219 /* delete vq from irq handler */
220 spin_lock_irqsave(&vp_dev->lock, flags);
221 list_del(&info->node);
222 spin_unlock_irqrestore(&vp_dev->lock, flags);
223
224 INIT_LIST_HEAD(&info->node);
225
226 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
227 __virtqueue_break(vq);
228 #endif
229
230 /* For the case where vq has an exclusive irq, call synchronize_irq() to
231 * wait for completion.
232 *
233 * note: We can't use disable_irq() since it conflicts with the affinity
234 * managed IRQ that is used by some drivers.
235 */
236 if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
237 synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
238
239 vq->reset = true;
240
241 return 0;
242 }
243
vp_modern_enable_vq_after_reset(struct virtqueue * vq)244 static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
245 {
246 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
247 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
248 struct virtio_pci_vq_info *info;
249 unsigned long flags, index;
250 int err;
251
252 if (!vq->reset)
253 return -EBUSY;
254
255 index = vq->index;
256 info = vp_dev->vqs[index];
257
258 if (vp_modern_get_queue_reset(mdev, index))
259 return -EBUSY;
260
261 if (vp_modern_get_queue_enable(mdev, index))
262 return -EBUSY;
263
264 err = vp_active_vq(vq, info->msix_vector);
265 if (err)
266 return err;
267
268 if (vq->callback) {
269 spin_lock_irqsave(&vp_dev->lock, flags);
270 list_add(&info->node, &vp_dev->virtqueues);
271 spin_unlock_irqrestore(&vp_dev->lock, flags);
272 } else {
273 INIT_LIST_HEAD(&info->node);
274 }
275
276 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
277 __virtqueue_unbreak(vq);
278 #endif
279
280 vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
281 vq->reset = false;
282
283 return 0;
284 }
285
/*
 * vp_dev->config_vector hook for modern devices: forwards @vector to
 * vp_modern_config_vector() and returns its result unchanged.
 */
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;

	return vp_modern_config_vector(mdev, vector);
}
290
/*
 * vp_dev->setup_vq hook for modern devices: create the vring for queue
 * @index and program it into the device.
 *
 * @vp_dev:   virtio PCI device that owns the queue
 * @info:     per-queue bookkeeping; only info->msix_vector is written here
 * @index:    queue index; must be below vp_modern_get_num_queues()
 * @callback: passed through to vring_create_virtqueue()
 * @name:     passed through to vring_create_virtqueue()
 * @ctx:      passed through to vring_create_virtqueue()
 * @msix_vec: MSI-X vector for the queue, or VIRTIO_MSI_NO_VECTOR
 *
 * Returns the new virtqueue, or an ERR_PTR():
 *   -EINVAL  @index is out of range, or the device-reported queue size is
 *            not a power of two
 *   -ENOENT  the device reports queue size 0, or the queue is already
 *            enabled
 *   -ENOMEM  vring creation or the notify mapping failed
 *   other    the error returned by vp_active_vq()
 *
 * The queue is not enabled here; vp_modern_find_vqs() does that once all
 * queues have been set up.
 */
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
				  struct virtio_pci_vq_info *info,
				  unsigned int index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  bool ctx,
				  u16 msix_vec)
{
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	struct virtqueue *vq;
	u16 num;
	int err;

	if (index >= vp_modern_get_num_queues(mdev))
		return ERR_PTR(-EINVAL);

	/* Check if queue is either not available or already active. */
	num = vp_modern_get_queue_size(mdev, index);
	if (!num || vp_modern_get_queue_enable(mdev, index))
		return ERR_PTR(-ENOENT);

	/* num is non-zero here, so this tests "not a power of two". */
	if (num & (num - 1)) {
		/* Newline-terminated so the log record is complete. */
		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u\n", num);
		return ERR_PTR(-EINVAL);
	}

	info->msix_vector = msix_vec;

	/* create the vring */
	vq = vring_create_virtqueue(index, num,
				    SMP_CACHE_BYTES, &vp_dev->vdev,
				    true, true, ctx,
				    vp_notify, callback, name);
	if (!vq)
		return ERR_PTR(-ENOMEM);

	vq->num_max = num;

	err = vp_active_vq(vq, msix_vec);
	if (err)
		goto err;

	/* The notify mapping is kept in vq->priv; del_vq() reads it back. */
	vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
	if (!vq->priv) {
		err = -ENOMEM;
		goto err;
	}

	return vq;

err:
	vring_del_virtqueue(vq);
	return ERR_PTR(err);
}
346
/*
 * virtio config->find_vqs() implementation.
 *
 * Delegates queue discovery and setup to vp_find_vqs() and, only if that
 * succeeded, enables every queue on vdev->vqs.
 *
 * Returns 0 on success or the non-zero result of vp_find_vqs().
 */
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char * const names[], const bool *ctx,
			      struct irq_affinity *desc)
{
	struct virtio_pci_modern_device *mdev = &to_vp_device(vdev)->mdev;
	struct virtqueue *vq;
	int rc;

	rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
	if (rc)
		return rc;

	/*
	 * Select and activate all queues. Has to be done last: once we do
	 * this, there's no way to go back except reset.
	 */
	list_for_each_entry(vq, &vdev->vqs, list)
		vp_modern_set_queue_enable(mdev, vq->index, true);

	return 0;
}
368
del_vq(struct virtio_pci_vq_info * info)369 static void del_vq(struct virtio_pci_vq_info *info)
370 {
371 struct virtqueue *vq = info->vq;
372 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
373 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
374
375 if (vp_dev->msix_enabled)
376 vp_modern_queue_vector(mdev, vq->index,
377 VIRTIO_MSI_NO_VECTOR);
378
379 if (!mdev->notify_base)
380 pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv);
381
382 vring_del_virtqueue(vq);
383 }
384
/*
 * Walk the vendor-specific PCI capabilities of @dev looking for a virtio
 * shared-memory capability whose id equals @required_id.
 *
 * @dev:         PCI device to scan
 * @required_id: shared-memory region id to look for
 * @bar:         out: BAR index named by the capability
 * @offset:      out: 64-bit offset of the region inside that BAR
 * @len:         out: 64-bit length of the region
 *
 * Capabilities of another type, with a cap_len other than
 * sizeof(struct virtio_pci_cap64), with a different id, or naming a BAR
 * index >= PCI_STD_NUM_BARS are skipped.
 *
 * Returns the config-space position of the first matching capability, or 0
 * when there is none.  The out parameters are written only on a match.
 */
static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id,
				   u8 *bar, u64 *offset, u64 *len)
{
	int pos;

	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
	     pos > 0;
	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
		u8 cap_type, cap_len, shm_id, cap_bar;
		u32 dword;
		u64 shm_offset, shm_length;

		pci_read_config_byte(dev,
				     pos + offsetof(struct virtio_pci_cap,
						    cfg_type),
				     &cap_type);
		if (cap_type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)
			continue;

		pci_read_config_byte(dev,
				     pos + offsetof(struct virtio_pci_cap,
						    cap_len),
				     &cap_len);
		if (cap_len != sizeof(struct virtio_pci_cap64)) {
			dev_err(&dev->dev, "%s: shm cap with bad size offset:"
				" %d size: %d\n", __func__, pos, cap_len);
			continue;
		}

		pci_read_config_byte(dev,
				     pos + offsetof(struct virtio_pci_cap, id),
				     &shm_id);
		if (shm_id != required_id)
			continue;

		pci_read_config_byte(dev,
				     pos + offsetof(struct virtio_pci_cap, bar),
				     &cap_bar);
		if (cap_bar >= PCI_STD_NUM_BARS)
			continue;

		/*
		 * Type and ID match, and the BAR value isn't reserved.
		 * Looks good.
		 */

		/* Low 32 bits of offset, then of length ... */
		pci_read_config_dword(dev,
				      pos + offsetof(struct virtio_pci_cap,
						     offset),
				      &dword);
		shm_offset = dword;
		pci_read_config_dword(dev,
				      pos + offsetof(struct virtio_pci_cap,
						     length),
				      &dword);
		shm_length = dword;

		/* ... followed by the high 32 bits of each. */
		pci_read_config_dword(dev,
				      pos + offsetof(struct virtio_pci_cap64,
						     offset_hi),
				      &dword);
		shm_offset |= ((u64)dword) << 32;
		pci_read_config_dword(dev,
				      pos + offsetof(struct virtio_pci_cap64,
						     length_hi),
				      &dword);
		shm_length |= ((u64)dword) << 32;

		*bar = cap_bar;
		*offset = shm_offset;
		*len = shm_length;

		return pos;
	}

	return 0;
}
449
vp_get_shm_region(struct virtio_device * vdev,struct virtio_shm_region * region,u8 id)450 static bool vp_get_shm_region(struct virtio_device *vdev,
451 struct virtio_shm_region *region, u8 id)
452 {
453 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
454 struct pci_dev *pci_dev = vp_dev->pci_dev;
455 u8 bar;
456 u64 offset, len;
457 phys_addr_t phys_addr;
458 size_t bar_len;
459
460 if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len))
461 return false;
462
463 phys_addr = pci_resource_start(pci_dev, bar);
464 bar_len = pci_resource_len(pci_dev, bar);
465
466 if ((offset + len) < offset) {
467 dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n",
468 __func__);
469 return false;
470 }
471
472 if (offset + len > bar_len) {
473 dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n",
474 __func__);
475 return false;
476 }
477
478 region->len = len;
479 region->addr = (u64) phys_addr + offset;
480
481 return true;
482 }
483
484 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
485 .get = NULL,
486 .set = NULL,
487 .generation = vp_generation,
488 .get_status = vp_get_status,
489 .set_status = vp_set_status,
490 .reset = vp_reset,
491 .find_vqs = vp_modern_find_vqs,
492 .del_vqs = vp_del_vqs,
493 .synchronize_cbs = vp_synchronize_vectors,
494 .get_features = vp_get_features,
495 .finalize_features = vp_finalize_features,
496 .bus_name = vp_bus_name,
497 .set_vq_affinity = vp_set_vq_affinity,
498 .get_vq_affinity = vp_get_vq_affinity,
499 .get_shm_region = vp_get_shm_region,
500 .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
501 .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
502 };
503
504 static const struct virtio_config_ops virtio_pci_config_ops = {
505 .get = vp_get,
506 .set = vp_set,
507 .generation = vp_generation,
508 .get_status = vp_get_status,
509 .set_status = vp_set_status,
510 .reset = vp_reset,
511 .find_vqs = vp_modern_find_vqs,
512 .del_vqs = vp_del_vqs,
513 .synchronize_cbs = vp_synchronize_vectors,
514 .get_features = vp_get_features,
515 .finalize_features = vp_finalize_features,
516 .bus_name = vp_bus_name,
517 .set_vq_affinity = vp_set_vq_affinity,
518 .get_vq_affinity = vp_get_vq_affinity,
519 .get_shm_region = vp_get_shm_region,
520 .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
521 .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
522 };
523
524 /* the PCI probing function */
virtio_pci_modern_probe(struct virtio_pci_device * vp_dev)525 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
526 {
527 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
528 struct pci_dev *pci_dev = vp_dev->pci_dev;
529 int err;
530
531 mdev->pci_dev = pci_dev;
532
533 err = vp_modern_probe(mdev);
534 if (err)
535 return err;
536
537 if (mdev->device)
538 vp_dev->vdev.config = &virtio_pci_config_ops;
539 else
540 vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
541
542 vp_dev->config_vector = vp_config_vector;
543 vp_dev->setup_vq = setup_vq;
544 vp_dev->del_vq = del_vq;
545 vp_dev->isr = mdev->isr;
546 vp_dev->vdev.id = mdev->id;
547
548 return 0;
549 }
550
virtio_pci_modern_remove(struct virtio_pci_device * vp_dev)551 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
552 {
553 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
554
555 vp_modern_remove(mdev);
556 }
557