1 /*
2  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3  * Author: Joerg Roedel <joerg.roedel@amd.com>
4  *         Leo Duran <leo.duran@amd.com>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
18  */
19 
20 #include <linux/ratelimit.h>
21 #include <linux/pci.h>
22 #include <linux/pci-ats.h>
23 #include <linux/bitmap.h>
24 #include <linux/slab.h>
25 #include <linux/debugfs.h>
26 #include <linux/scatterlist.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/iommu-helper.h>
29 #include <linux/iommu.h>
30 #include <linux/delay.h>
31 #include <linux/amd-iommu.h>
32 #include <linux/notifier.h>
33 #include <linux/export.h>
34 #include <asm/msidef.h>
35 #include <asm/proto.h>
36 #include <asm/iommu.h>
37 #include <asm/gart.h>
38 #include <asm/dma.h>
39 
40 #include "amd_iommu_proto.h"
41 #include "amd_iommu_types.h"
42 
43 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
44 
45 #define LOOP_TIMEOUT	100000
46 
47 /*
48  * This bitmap is used to advertise the page sizes our hardware supports
49  * to the IOMMU core, which will then use this information to split
50  * physically contiguous memory regions it is mapping into page sizes
51  * that we support.
52  *
53  * Traditionally the IOMMU core just handed us the mappings directly,
54  * after making sure the size is a power-of-two multiple of 4KiB and that the
55  * mapping has natural alignment.
56  *
57  * To retain this behavior, we currently advertise that we support
58  * all page sizes that are a power-of-two multiple of 4KiB.
59  *
60  * If at some point we'd like to utilize the IOMMU core's new behavior,
61  * we could change this to advertise the real page sizes we support.
62  */
63 #define AMD_IOMMU_PGSIZES	(~0xFFFUL)
64 
65 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
66 
67 /* A list of preallocated protection domains */
68 static LIST_HEAD(iommu_pd_list);
69 static DEFINE_SPINLOCK(iommu_pd_list_lock);
70 
71 /* List of all available dev_data structures */
72 static LIST_HEAD(dev_data_list);
73 static DEFINE_SPINLOCK(dev_data_list_lock);
74 
75 /*
76  * Domain for untranslated devices - only allocated
77  * if iommu=pt is passed on the kernel cmd line.
78  */
79 static struct protection_domain *pt_domain;
80 
81 static struct iommu_ops amd_iommu_ops;
82 
83 static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
84 int amd_iommu_max_glx_val = -1;
85 
86 static struct dma_map_ops amd_iommu_dma_ops;
87 
88 /*
89  * general struct to manage commands sent to an IOMMU
90  */
91 struct iommu_cmd {
92 	u32 data[4];
93 };
94 
95 static void update_domain(struct protection_domain *domain);
96 static int __init alloc_passthrough_domain(void);
97 
98 /****************************************************************************
99  *
100  * Helper functions
101  *
102  ****************************************************************************/
103 
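/* Allocate a dev_data structure for a device ID and add it to the global list */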
104 static struct iommu_dev_data *alloc_dev_data(u16 devid)
105 {
106 	struct iommu_dev_data *dev_data;
107 	unsigned long flags;
108 
109 	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
110 	if (!dev_data)
111 		return NULL;
112 
113 	dev_data->devid = devid;
114 	atomic_set(&dev_data->bind, 0);
115 
116 	spin_lock_irqsave(&dev_data_list_lock, flags);
117 	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
118 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
119 
120 	return dev_data;
121 }
122 
123 static void free_dev_data(struct iommu_dev_data *dev_data)
124 {
125 	unsigned long flags;
126 
127 	spin_lock_irqsave(&dev_data_list_lock, flags);
128 	list_del(&dev_data->dev_data_list);
129 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
130 
131 	kfree(dev_data);
132 }
133 
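/* Look up an existing dev_data structure by device ID; returns NULL if none exists */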
134 static struct iommu_dev_data *search_dev_data(u16 devid)
135 {
136 	struct iommu_dev_data *dev_data;
137 	unsigned long flags;
138 
139 	spin_lock_irqsave(&dev_data_list_lock, flags);
140 	list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
141 		if (dev_data->devid == devid)
142 			goto out_unlock;
143 	}
144 
145 	dev_data = NULL;
146 
147 out_unlock:
148 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
149 
150 	return dev_data;
151 }
152 
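/* Find the dev_data structure for a device ID, allocating a new one if necessary */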
153 static struct iommu_dev_data *find_dev_data(u16 devid)
154 {
155 	struct iommu_dev_data *dev_data;
156 
157 	dev_data = search_dev_data(devid);
158 
159 	if (dev_data == NULL)
160 		dev_data = alloc_dev_data(devid);
161 
162 	return dev_data;
163 }
164 
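/* Calculate the 16-bit device ID (bus/device/function) for a PCI device */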
165 static inline u16 get_device_id(struct device *dev)
166 {
167 	struct pci_dev *pdev = to_pci_dev(dev);
168 
169 	return calc_devid(pdev->bus->number, pdev->devfn);
170 }
171 
172 static struct iommu_dev_data *get_dev_data(struct device *dev)
173 {
174 	return dev->archdata.iommu;
175 }
176 
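/* A device can use IOMMUv2 only if it has the ATS, PRI and PASID capabilities */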
177 static bool pci_iommuv2_capable(struct pci_dev *pdev)
178 {
179 	static const int caps[] = {
180 		PCI_EXT_CAP_ID_ATS,
181 		PCI_EXT_CAP_ID_PRI,
182 		PCI_EXT_CAP_ID_PASID,
183 	};
184 	int i, pos;
185 
186 	for (i = 0; i < 3; ++i) {
187 		pos = pci_find_ext_capability(pdev, caps[i]);
188 		if (pos == 0)
189 			return false;
190 	}
191 
192 	return true;
193 }
194 
195 static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
196 {
197 	struct iommu_dev_data *dev_data;
198 
199 	dev_data = get_dev_data(&pdev->dev);
200 
201 	return dev_data->errata & (1 << erratum) ? true : false;
202 }
203 
204 /*
205  * In this function the list of preallocated protection domains is traversed to
206  * find the domain for a specific device
207  */
208 static struct dma_ops_domain *find_protection_domain(u16 devid)
209 {
210 	struct dma_ops_domain *entry, *ret = NULL;
211 	unsigned long flags;
212 	u16 alias = amd_iommu_alias_table[devid];
213 
214 	if (list_empty(&iommu_pd_list))
215 		return NULL;
216 
217 	spin_lock_irqsave(&iommu_pd_list_lock, flags);
218 
219 	list_for_each_entry(entry, &iommu_pd_list, list) {
220 		if (entry->target_dev == devid ||
221 		    entry->target_dev == alias) {
222 			ret = entry;
223 			break;
224 		}
225 	}
226 
227 	spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
228 
229 	return ret;
230 }
231 
232 /*
233  * This function checks if the driver got a valid device from the caller to
234  * avoid dereferencing invalid pointers.
235  */
236 static bool check_device(struct device *dev)
237 {
238 	u16 devid;
239 
240 	if (!dev || !dev->dma_mask)
241 		return false;
242 
243 	/* No device or no PCI device */
244 	if (dev->bus != &pci_bus_type)
245 		return false;
246 
247 	devid = get_device_id(dev);
248 
249 	/* Out of our scope? */
250 	if (devid > amd_iommu_last_bdf)
251 		return false;
252 
253 	if (amd_iommu_rlookup_table[devid] == NULL)
254 		return false;
255 
256 	return true;
257 }
258 
259 static int iommu_init_device(struct device *dev)
260 {
261 	struct pci_dev *pdev = to_pci_dev(dev);
262 	struct iommu_dev_data *dev_data;
263 	u16 alias;
264 
265 	if (dev->archdata.iommu)
266 		return 0;
267 
268 	dev_data = find_dev_data(get_device_id(dev));
269 	if (!dev_data)
270 		return -ENOMEM;
271 
272 	alias = amd_iommu_alias_table[dev_data->devid];
273 	if (alias != dev_data->devid) {
274 		struct iommu_dev_data *alias_data;
275 
276 		alias_data = find_dev_data(alias);
277 		if (alias_data == NULL) {
278 			pr_err("AMD-Vi: Warning: Unhandled device %s\n",
279 					dev_name(dev));
280 			free_dev_data(dev_data);
281 			return -ENOTSUPP;
282 		}
283 		dev_data->alias_data = alias_data;
284 	}
285 
286 	if (pci_iommuv2_capable(pdev)) {
287 		struct amd_iommu *iommu;
288 
289 		iommu              = amd_iommu_rlookup_table[dev_data->devid];
290 		dev_data->iommu_v2 = iommu->is_iommu_v2;
291 	}
292 
293 	dev->archdata.iommu = dev_data;
294 
295 	return 0;
296 }
297 
298 static void iommu_ignore_device(struct device *dev)
299 {
300 	u16 devid, alias;
301 
302 	devid = get_device_id(dev);
303 	alias = amd_iommu_alias_table[devid];
304 
305 	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
306 	memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
307 
308 	amd_iommu_rlookup_table[devid] = NULL;
309 	amd_iommu_rlookup_table[alias] = NULL;
310 }
311 
312 static void iommu_uninit_device(struct device *dev)
313 {
314 	/*
315 	 * Nothing to do here - we keep dev_data around for unplugged devices
316 	 * and reuse it when the device is re-plugged - not doing so would
317 	 * introduce a ton of races.
318 	 */
319 }
320 
321 void __init amd_iommu_uninit_devices(void)
322 {
323 	struct iommu_dev_data *dev_data, *n;
324 	struct pci_dev *pdev = NULL;
325 
326 	for_each_pci_dev(pdev) {
327 
328 		if (!check_device(&pdev->dev))
329 			continue;
330 
331 		iommu_uninit_device(&pdev->dev);
332 	}
333 
334 	/* Free all of our dev_data structures */
335 	list_for_each_entry_safe(dev_data, n, &dev_data_list, dev_data_list)
336 		free_dev_data(dev_data);
337 }
338 
339 int __init amd_iommu_init_devices(void)
340 {
341 	struct pci_dev *pdev = NULL;
342 	int ret = 0;
343 
344 	for_each_pci_dev(pdev) {
345 
346 		if (!check_device(&pdev->dev))
347 			continue;
348 
349 		ret = iommu_init_device(&pdev->dev);
350 		if (ret == -ENOTSUPP)
351 			iommu_ignore_device(&pdev->dev);
352 		else if (ret)
353 			goto out_free;
354 	}
355 
356 	return 0;
357 
358 out_free:
359 
360 	amd_iommu_uninit_devices();
361 
362 	return ret;
363 }
364 #ifdef CONFIG_AMD_IOMMU_STATS
365 
366 /*
367  * Initialization code for statistics collection
368  */
369 
370 DECLARE_STATS_COUNTER(compl_wait);
371 DECLARE_STATS_COUNTER(cnt_map_single);
372 DECLARE_STATS_COUNTER(cnt_unmap_single);
373 DECLARE_STATS_COUNTER(cnt_map_sg);
374 DECLARE_STATS_COUNTER(cnt_unmap_sg);
375 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
376 DECLARE_STATS_COUNTER(cnt_free_coherent);
377 DECLARE_STATS_COUNTER(cross_page);
378 DECLARE_STATS_COUNTER(domain_flush_single);
379 DECLARE_STATS_COUNTER(domain_flush_all);
380 DECLARE_STATS_COUNTER(alloced_io_mem);
381 DECLARE_STATS_COUNTER(total_map_requests);
382 DECLARE_STATS_COUNTER(complete_ppr);
383 DECLARE_STATS_COUNTER(invalidate_iotlb);
384 DECLARE_STATS_COUNTER(invalidate_iotlb_all);
385 DECLARE_STATS_COUNTER(pri_requests);
386 
387 
388 static struct dentry *stats_dir;
389 static struct dentry *de_fflush;
390 
391 static void amd_iommu_stats_add(struct __iommu_counter *cnt)
392 {
393 	if (stats_dir == NULL)
394 		return;
395 
396 	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
397 				       &cnt->value);
398 }
399 
400 static void amd_iommu_stats_init(void)
401 {
402 	stats_dir = debugfs_create_dir("amd-iommu", NULL);
403 	if (stats_dir == NULL)
404 		return;
405 
406 	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
407 					 (u32 *)&amd_iommu_unmap_flush);
408 
409 	amd_iommu_stats_add(&compl_wait);
410 	amd_iommu_stats_add(&cnt_map_single);
411 	amd_iommu_stats_add(&cnt_unmap_single);
412 	amd_iommu_stats_add(&cnt_map_sg);
413 	amd_iommu_stats_add(&cnt_unmap_sg);
414 	amd_iommu_stats_add(&cnt_alloc_coherent);
415 	amd_iommu_stats_add(&cnt_free_coherent);
416 	amd_iommu_stats_add(&cross_page);
417 	amd_iommu_stats_add(&domain_flush_single);
418 	amd_iommu_stats_add(&domain_flush_all);
419 	amd_iommu_stats_add(&alloced_io_mem);
420 	amd_iommu_stats_add(&total_map_requests);
421 	amd_iommu_stats_add(&complete_ppr);
422 	amd_iommu_stats_add(&invalidate_iotlb);
423 	amd_iommu_stats_add(&invalidate_iotlb_all);
424 	amd_iommu_stats_add(&pri_requests);
425 }
426 
427 #endif
428 
429 /****************************************************************************
430  *
431  * Interrupt handling functions
432  *
433  ****************************************************************************/
434 
435 static void dump_dte_entry(u16 devid)
436 {
437 	int i;
438 
439 	for (i = 0; i < 4; ++i)
440 		pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
441 			amd_iommu_dev_table[devid].data[i]);
442 }
443 
444 static void dump_command(unsigned long phys_addr)
445 {
446 	struct iommu_cmd *cmd = phys_to_virt(phys_addr);
447 	int i;
448 
449 	for (i = 0; i < 4; ++i)
450 		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
451 }
452 
453 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
454 {
455 	int type, devid, domid, flags;
456 	volatile u32 *event = __evt;
457 	int count = 0;
458 	u64 address;
459 
460 retry:
461 	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
462 	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
463 	domid   = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
464 	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
465 	address = (u64)(((u64)event[3]) << 32) | event[2];
466 
467 	if (type == 0) {
468 		/* Did we hit the erratum? */
469 		if (++count == LOOP_TIMEOUT) {
470 			pr_err("AMD-Vi: No event written to event log\n");
471 			return;
472 		}
473 		udelay(1);
474 		goto retry;
475 	}
476 
477 	printk(KERN_ERR "AMD-Vi: Event logged [");
478 
479 	switch (type) {
480 	case EVENT_TYPE_ILL_DEV:
481 		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
482 		       "address=0x%016llx flags=0x%04x]\n",
483 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
484 		       address, flags);
485 		dump_dte_entry(devid);
486 		break;
487 	case EVENT_TYPE_IO_FAULT:
488 		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
489 		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
490 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
491 		       domid, address, flags);
492 		break;
493 	case EVENT_TYPE_DEV_TAB_ERR:
494 		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
495 		       "address=0x%016llx flags=0x%04x]\n",
496 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
497 		       address, flags);
498 		break;
499 	case EVENT_TYPE_PAGE_TAB_ERR:
500 		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
501 		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
502 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
503 		       domid, address, flags);
504 		break;
505 	case EVENT_TYPE_ILL_CMD:
506 		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
507 		dump_command(address);
508 		break;
509 	case EVENT_TYPE_CMD_HARD_ERR:
510 		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
511 		       "flags=0x%04x]\n", address, flags);
512 		break;
513 	case EVENT_TYPE_IOTLB_INV_TO:
514 		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
515 		       "address=0x%016llx]\n",
516 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
517 		       address);
518 		break;
519 	case EVENT_TYPE_INV_DEV_REQ:
520 		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
521 		       "address=0x%016llx flags=0x%04x]\n",
522 		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
523 		       address, flags);
524 		break;
525 	default:
526 		printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
527 	}
528 
529 	memset(__evt, 0, 4 * sizeof(u32));
530 }
531 
532 static void iommu_poll_events(struct amd_iommu *iommu)
533 {
534 	u32 head, tail, status;
535 	unsigned long flags;
536 
537 	spin_lock_irqsave(&iommu->lock, flags);
538 
539 	/* enable event interrupts again */
540 	do {
541 		/*
542 		 * Workaround for Erratum ERBT1312
543 		 * Clearing the EVT_INT bit may race in the hardware, so read
544 		 * it again and make sure it was really cleared
545 		 */
546 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
547 		writel(MMIO_STATUS_EVT_INT_MASK,
548 		       iommu->mmio_base + MMIO_STATUS_OFFSET);
549 	} while (status & MMIO_STATUS_EVT_INT_MASK);
550 
551 	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
552 	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
553 
554 	while (head != tail) {
555 		iommu_print_event(iommu, iommu->evt_buf + head);
556 		head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
557 	}
558 
559 	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
560 
561 	spin_unlock_irqrestore(&iommu->lock, flags);
562 }
563 
564 static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
565 {
566 	struct amd_iommu_fault fault;
567 
568 	INC_STATS_COUNTER(pri_requests);
569 
570 	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
571 		pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
572 		return;
573 	}
574 
575 	fault.address   = raw[1];
576 	fault.pasid     = PPR_PASID(raw[0]);
577 	fault.device_id = PPR_DEVID(raw[0]);
578 	fault.tag       = PPR_TAG(raw[0]);
579 	fault.flags     = PPR_FLAGS(raw[0]);
580 
581 	atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
582 }
583 
584 static void iommu_poll_ppr_log(struct amd_iommu *iommu)
585 {
586 	unsigned long flags;
587 	u32 head, tail, status;
588 
589 	if (iommu->ppr_log == NULL)
590 		return;
591 
592 	spin_lock_irqsave(&iommu->lock, flags);
593 
594 	/* enable ppr interrupts again */
595 	do {
596 		/*
597 		 * Workaround for Erratum ERBT1312
598 		 * Clearing the PPR_INT bit may race in the hardware, so read
599 		 * it again and make sure it was really cleared
600 		 */
601 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
602 		writel(MMIO_STATUS_PPR_INT_MASK,
603 		       iommu->mmio_base + MMIO_STATUS_OFFSET);
604 	} while (status & MMIO_STATUS_PPR_INT_MASK);
605 
606 	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
607 	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
608 
609 	while (head != tail) {
610 		volatile u64 *raw;
611 		u64 entry[2];
612 		int i;
613 
614 		raw = (u64 *)(iommu->ppr_log + head);
615 
616 		/*
617 		 * Hardware bug: Interrupt may arrive before the entry is
618 		 * written to memory. If this happens we need to wait for the
619 		 * entry to arrive.
620 		 */
621 		for (i = 0; i < LOOP_TIMEOUT; ++i) {
622 			if (PPR_REQ_TYPE(raw[0]) != 0)
623 				break;
624 			udelay(1);
625 		}
626 
627 		/* Avoid memcpy function-call overhead */
628 		entry[0] = raw[0];
629 		entry[1] = raw[1];
630 
631 		/*
632 		 * To detect the hardware bug we need to clear the entry
633 		 * back to zero.
634 		 */
635 		raw[0] = raw[1] = 0UL;
636 
637 		/* Update head pointer of hardware ring-buffer */
638 		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
639 		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
640 
641 		/*
642 		 * Release iommu->lock because ppr-handling might need to
643 		 * re-acquire it
644 		 */
645 		spin_unlock_irqrestore(&iommu->lock, flags);
646 
647 		/* Handle PPR entry */
648 		iommu_handle_ppr_entry(iommu, entry);
649 
650 		spin_lock_irqsave(&iommu->lock, flags);
651 
652 		/* Refresh ring-buffer information */
653 		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
654 		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
655 	}
656 
657 	spin_unlock_irqrestore(&iommu->lock, flags);
658 }
659 
660 irqreturn_t amd_iommu_int_thread(int irq, void *data)
661 {
662 	struct amd_iommu *iommu;
663 
664 	for_each_iommu(iommu) {
665 		iommu_poll_events(iommu);
666 		iommu_poll_ppr_log(iommu);
667 	}
668 
669 	return IRQ_HANDLED;
670 }
671 
672 irqreturn_t amd_iommu_int_handler(int irq, void *data)
673 {
674 	return IRQ_WAKE_THREAD;
675 }
676 
677 /****************************************************************************
678  *
679  * IOMMU command queuing functions
680  *
681  ****************************************************************************/
682 
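/* Wait for the IOMMU to write the completion-wait semaphore, or time out */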
683 static int wait_on_sem(volatile u64 *sem)
684 {
685 	int i = 0;
686 
687 	while (*sem == 0 && i < LOOP_TIMEOUT) {
688 		udelay(1);
689 		i += 1;
690 	}
691 
692 	if (i == LOOP_TIMEOUT) {
693 		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
694 		return -EIO;
695 	}
696 
697 	return 0;
698 }
699 
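/* Copy a command into the command buffer and update the tail pointer so the IOMMU sees it */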
700 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
701 			       struct iommu_cmd *cmd,
702 			       u32 tail)
703 {
704 	u8 *target;
705 
706 	target = iommu->cmd_buf + tail;
707 	tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
708 
709 	/* Copy command to buffer */
710 	memcpy(target, cmd, sizeof(*cmd));
711 
712 	/* Tell the IOMMU about it */
713 	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
714 }
715 
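/* Build a COMPLETION_WAIT command that writes back to the given semaphore address */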
716 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
717 {
718 	WARN_ON(address & 0x7ULL);
719 
720 	memset(cmd, 0, sizeof(*cmd));
721 	cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
722 	cmd->data[1] = upper_32_bits(__pa(address));
723 	cmd->data[2] = 1;
724 	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
725 }
726 
727 static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
728 {
729 	memset(cmd, 0, sizeof(*cmd));
730 	cmd->data[0] = devid;
731 	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
732 }
733 
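/* Build an INVALIDATE_IOMMU_PAGES command for an address range in a domain */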
734 static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
735 				  size_t size, u16 domid, int pde)
736 {
737 	u64 pages;
738 	int s;
739 
740 	pages = iommu_num_pages(address, size, PAGE_SIZE);
741 	s     = 0;
742 
743 	if (pages > 1) {
744 		/*
745 		 * If we have to flush more than one page, flush all
746 		 * TLB entries for this domain
747 		 */
748 		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
749 		s = 1;
750 	}
751 
752 	address &= PAGE_MASK;
753 
754 	memset(cmd, 0, sizeof(*cmd));
755 	cmd->data[1] |= domid;
756 	cmd->data[2]  = lower_32_bits(address);
757 	cmd->data[3]  = upper_32_bits(address);
758 	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
759 	if (s) /* size bit - we flush more than one 4kb page */
760 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
761 	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
762 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
763 }
764 
765 static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
766 				  u64 address, size_t size)
767 {
768 	u64 pages;
769 	int s;
770 
771 	pages = iommu_num_pages(address, size, PAGE_SIZE);
772 	s     = 0;
773 
774 	if (pages > 1) {
775 		/*
776 		 * If we have to flush more than one page, flush all
777 		 * TLB entries for this domain
778 		 */
779 		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
780 		s = 1;
781 	}
782 
783 	address &= PAGE_MASK;
784 
785 	memset(cmd, 0, sizeof(*cmd));
786 	cmd->data[0]  = devid;
787 	cmd->data[0] |= (qdep & 0xff) << 24;
788 	cmd->data[1]  = devid;
789 	cmd->data[2]  = lower_32_bits(address);
790 	cmd->data[3]  = upper_32_bits(address);
791 	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
792 	if (s)
793 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
794 }
795 
796 static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
797 				  u64 address, bool size)
798 {
799 	memset(cmd, 0, sizeof(*cmd));
800 
801 	address &= ~(0xfffULL);
802 
803 	cmd->data[0]  = pasid & PASID_MASK;
804 	cmd->data[1]  = domid;
805 	cmd->data[2]  = lower_32_bits(address);
806 	cmd->data[3]  = upper_32_bits(address);
807 	cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
808 	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
809 	if (size)
810 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
811 	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
812 }
813 
814 static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
815 				  int qdep, u64 address, bool size)
816 {
817 	memset(cmd, 0, sizeof(*cmd));
818 
819 	address &= ~(0xfffULL);
820 
821 	cmd->data[0]  = devid;
822 	cmd->data[0] |= (pasid & 0xff) << 16;
823 	cmd->data[0] |= (qdep  & 0xff) << 24;
824 	cmd->data[1]  = devid;
825 	cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
826 	cmd->data[2]  = lower_32_bits(address);
827 	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
828 	cmd->data[3]  = upper_32_bits(address);
829 	if (size)
830 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
831 	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
832 }
833 
834 static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
835 			       int status, int tag, bool gn)
836 {
837 	memset(cmd, 0, sizeof(*cmd));
838 
839 	cmd->data[0]  = devid;
840 	if (gn) {
841 		cmd->data[1]  = pasid & PASID_MASK;
842 		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
843 	}
844 	cmd->data[3]  = tag & 0x1ff;
845 	cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
846 
847 	CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
848 }
849 
850 static void build_inv_all(struct iommu_cmd *cmd)
851 {
852 	memset(cmd, 0, sizeof(*cmd));
853 	CMD_SET_TYPE(cmd, CMD_INV_ALL);
854 }
855 
856 /*
857  * Writes the command to the IOMMU's command buffer and informs the
858  * hardware about the new command.
859  */
860 static int iommu_queue_command_sync(struct amd_iommu *iommu,
861 				    struct iommu_cmd *cmd,
862 				    bool sync)
863 {
864 	u32 left, tail, head, next_tail;
865 	unsigned long flags;
866 
867 	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
868 
869 again:
870 	spin_lock_irqsave(&iommu->lock, flags);
871 
872 	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
873 	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
874 	next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
875 	left      = (head - next_tail) % iommu->cmd_buf_size;
876 
877 	if (left <= 2) {
878 		struct iommu_cmd sync_cmd;
879 		volatile u64 sem = 0;
880 		int ret;
881 
882 		build_completion_wait(&sync_cmd, (u64)&sem);
883 		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
884 
885 		spin_unlock_irqrestore(&iommu->lock, flags);
886 
887 		if ((ret = wait_on_sem(&sem)) != 0)
888 			return ret;
889 
890 		goto again;
891 	}
892 
893 	copy_cmd_to_buffer(iommu, cmd, tail);
894 
895 	/* We need to sync now to make sure all commands are processed */
896 	iommu->need_sync = sync;
897 
898 	spin_unlock_irqrestore(&iommu->lock, flags);
899 
900 	return 0;
901 }
902 
903 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
904 {
905 	return iommu_queue_command_sync(iommu, cmd, true);
906 }
907 
908 /*
909  * This function queues a completion wait command into the command
910  * buffer of an IOMMU
911  */
912 static int iommu_completion_wait(struct amd_iommu *iommu)
913 {
914 	struct iommu_cmd cmd;
915 	volatile u64 sem = 0;
916 	int ret;
917 
918 	if (!iommu->need_sync)
919 		return 0;
920 
921 	build_completion_wait(&cmd, (u64)&sem);
922 
923 	ret = iommu_queue_command_sync(iommu, &cmd, false);
924 	if (ret)
925 		return ret;
926 
927 	return wait_on_sem(&sem);
928 }
929 
930 static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
931 {
932 	struct iommu_cmd cmd;
933 
934 	build_inv_dte(&cmd, devid);
935 
936 	return iommu_queue_command(iommu, &cmd);
937 }
938 
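/* Flush the device table entries for all possible device IDs on this IOMMU */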
939 static void iommu_flush_dte_all(struct amd_iommu *iommu)
940 {
941 	u32 devid;
942 
943 	for (devid = 0; devid <= 0xffff; ++devid)
944 		iommu_flush_dte(iommu, devid);
945 
946 	iommu_completion_wait(iommu);
947 }
948 
949 /*
950  * This function uses heavy locking and may disable irqs for some time. But
951  * this is no issue because it is only called during resume.
952  */
953 static void iommu_flush_tlb_all(struct amd_iommu *iommu)
954 {
955 	u32 dom_id;
956 
957 	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
958 		struct iommu_cmd cmd;
959 		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
960 				      dom_id, 1);
961 		iommu_queue_command(iommu, &cmd);
962 	}
963 
964 	iommu_completion_wait(iommu);
965 }
966 
967 static void iommu_flush_all(struct amd_iommu *iommu)
968 {
969 	struct iommu_cmd cmd;
970 
971 	build_inv_all(&cmd);
972 
973 	iommu_queue_command(iommu, &cmd);
974 	iommu_completion_wait(iommu);
975 }
976 
977 void iommu_flush_all_caches(struct amd_iommu *iommu)
978 {
979 	if (iommu_feature(iommu, FEATURE_IA)) {
980 		iommu_flush_all(iommu);
981 	} else {
982 		iommu_flush_dte_all(iommu);
983 		iommu_flush_tlb_all(iommu);
984 	}
985 }
986 
987 /*
988  * Command send function for flushing on-device TLB
989  */
990 static int device_flush_iotlb(struct iommu_dev_data *dev_data,
991 			      u64 address, size_t size)
992 {
993 	struct amd_iommu *iommu;
994 	struct iommu_cmd cmd;
995 	int qdep;
996 
997 	qdep     = dev_data->ats.qdep;
998 	iommu    = amd_iommu_rlookup_table[dev_data->devid];
999 
1000 	build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);
1001 
1002 	return iommu_queue_command(iommu, &cmd);
1003 }
1004 
1005 /*
1006  * Command send function for invalidating a device table entry
1007  */
1008 static int device_flush_dte(struct iommu_dev_data *dev_data)
1009 {
1010 	struct amd_iommu *iommu;
1011 	int ret;
1012 
1013 	iommu = amd_iommu_rlookup_table[dev_data->devid];
1014 
1015 	ret = iommu_flush_dte(iommu, dev_data->devid);
1016 	if (ret)
1017 		return ret;
1018 
1019 	if (dev_data->ats.enabled)
1020 		ret = device_flush_iotlb(dev_data, 0, ~0UL);
1021 
1022 	return ret;
1023 }
1024 
1025 /*
1026  * TLB invalidation function which is called from the mapping functions.
1027  * It invalidates a single PTE if the range to flush is within a single
1028  * page. Otherwise it flushes the whole TLB of the IOMMU.
1029  */
1030 static void __domain_flush_pages(struct protection_domain *domain,
1031 				 u64 address, size_t size, int pde)
1032 {
1033 	struct iommu_dev_data *dev_data;
1034 	struct iommu_cmd cmd;
1035 	int ret = 0, i;
1036 
1037 	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
1038 
1039 	for (i = 0; i < amd_iommus_present; ++i) {
1040 		if (!domain->dev_iommu[i])
1041 			continue;
1042 
1043 		/*
1044 		 * Devices of this domain are behind this IOMMU
1045 		 * We need a TLB flush
1046 		 */
1047 		ret |= iommu_queue_command(amd_iommus[i], &cmd);
1048 	}
1049 
1050 	list_for_each_entry(dev_data, &domain->dev_list, list) {
1051 
1052 		if (!dev_data->ats.enabled)
1053 			continue;
1054 
1055 		ret |= device_flush_iotlb(dev_data, address, size);
1056 	}
1057 
1058 	WARN_ON(ret);
1059 }
1060 
1061 static void domain_flush_pages(struct protection_domain *domain,
1062 			       u64 address, size_t size)
1063 {
1064 	__domain_flush_pages(domain, address, size, 0);
1065 }
1066 
1067 /* Flush the whole IO/TLB for a given protection domain */
1068 static void domain_flush_tlb(struct protection_domain *domain)
1069 {
1070 	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
1071 }
1072 
1073 /* Flush the whole IO/TLB for a given protection domain - including PDE */
1074 static void domain_flush_tlb_pde(struct protection_domain *domain)
1075 {
1076 	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
1077 }
1078 
1079 static void domain_flush_complete(struct protection_domain *domain)
1080 {
1081 	int i;
1082 
1083 	for (i = 0; i < amd_iommus_present; ++i) {
1084 		if (!domain->dev_iommu[i])
1085 			continue;
1086 
1087 		/*
1088 		 * Devices of this domain are behind this IOMMU
1089 		 * We need to wait for completion of all commands.
1090 		 */
1091 		iommu_completion_wait(amd_iommus[i]);
1092 	}
1093 }
1094 
1095 
1096 /*
1097  * This function flushes the DTEs for all devices in the domain
1098  */
1099 static void domain_flush_devices(struct protection_domain *domain)
1100 {
1101 	struct iommu_dev_data *dev_data;
1102 
1103 	list_for_each_entry(dev_data, &domain->dev_list, list)
1104 		device_flush_dte(dev_data);
1105 }
1106 
1107 /****************************************************************************
1108  *
1109  * The functions below are used to create the page table mappings for
1110  * unity mapped regions.
1111  *
1112  ****************************************************************************/
1113 
1114 /*
1115  * This function is used to add another level to an IO page table. Adding
1116  * another level increases the size of the address space by 9 bits to a size up
1117  * to 64 bits.
1118  */
1119 static bool increase_address_space(struct protection_domain *domain,
1120 				   gfp_t gfp)
1121 {
1122 	u64 *pte;
1123 
1124 	if (domain->mode == PAGE_MODE_6_LEVEL)
1125 		/* address space already 64 bit large */
1126 		return false;
1127 
1128 	pte = (void *)get_zeroed_page(gfp);
1129 	if (!pte)
1130 		return false;
1131 
1132 	*pte             = PM_LEVEL_PDE(domain->mode,
1133 					virt_to_phys(domain->pt_root));
1134 	domain->pt_root  = pte;
1135 	domain->mode    += 1;
1136 	domain->updated  = true;
1137 
1138 	return true;
1139 }
1140 
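/*
 * Walk the page table for the given address, allocating missing levels on
 * the way down, and return a pointer to the PTE for the requested page size.
 */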
1141 static u64 *alloc_pte(struct protection_domain *domain,
1142 		      unsigned long address,
1143 		      unsigned long page_size,
1144 		      u64 **pte_page,
1145 		      gfp_t gfp)
1146 {
1147 	int level, end_lvl;
1148 	u64 *pte, *page;
1149 
1150 	BUG_ON(!is_power_of_2(page_size));
1151 
1152 	while (address > PM_LEVEL_SIZE(domain->mode))
1153 		increase_address_space(domain, gfp);
1154 
1155 	level   = domain->mode - 1;
1156 	pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
1157 	address = PAGE_SIZE_ALIGN(address, page_size);
1158 	end_lvl = PAGE_SIZE_LEVEL(page_size);
1159 
1160 	while (level > end_lvl) {
1161 		if (!IOMMU_PTE_PRESENT(*pte)) {
1162 			page = (u64 *)get_zeroed_page(gfp);
1163 			if (!page)
1164 				return NULL;
1165 			*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
1166 		}
1167 
1168 		/* No level skipping support yet */
1169 		if (PM_PTE_LEVEL(*pte) != level)
1170 			return NULL;
1171 
1172 		level -= 1;
1173 
1174 		pte = IOMMU_PTE_PAGE(*pte);
1175 
1176 		if (pte_page && level == end_lvl)
1177 			*pte_page = pte;
1178 
1179 		pte = &pte[PM_LEVEL_INDEX(level, address)];
1180 	}
1181 
1182 	return pte;
1183 }
1184 
1185 /*
1186  * This function checks if there is a PTE for a given dma address. If
1187  * there is one, it returns the pointer to it.
1188  */
1189 static u64 *fetch_pte(struct protection_domain *domain, unsigned long address)
1190 {
1191 	int level;
1192 	u64 *pte;
1193 
1194 	if (address > PM_LEVEL_SIZE(domain->mode))
1195 		return NULL;
1196 
1197 	level   =  domain->mode - 1;
1198 	pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
1199 
1200 	while (level > 0) {
1201 
1202 		/* Not Present */
1203 		if (!IOMMU_PTE_PRESENT(*pte))
1204 			return NULL;
1205 
1206 		/* Large PTE */
1207 		if (PM_PTE_LEVEL(*pte) == 0x07) {
1208 			unsigned long pte_mask, __pte;
1209 
1210 			/*
1211 			 * If we have a series of large PTEs, make
1212 			 * sure to return a pointer to the first one.
1213 			 */
1214 			pte_mask = PTE_PAGE_SIZE(*pte);
1215 			pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
1216 			__pte    = ((unsigned long)pte) & pte_mask;
1217 
1218 			return (u64 *)__pte;
1219 		}
1220 
1221 		/* No level skipping support yet */
1222 		if (PM_PTE_LEVEL(*pte) != level)
1223 			return NULL;
1224 
1225 		level -= 1;
1226 
1227 		/* Walk to the next level */
1228 		pte = IOMMU_PTE_PAGE(*pte);
1229 		pte = &pte[PM_LEVEL_INDEX(level, address)];
1230 	}
1231 
1232 	return pte;
1233 }
1234 
1235 /*
1236  * Generic mapping function. It maps a physical address into a DMA
1237  * address space. It allocates the page table pages if necessary.
1238  * In the future it can be extended to a generic mapping function
1239  * supporting all features of AMD IOMMU page tables like level skipping
1240  * and full 64 bit address spaces.
1241  */
1242 static int iommu_map_page(struct protection_domain *dom,
1243 			  unsigned long bus_addr,
1244 			  unsigned long phys_addr,
1245 			  int prot,
1246 			  unsigned long page_size)
1247 {
1248 	u64 __pte, *pte;
1249 	int i, count;
1250 
1251 	if (!(prot & IOMMU_PROT_MASK))
1252 		return -EINVAL;
1253 
1254 	bus_addr  = PAGE_ALIGN(bus_addr);
1255 	phys_addr = PAGE_ALIGN(phys_addr);
1256 	count     = PAGE_SIZE_PTE_COUNT(page_size);
1257 	pte       = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL);
	if (!pte)
		return -ENOMEM;
1258 
1259 	for (i = 0; i < count; ++i)
1260 		if (IOMMU_PTE_PRESENT(pte[i]))
1261 			return -EBUSY;
1262 
1263 	if (page_size > PAGE_SIZE) {
1264 		__pte = PAGE_SIZE_PTE(phys_addr, page_size);
1265 		__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
1266 	} else
1267 		__pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
1268 
1269 	if (prot & IOMMU_PROT_IR)
1270 		__pte |= IOMMU_PTE_IR;
1271 	if (prot & IOMMU_PROT_IW)
1272 		__pte |= IOMMU_PTE_IW;
1273 
1274 	for (i = 0; i < count; ++i)
1275 		pte[i] = __pte;
1276 
1277 	update_domain(dom);
1278 
1279 	return 0;
1280 }
1281 
1282 static unsigned long iommu_unmap_page(struct protection_domain *dom,
1283 				      unsigned long bus_addr,
1284 				      unsigned long page_size)
1285 {
1286 	unsigned long long unmap_size, unmapped;
1287 	u64 *pte;
1288 
1289 	BUG_ON(!is_power_of_2(page_size));
1290 
1291 	unmapped = 0;
1292 
1293 	while (unmapped < page_size) {
1294 
1295 		pte = fetch_pte(dom, bus_addr);
1296 
1297 		if (!pte) {
1298 			/*
1299 			 * No PTE for this address
1300 			 * move forward in 4kb steps
1301 			 */
1302 			unmap_size = PAGE_SIZE;
1303 		} else if (PM_PTE_LEVEL(*pte) == 0) {
1304 			/* 4kb PTE found for this address */
1305 			unmap_size = PAGE_SIZE;
1306 			*pte       = 0ULL;
1307 		} else {
1308 			int count, i;
1309 
1310 			/* Large PTE found which maps this address */
1311 			unmap_size = PTE_PAGE_SIZE(*pte);
1312 
1313 			/* Only unmap from the first pte in the page */
1314 			if ((unmap_size - 1) & bus_addr)
1315 				break;
1316 			count      = PAGE_SIZE_PTE_COUNT(unmap_size);
1317 			for (i = 0; i < count; i++)
1318 				pte[i] = 0ULL;
1319 		}
1320 
1321 		bus_addr  = (bus_addr & ~(unmap_size - 1)) + unmap_size;
1322 		unmapped += unmap_size;
1323 	}
1324 
1325 	BUG_ON(unmapped && !is_power_of_2(unmapped));
1326 
1327 	return unmapped;
1328 }
1329 
1330 /*
1331  * This function checks if a specific unity mapping entry is needed for
1332  * this specific IOMMU.
1333  */
1334 static int iommu_for_unity_map(struct amd_iommu *iommu,
1335 			       struct unity_map_entry *entry)
1336 {
1337 	u16 bdf, i;
1338 
1339 	for (i = entry->devid_start; i <= entry->devid_end; ++i) {
1340 		bdf = amd_iommu_alias_table[i];
1341 		if (amd_iommu_rlookup_table[bdf] == iommu)
1342 			return 1;
1343 	}
1344 
1345 	return 0;
1346 }
1347 
1348 /*
1349  * This function actually applies the mapping to the page table of the
1350  * dma_ops domain.
1351  */
1352 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
1353 			     struct unity_map_entry *e)
1354 {
1355 	u64 addr;
1356 	int ret;
1357 
1358 	for (addr = e->address_start; addr < e->address_end;
1359 	     addr += PAGE_SIZE) {
1360 		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
1361 				     PAGE_SIZE);
1362 		if (ret)
1363 			return ret;
1364 		/*
1365 		 * if the unity mapping is in the aperture range, mark the page
1366 		 * as allocated in the aperture
1367 		 */
1368 		if (addr < dma_dom->aperture_size)
1369 			__set_bit(addr >> PAGE_SHIFT,
1370 				  dma_dom->aperture[0]->bitmap);
1371 	}
1372 
1373 	return 0;
1374 }
1375 
1376 /*
1377  * Init the unity mappings for a specific IOMMU in the system
1378  *
1379  * Basically iterates over all unity mapping entries and applies them to
1380  * the default DMA domain of that IOMMU if necessary.
1381  */
1382 static int iommu_init_unity_mappings(struct amd_iommu *iommu)
1383 {
1384 	struct unity_map_entry *entry;
1385 	int ret;
1386 
1387 	list_for_each_entry(entry, &amd_iommu_unity_map, list) {
1388 		if (!iommu_for_unity_map(iommu, entry))
1389 			continue;
1390 		ret = dma_ops_unity_map(iommu->default_dom, entry);
1391 		if (ret)
1392 			return ret;
1393 	}
1394 
1395 	return 0;
1396 }
1397 
1398 /*
1399  * Inits the unity mappings required for a specific device
1400  */
1401 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
1402 					  u16 devid)
1403 {
1404 	struct unity_map_entry *e;
1405 	int ret;
1406 
1407 	list_for_each_entry(e, &amd_iommu_unity_map, list) {
1408 		if (!(devid >= e->devid_start && devid <= e->devid_end))
1409 			continue;
1410 		ret = dma_ops_unity_map(dma_dom, e);
1411 		if (ret)
1412 			return ret;
1413 	}
1414 
1415 	return 0;
1416 }
1417 
1418 /****************************************************************************
1419  *
1420  * The next functions belong to the address allocator for the dma_ops
1421  * interface functions. They work like the allocators in the other IOMMU
1422  * drivers. It's basically a bitmap which marks the allocated pages in
1423  * the aperture. Maybe it could be enhanced in the future to a more
1424  * efficient allocator.
1425  *
1426  ****************************************************************************/
1427 
1428 /*
1429  * The address allocator core functions.
1430  *
1431  * called with domain->lock held
1432  */
1433 
1434 /*
1435  * Used to reserve address ranges in the aperture (e.g. for exclusion
1436  * ranges).
1437  */
1438 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1439 				      unsigned long start_page,
1440 				      unsigned int pages)
1441 {
1442 	unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1443 
1444 	if (start_page + pages > last_page)
1445 		pages = last_page - start_page;
1446 
1447 	for (i = start_page; i < start_page + pages; ++i) {
1448 		int index = i / APERTURE_RANGE_PAGES;
1449 		int page  = i % APERTURE_RANGE_PAGES;
1450 		__set_bit(page, dom->aperture[index]->bitmap);
1451 	}
1452 }
1453 
1454 /*
1455  * This function is used to add a new aperture range to an existing
1456  * aperture in case of dma_ops domain allocation or address allocation
1457  * failure.
1458  */
1459 static int alloc_new_range(struct dma_ops_domain *dma_dom,
1460 			   bool populate, gfp_t gfp)
1461 {
1462 	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
1463 	struct amd_iommu *iommu;
1464 	unsigned long i, old_size;
1465 
1466 #ifdef CONFIG_IOMMU_STRESS
1467 	populate = false;
1468 #endif
1469 
1470 	if (index >= APERTURE_MAX_RANGES)
1471 		return -ENOMEM;
1472 
1473 	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
1474 	if (!dma_dom->aperture[index])
1475 		return -ENOMEM;
1476 
1477 	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
1478 	if (!dma_dom->aperture[index]->bitmap)
1479 		goto out_free;
1480 
1481 	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
1482 
1483 	if (populate) {
1484 		unsigned long address = dma_dom->aperture_size;
1485 		int i, num_ptes = APERTURE_RANGE_PAGES / 512;
1486 		u64 *pte, *pte_page;
1487 
1488 		for (i = 0; i < num_ptes; ++i) {
1489 			pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE,
1490 					&pte_page, gfp);
1491 			if (!pte)
1492 				goto out_free;
1493 
1494 			dma_dom->aperture[index]->pte_pages[i] = pte_page;
1495 
1496 			address += APERTURE_RANGE_SIZE / 64;
1497 		}
1498 	}
1499 
1500 	old_size                = dma_dom->aperture_size;
1501 	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
1502 
1503 	/* Reserve address range used for MSI messages */
1504 	if (old_size < MSI_ADDR_BASE_LO &&
1505 	    dma_dom->aperture_size > MSI_ADDR_BASE_LO) {
1506 		unsigned long spage;
1507 		int pages;
1508 
1509 		pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE);
1510 		spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT;
1511 
1512 		dma_ops_reserve_addresses(dma_dom, spage, pages);
1513 	}
1514 
1515 	/* Initialize the exclusion range if necessary */
1516 	for_each_iommu(iommu) {
1517 		if (iommu->exclusion_start &&
1518 		    iommu->exclusion_start >= dma_dom->aperture[index]->offset
1519 		    && iommu->exclusion_start < dma_dom->aperture_size) {
1520 			unsigned long startpage;
1521 			int pages = iommu_num_pages(iommu->exclusion_start,
1522 						    iommu->exclusion_length,
1523 						    PAGE_SIZE);
1524 			startpage = iommu->exclusion_start >> PAGE_SHIFT;
1525 			dma_ops_reserve_addresses(dma_dom, startpage, pages);
1526 		}
1527 	}
1528 
1529 	/*
1530 	 * Check for areas already mapped as present in the new aperture
1531 	 * range and mark those pages as reserved in the allocator. Such
1532 	 * mappings may already exist as a result of requested unity
1533 	 * mappings for devices.
1534 	 */
1535 	for (i = dma_dom->aperture[index]->offset;
1536 	     i < dma_dom->aperture_size;
1537 	     i += PAGE_SIZE) {
1538 		u64 *pte = fetch_pte(&dma_dom->domain, i);
1539 		if (!pte || !IOMMU_PTE_PRESENT(*pte))
1540 			continue;
1541 
1542 		dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
1543 	}
1544 
1545 	update_domain(&dma_dom->domain);
1546 
1547 	return 0;
1548 
1549 out_free:
1550 	update_domain(&dma_dom->domain);
1551 
1552 	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
1553 
1554 	kfree(dma_dom->aperture[index]);
1555 	dma_dom->aperture[index] = NULL;
1556 
1557 	return -ENOMEM;
1558 }
1559 
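/*
 * Scan the aperture ranges beginning at 'start' for a free area of the
 * requested size, honoring the DMA mask and segment boundary of the device.
 * Returns the allocated address or -1 on failure.
 */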
1560 static unsigned long dma_ops_area_alloc(struct device *dev,
1561 					struct dma_ops_domain *dom,
1562 					unsigned int pages,
1563 					unsigned long align_mask,
1564 					u64 dma_mask,
1565 					unsigned long start)
1566 {
1567 	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
1568 	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
1569 	int i = start >> APERTURE_RANGE_SHIFT;
1570 	unsigned long boundary_size;
1571 	unsigned long address = -1;
1572 	unsigned long limit;
1573 
1574 	next_bit >>= PAGE_SHIFT;
1575 
1576 	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
1577 			PAGE_SIZE) >> PAGE_SHIFT;
1578 
1579 	for (;i < max_index; ++i) {
1580 		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
1581 
1582 		if (dom->aperture[i]->offset >= dma_mask)
1583 			break;
1584 
1585 		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
1586 					       dma_mask >> PAGE_SHIFT);
1587 
1588 		address = iommu_area_alloc(dom->aperture[i]->bitmap,
1589 					   limit, next_bit, pages, 0,
1590 					    boundary_size, align_mask);
1591 		if (address != -1) {
1592 			address = dom->aperture[i]->offset +
1593 				  (address << PAGE_SHIFT);
1594 			dom->next_address = address + (pages << PAGE_SHIFT);
1595 			break;
1596 		}
1597 
1598 		next_bit = 0;
1599 	}
1600 
1601 	return address;
1602 }
1603 
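/*
 * Allocate a range of addresses from the aperture. If the first pass starting
 * at next_address fails, retry from the beginning of the aperture and request
 * a TLB flush.
 */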
1604 static unsigned long dma_ops_alloc_addresses(struct device *dev,
1605 					     struct dma_ops_domain *dom,
1606 					     unsigned int pages,
1607 					     unsigned long align_mask,
1608 					     u64 dma_mask)
1609 {
1610 	unsigned long address;
1611 
1612 #ifdef CONFIG_IOMMU_STRESS
1613 	dom->next_address = 0;
1614 	dom->need_flush = true;
1615 #endif
1616 
1617 	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1618 				     dma_mask, dom->next_address);
1619 
1620 	if (address == -1) {
1621 		dom->next_address = 0;
1622 		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
1623 					     dma_mask, 0);
1624 		dom->need_flush = true;
1625 	}
1626 
1627 	if (unlikely(address == -1))
1628 		address = DMA_ERROR_CODE;
1629 
1630 	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
1631 
1632 	return address;
1633 }
1634 
1635 /*
1636  * The address free function.
1637  *
1638  * called with domain->lock held
1639  */
1640 static void dma_ops_free_addresses(struct dma_ops_domain *dom,
1641 				   unsigned long address,
1642 				   unsigned int pages)
1643 {
1644 	unsigned i = address >> APERTURE_RANGE_SHIFT;
1645 	struct aperture_range *range = dom->aperture[i];
1646 
1647 	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
1648 
1649 #ifdef CONFIG_IOMMU_STRESS
1650 	if (i < 4)
1651 		return;
1652 #endif
1653 
1654 	if (address >= dom->next_address)
1655 		dom->need_flush = true;
1656 
1657 	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
1658 
1659 	bitmap_clear(range->bitmap, address, pages);
1660 
1661 }
1662 
1663 /****************************************************************************
1664  *
1665  * The next functions belong to the domain allocation. A domain is
1666  * allocated for every IOMMU as the default domain. If device isolation
1667  * is enabled, every device gets its own domain. The most important thing
1668  * about domains is the page table mapping the DMA address space they
1669  * contain.
1670  *
1671  ****************************************************************************/
1672 
1673 /*
1674  * This function adds a protection domain to the global protection domain list
1675  */
1676 static void add_domain_to_list(struct protection_domain *domain)
1677 {
1678 	unsigned long flags;
1679 
1680 	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1681 	list_add(&domain->list, &amd_iommu_pd_list);
1682 	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1683 }
1684 
1685 /*
1686  * This function removes a protection domain from the global
1687  * protection domain list
1688  */
1689 static void del_domain_from_list(struct protection_domain *domain)
1690 {
1691 	unsigned long flags;
1692 
1693 	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1694 	list_del(&domain->list);
1695 	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1696 }
1697 
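/* Allocate an unused protection domain ID; returns 0 if none is available */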
1698 static u16 domain_id_alloc(void)
1699 {
1700 	unsigned long flags;
1701 	int id;
1702 
1703 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1704 	id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
1705 	BUG_ON(id == 0);
1706 	if (id > 0 && id < MAX_DOMAIN_ID)
1707 		__set_bit(id, amd_iommu_pd_alloc_bitmap);
1708 	else
1709 		id = 0;
1710 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1711 
1712 	return id;
1713 }
1714 
1715 static void domain_id_free(int id)
1716 {
1717 	unsigned long flags;
1718 
1719 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1720 	if (id > 0 && id < MAX_DOMAIN_ID)
1721 		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
1722 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1723 }
1724 
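/* Free the pages backing the IO page table of a protection domain */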
1725 static void free_pagetable(struct protection_domain *domain)
1726 {
1727 	int i, j;
1728 	u64 *p1, *p2, *p3;
1729 
1730 	p1 = domain->pt_root;
1731 
1732 	if (!p1)
1733 		return;
1734 
1735 	for (i = 0; i < 512; ++i) {
1736 		if (!IOMMU_PTE_PRESENT(p1[i]))
1737 			continue;
1738 
1739 		p2 = IOMMU_PTE_PAGE(p1[i]);
1740 		for (j = 0; j < 512; ++j) {
1741 			if (!IOMMU_PTE_PRESENT(p2[j]))
1742 				continue;
1743 			p3 = IOMMU_PTE_PAGE(p2[j]);
1744 			free_page((unsigned long)p3);
1745 		}
1746 
1747 		free_page((unsigned long)p2);
1748 	}
1749 
1750 	free_page((unsigned long)p1);
1751 
1752 	domain->pt_root = NULL;
1753 }
1754 
1755 static void free_gcr3_tbl_level1(u64 *tbl)
1756 {
1757 	u64 *ptr;
1758 	int i;
1759 
1760 	for (i = 0; i < 512; ++i) {
1761 		if (!(tbl[i] & GCR3_VALID))
1762 			continue;
1763 
1764 		ptr = __va(tbl[i] & PAGE_MASK);
1765 
1766 		free_page((unsigned long)ptr);
1767 	}
1768 }
1769 
1770 static void free_gcr3_tbl_level2(u64 *tbl)
1771 {
1772 	u64 *ptr;
1773 	int i;
1774 
1775 	for (i = 0; i < 512; ++i) {
1776 		if (!(tbl[i] & GCR3_VALID))
1777 			continue;
1778 
1779 		ptr = __va(tbl[i] & PAGE_MASK);
1780 
1781 		free_gcr3_tbl_level1(ptr);
1782 	}
1783 }
1784 
1785 static void free_gcr3_table(struct protection_domain *domain)
1786 {
1787 	if (domain->glx == 2)
1788 		free_gcr3_tbl_level2(domain->gcr3_tbl);
1789 	else if (domain->glx == 1)
1790 		free_gcr3_tbl_level1(domain->gcr3_tbl);
1791 	else if (domain->glx != 0)
1792 		BUG();
1793 
1794 	free_page((unsigned long)domain->gcr3_tbl);
1795 }
1796 
1797 /*
1798  * Free a domain, only used if something went wrong in the
1799  * allocation path and we need to free an already allocated page table
1800  */
1801 static void dma_ops_domain_free(struct dma_ops_domain *dom)
1802 {
1803 	int i;
1804 
1805 	if (!dom)
1806 		return;
1807 
1808 	del_domain_from_list(&dom->domain);
1809 
1810 	free_pagetable(&dom->domain);
1811 
1812 	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1813 		if (!dom->aperture[i])
1814 			continue;
1815 		free_page((unsigned long)dom->aperture[i]->bitmap);
1816 		kfree(dom->aperture[i]);
1817 	}
1818 
1819 	kfree(dom);
1820 }
1821 
1822 /*
1823  * Allocates a new protection domain usable for the dma_ops functions.
1824  * It also initializes the page table and the address allocator data
1825  * structures required for the dma_ops interface
1826  */
1827 static struct dma_ops_domain *dma_ops_domain_alloc(void)
1828 {
1829 	struct dma_ops_domain *dma_dom;
1830 
1831 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1832 	if (!dma_dom)
1833 		return NULL;
1834 
1835 	spin_lock_init(&dma_dom->domain.lock);
1836 
1837 	dma_dom->domain.id = domain_id_alloc();
1838 	if (dma_dom->domain.id == 0)
1839 		goto free_dma_dom;
1840 	INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1841 	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1842 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1843 	dma_dom->domain.flags = PD_DMA_OPS_MASK;
1844 	dma_dom->domain.priv = dma_dom;
1845 	if (!dma_dom->domain.pt_root)
1846 		goto free_dma_dom;
1847 
1848 	dma_dom->need_flush = false;
1849 	dma_dom->target_dev = 0xffff;
1850 
1851 	add_domain_to_list(&dma_dom->domain);
1852 
1853 	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1854 		goto free_dma_dom;
1855 
1856 	/*
1857 	 * Mark the first page as allocated so we never return 0 as
1858 	 * a valid dma-address; this lets us use 0 as the error value
1859 	 */
1860 	dma_dom->aperture[0]->bitmap[0] = 1;
1861 	dma_dom->next_address = 0;
1862 
1863 
1864 	return dma_dom;
1865 
1866 free_dma_dom:
1867 	dma_ops_domain_free(dma_dom);
1868 
1869 	return NULL;
1870 }
1871 
1872 /*
1873  * little helper function to check whether a given protection domain is a
1874  * dma_ops domain
1875  */
1876 static bool dma_ops_domain(struct protection_domain *domain)
1877 {
1878 	return domain->flags & PD_DMA_OPS_MASK;
1879 }
1880 
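/*
 * Encode the device table entry (DTE) for @devid: the host page-table root
 * and paging mode go into data[0], the domain id and the IOTLB enable flag
 * into data[1]. For IOMMUv2 domains the GCR3 table pointer is additionally
 * split into its A/B/C fields across both words and the GLX level is set.
 */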
1881 static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
1882 {
1883 	u64 pte_root = 0;
1884 	u64 flags = 0;
1885 
1886 	if (domain->mode != PAGE_MODE_NONE)
1887 		pte_root = virt_to_phys(domain->pt_root);
1888 
1889 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1890 		    << DEV_ENTRY_MODE_SHIFT;
1891 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1892 
1893 	flags = amd_iommu_dev_table[devid].data[1];
1894 
1895 	if (ats)
1896 		flags |= DTE_FLAG_IOTLB;
1897 
1898 	if (domain->flags & PD_IOMMUV2_MASK) {
1899 		u64 gcr3 = __pa(domain->gcr3_tbl);
1900 		u64 glx  = domain->glx;
1901 		u64 tmp;
1902 
1903 		pte_root |= DTE_FLAG_GV;
1904 		pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
1905 
1906 		/* First mask out possible old values for GCR3 table */
1907 		tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1908 		flags    &= ~tmp;
1909 
1910 		tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1911 		flags    &= ~tmp;
1912 
1913 		/* Encode GCR3 table into DTE */
1914 		tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
1915 		pte_root |= tmp;
1916 
1917 		tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
1918 		flags    |= tmp;
1919 
1920 		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
1921 		flags    |= tmp;
1922 	}
1923 
1924 	flags &= ~(0xffffUL);
1925 	flags |= domain->id;
1926 
1927 	amd_iommu_dev_table[devid].data[1]  = flags;
1928 	amd_iommu_dev_table[devid].data[0]  = pte_root;
1929 }
1930 
1931 static void clear_dte_entry(u16 devid)
1932 {
1933 	/* remove entry from the device table seen by the hardware */
1934 	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1935 	amd_iommu_dev_table[devid].data[1] = 0;
1936 
1937 	amd_iommu_apply_erratum_63(devid);
1938 }
1939 
1940 static void do_attach(struct iommu_dev_data *dev_data,
1941 		      struct protection_domain *domain)
1942 {
1943 	struct amd_iommu *iommu;
1944 	bool ats;
1945 
1946 	iommu = amd_iommu_rlookup_table[dev_data->devid];
1947 	ats   = dev_data->ats.enabled;
1948 
1949 	/* Update data structures */
1950 	dev_data->domain = domain;
1951 	list_add(&dev_data->list, &domain->dev_list);
1952 	set_dte_entry(dev_data->devid, domain, ats);
1953 
1954 	/* Do reference counting */
1955 	domain->dev_iommu[iommu->index] += 1;
1956 	domain->dev_cnt                 += 1;
1957 
1958 	/* Flush the DTE entry */
1959 	device_flush_dte(dev_data);
1960 }
1961 
1962 static void do_detach(struct iommu_dev_data *dev_data)
1963 {
1964 	struct amd_iommu *iommu;
1965 
1966 	iommu = amd_iommu_rlookup_table[dev_data->devid];
1967 
1968 	/* decrease reference counters */
1969 	dev_data->domain->dev_iommu[iommu->index] -= 1;
1970 	dev_data->domain->dev_cnt                 -= 1;
1971 
1972 	/* Update data structures */
1973 	dev_data->domain = NULL;
1974 	list_del(&dev_data->list);
1975 	clear_dte_entry(dev_data->devid);
1976 
1977 	/* Flush the DTE entry */
1978 	device_flush_dte(dev_data);
1979 }
1980 
1981 /*
1982  * If a device is not yet associated with a domain, this function
1983  * associates it with the domain and makes it visible to the hardware
1984  */
1985 static int __attach_device(struct iommu_dev_data *dev_data,
1986 			   struct protection_domain *domain)
1987 {
1988 	int ret;
1989 
1990 	/* lock domain */
1991 	spin_lock(&domain->lock);
1992 
1993 	if (dev_data->alias_data != NULL) {
1994 		struct iommu_dev_data *alias_data = dev_data->alias_data;
1995 
1996 		/* Some sanity checks */
1997 		ret = -EBUSY;
1998 		if (alias_data->domain != NULL &&
1999 				alias_data->domain != domain)
2000 			goto out_unlock;
2001 
2002 		if (dev_data->domain != NULL &&
2003 				dev_data->domain != domain)
2004 			goto out_unlock;
2005 
2006 		/* Do real assignment */
2007 		if (alias_data->domain == NULL)
2008 			do_attach(alias_data, domain);
2009 
2010 		atomic_inc(&alias_data->bind);
2011 	}
2012 
2013 	if (dev_data->domain == NULL)
2014 		do_attach(dev_data, domain);
2015 
2016 	atomic_inc(&dev_data->bind);
2017 
2018 	ret = 0;
2019 
2020 out_unlock:
2021 
2022 	/* ready */
2023 	spin_unlock(&domain->lock);
2024 
2025 	return ret;
2026 }
2027 
2028 
2029 static void pdev_iommuv2_disable(struct pci_dev *pdev)
2030 {
2031 	pci_disable_ats(pdev);
2032 	pci_disable_pri(pdev);
2033 	pci_disable_pasid(pdev);
2034 }
2035 
2036 /* FIXME: Change generic reset-function to do the same */
2037 static int pri_reset_while_enabled(struct pci_dev *pdev)
2038 {
2039 	u16 control;
2040 	int pos;
2041 
2042 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
2043 	if (!pos)
2044 		return -EINVAL;
2045 
2046 	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
2047 	control |= PCI_PRI_CTRL_RESET;
2048 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
2049 
2050 	return 0;
2051 }
2052 
2053 static int pdev_iommuv2_enable(struct pci_dev *pdev)
2054 {
2055 	bool reset_enable;
2056 	int reqs, ret;
2057 
2058 	/* FIXME: Hardcode number of outstanding requests for now */
2059 	reqs = 32;
2060 	if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
2061 		reqs = 1;
2062 	reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
2063 
2064 	/* Only allow access to user-accessible pages */
2065 	ret = pci_enable_pasid(pdev, 0);
2066 	if (ret)
2067 		goto out_err;
2068 
2069 	/* First reset the PRI state of the device */
2070 	ret = pci_reset_pri(pdev);
2071 	if (ret)
2072 		goto out_err;
2073 
2074 	/* Enable PRI */
2075 	ret = pci_enable_pri(pdev, reqs);
2076 	if (ret)
2077 		goto out_err;
2078 
2079 	if (reset_enable) {
2080 		ret = pri_reset_while_enabled(pdev);
2081 		if (ret)
2082 			goto out_err;
2083 	}
2084 
2085 	ret = pci_enable_ats(pdev, PAGE_SHIFT);
2086 	if (ret)
2087 		goto out_err;
2088 
2089 	return 0;
2090 
2091 out_err:
2092 	pci_disable_pri(pdev);
2093 	pci_disable_pasid(pdev);
2094 
2095 	return ret;
2096 }
2097 
2098 /* FIXME: Move this to PCI code */
2099 #define PCI_PRI_TLP_OFF		(1 << 15)
2100 
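/*
 * The bit checked below is the "PRG Response PASID Required" bit of the PRI
 * status register: when set, the device expects a PASID TLP prefix on PRI
 * responses. The result is cached in dev_data->pri_tlp and later passed to
 * build_complete_ppr(); PCI_PRI_TLP_OFF is a driver-local name for the bit.
 */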
2101 bool pci_pri_tlp_required(struct pci_dev *pdev)
2102 {
2103 	u16 status;
2104 	int pos;
2105 
2106 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
2107 	if (!pos)
2108 		return false;
2109 
2110 	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
2111 
2112 	return (status & PCI_PRI_TLP_OFF) ? true : false;
2113 }
2114 
2115 /*
2116  * If a device is not yet associated with a domain, this function
2117  * associates it with the domain and makes it visible to the hardware
2118  */
2119 static int attach_device(struct device *dev,
2120 			 struct protection_domain *domain)
2121 {
2122 	struct pci_dev *pdev = to_pci_dev(dev);
2123 	struct iommu_dev_data *dev_data;
2124 	unsigned long flags;
2125 	int ret;
2126 
2127 	dev_data = get_dev_data(dev);
2128 
2129 	if (domain->flags & PD_IOMMUV2_MASK) {
2130 		if (!dev_data->iommu_v2 || !dev_data->passthrough)
2131 			return -EINVAL;
2132 
2133 		if (pdev_iommuv2_enable(pdev) != 0)
2134 			return -EINVAL;
2135 
2136 		dev_data->ats.enabled = true;
2137 		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
2138 		dev_data->pri_tlp     = pci_pri_tlp_required(pdev);
2139 	} else if (amd_iommu_iotlb_sup &&
2140 		   pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
2141 		dev_data->ats.enabled = true;
2142 		dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
2143 	}
2144 
2145 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2146 	ret = __attach_device(dev_data, domain);
2147 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2148 
2149 	/*
2150 	 * We might be booting into a crash/kdump kernel here. The crashed
2151 	 * kernel may have left the IOMMU caches dirty, so flush here to
2152 	 * evict any stale entries.
2153 	 */
2154 	domain_flush_tlb_pde(domain);
2155 
2156 	return ret;
2157 }
2158 
2159 /*
2160  * Removes a device from a protection domain (unlocked)
2161  */
2162 static void __detach_device(struct iommu_dev_data *dev_data)
2163 {
2164 	struct protection_domain *domain;
2165 	unsigned long flags;
2166 
2167 	BUG_ON(!dev_data->domain);
2168 
2169 	domain = dev_data->domain;
2170 
2171 	spin_lock_irqsave(&domain->lock, flags);
2172 
2173 	if (dev_data->alias_data != NULL) {
2174 		struct iommu_dev_data *alias_data = dev_data->alias_data;
2175 
2176 		if (atomic_dec_and_test(&alias_data->bind))
2177 			do_detach(alias_data);
2178 	}
2179 
2180 	if (atomic_dec_and_test(&dev_data->bind))
2181 		do_detach(dev_data);
2182 
2183 	spin_unlock_irqrestore(&domain->lock, flags);
2184 
2185 	/*
2186 	 * If we run in passthrough mode the device must be assigned to the
2187 	 * passthrough domain if it is detached from any other domain.
2188 	 * Make sure we can deassign from the pt_domain itself.
2189 	 */
2190 	if (dev_data->passthrough &&
2191 	    (dev_data->domain == NULL && domain != pt_domain))
2192 		__attach_device(dev_data, pt_domain);
2193 }
2194 
2195 /*
2196  * Removes a device from a protection domain (with devtable_lock held)
2197  */
2198 static void detach_device(struct device *dev)
2199 {
2200 	struct protection_domain *domain;
2201 	struct iommu_dev_data *dev_data;
2202 	unsigned long flags;
2203 
2204 	dev_data = get_dev_data(dev);
2205 	domain   = dev_data->domain;
2206 
2207 	/* lock device table */
2208 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2209 	__detach_device(dev_data);
2210 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2211 
2212 	if (domain->flags & PD_IOMMUV2_MASK)
2213 		pdev_iommuv2_disable(to_pci_dev(dev));
2214 	else if (dev_data->ats.enabled)
2215 		pci_disable_ats(to_pci_dev(dev));
2216 
2217 	dev_data->ats.enabled = false;
2218 }
2219 
2220 /*
2221  * Find out the protection domain structure for a given PCI device. This
2222  * will give us the pointer to the page table root for example.
2223  */
2224 static struct protection_domain *domain_for_device(struct device *dev)
2225 {
2226 	struct iommu_dev_data *dev_data;
2227 	struct protection_domain *dom = NULL;
2228 	unsigned long flags;
2229 
2230 	dev_data   = get_dev_data(dev);
2231 
2232 	if (dev_data->domain)
2233 		return dev_data->domain;
2234 
2235 	if (dev_data->alias_data != NULL) {
2236 		struct iommu_dev_data *alias_data = dev_data->alias_data;
2237 
2238 		read_lock_irqsave(&amd_iommu_devtable_lock, flags);
2239 		if (alias_data->domain != NULL) {
2240 			__attach_device(dev_data, alias_data->domain);
2241 			dom = alias_data->domain;
2242 		}
2243 		read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2244 	}
2245 
2246 	return dom;
2247 }
2248 
2249 static int device_change_notifier(struct notifier_block *nb,
2250 				  unsigned long action, void *data)
2251 {
2252 	struct dma_ops_domain *dma_domain;
2253 	struct protection_domain *domain;
2254 	struct iommu_dev_data *dev_data;
2255 	struct device *dev = data;
2256 	struct amd_iommu *iommu;
2257 	unsigned long flags;
2258 	u16 devid;
2259 
2260 	if (!check_device(dev))
2261 		return 0;
2262 
2263 	devid    = get_device_id(dev);
2264 	iommu    = amd_iommu_rlookup_table[devid];
2265 	dev_data = get_dev_data(dev);
2266 
2267 	switch (action) {
2268 	case BUS_NOTIFY_UNBOUND_DRIVER:
2269 
2270 		domain = domain_for_device(dev);
2271 
2272 		if (!domain)
2273 			goto out;
2274 		if (dev_data->passthrough)
2275 			break;
2276 		detach_device(dev);
2277 		break;
2278 	case BUS_NOTIFY_ADD_DEVICE:
2279 
2280 		iommu_init_device(dev);
2281 
2282 		/*
2283 		 * dev_data was NULL before and has just been
2284 		 * initialized by iommu_init_device
2285 		 */
2286 		dev_data = get_dev_data(dev);
2287 
2288 		if (iommu_pass_through || dev_data->iommu_v2) {
2289 			dev_data->passthrough = true;
2290 			attach_device(dev, pt_domain);
2291 			break;
2292 		}
2293 
2294 		domain = domain_for_device(dev);
2295 
2296 		/* allocate a protection domain if a device is added */
2297 		dma_domain = find_protection_domain(devid);
2298 		if (!dma_domain) {
2299 			dma_domain = dma_ops_domain_alloc();
2300 			if (!dma_domain)
2301 				goto out;
2302 			dma_domain->target_dev = devid;
2303 
2304 			spin_lock_irqsave(&iommu_pd_list_lock, flags);
2305 			list_add_tail(&dma_domain->list, &iommu_pd_list);
2306 			spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
2307 		}
2308 
2309 		dev->archdata.dma_ops = &amd_iommu_dma_ops;
2310 
2311 		break;
2312 	case BUS_NOTIFY_DEL_DEVICE:
2313 
2314 		iommu_uninit_device(dev);
2315 
2316 	default:
2317 		goto out;
2318 	}
2319 
2320 	iommu_completion_wait(iommu);
2321 
2322 out:
2323 	return 0;
2324 }
2325 
2326 static struct notifier_block device_nb = {
2327 	.notifier_call = device_change_notifier,
2328 };
2329 
2330 void amd_iommu_init_notifier(void)
2331 {
2332 	bus_register_notifier(&pci_bus_type, &device_nb);
2333 }
2334 
2335 /*****************************************************************************
2336  *
2337  * The next functions belong to the dma_ops mapping/unmapping code.
2338  *
2339  *****************************************************************************/
2340 
2341 /*
2342  * In the dma_ops path we only have the struct device. This function
2343  * finds the corresponding IOMMU, the protection domain and the
2344  * requestor id for a given device.
2345  * If the device is not yet associated with a domain this is also done
2346  * in this function.
2347  */
2348 static struct protection_domain *get_domain(struct device *dev)
2349 {
2350 	struct protection_domain *domain;
2351 	struct dma_ops_domain *dma_dom;
2352 	u16 devid = get_device_id(dev);
2353 
2354 	if (!check_device(dev))
2355 		return ERR_PTR(-EINVAL);
2356 
2357 	domain = domain_for_device(dev);
2358 	if (domain != NULL && !dma_ops_domain(domain))
2359 		return ERR_PTR(-EBUSY);
2360 
2361 	if (domain != NULL)
2362 		return domain;
2363 
2364 	/* Device not bound yet - bind it */
2365 	dma_dom = find_protection_domain(devid);
2366 	if (!dma_dom)
2367 		dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
2368 	attach_device(dev, &dma_dom->domain);
2369 	DUMP_printk("Using protection domain %d for device %s\n",
2370 		    dma_dom->domain.id, dev_name(dev));
2371 
2372 	return &dma_dom->domain;
2373 }
2374 
2375 static void update_device_table(struct protection_domain *domain)
2376 {
2377 	struct iommu_dev_data *dev_data;
2378 
2379 	list_for_each_entry(dev_data, &domain->dev_list, list)
2380 		set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
2381 }
2382 
2383 static void update_domain(struct protection_domain *domain)
2384 {
2385 	if (!domain->updated)
2386 		return;
2387 
2388 	update_device_table(domain);
2389 
2390 	domain_flush_devices(domain);
2391 	domain_flush_tlb_pde(domain);
2392 
2393 	domain->updated = false;
2394 }
2395 
2396 /*
2397  * This function fetches the PTE for a given address in the aperture
2398  */
2399 static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
2400 			    unsigned long address)
2401 {
2402 	struct aperture_range *aperture;
2403 	u64 *pte, *pte_page;
2404 
2405 	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
2406 	if (!aperture)
2407 		return NULL;
2408 
2409 	pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
2410 	if (!pte) {
2411 		pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page,
2412 				GFP_ATOMIC);
2413 		aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
2414 	} else
2415 		pte += PM_LEVEL_INDEX(0, address);
2416 
2417 	update_domain(&dom->domain);
2418 
2419 	return pte;
2420 }
2421 
2422 /*
2423  * This is the generic map function. It maps one 4kb page at paddr to
2424  * the given address in the DMA address space for the domain.
2425  */
2426 static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
2427 				     unsigned long address,
2428 				     phys_addr_t paddr,
2429 				     int direction)
2430 {
2431 	u64 *pte, __pte;
2432 
2433 	WARN_ON(address > dom->aperture_size);
2434 
2435 	paddr &= PAGE_MASK;
2436 
2437 	pte  = dma_ops_get_pte(dom, address);
2438 	if (!pte)
2439 		return DMA_ERROR_CODE;
2440 
2441 	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
2442 
2443 	if (direction == DMA_TO_DEVICE)
2444 		__pte |= IOMMU_PTE_IR;
2445 	else if (direction == DMA_FROM_DEVICE)
2446 		__pte |= IOMMU_PTE_IW;
2447 	else if (direction == DMA_BIDIRECTIONAL)
2448 		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
2449 
2450 	WARN_ON(*pte);
2451 
2452 	*pte = __pte;
2453 
2454 	return (dma_addr_t)address;
2455 }
2456 
2457 /*
2458  * The generic unmapping function for one page in the DMA address space.
2459  */
2460 static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
2461 				 unsigned long address)
2462 {
2463 	struct aperture_range *aperture;
2464 	u64 *pte;
2465 
2466 	if (address >= dom->aperture_size)
2467 		return;
2468 
2469 	aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
2470 	if (!aperture)
2471 		return;
2472 
2473 	pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
2474 	if (!pte)
2475 		return;
2476 
2477 	pte += PM_LEVEL_INDEX(0, address);
2478 
2479 	WARN_ON(!*pte);
2480 
2481 	*pte = 0ULL;
2482 }
2483 
2484 /*
2485  * This function contains common code for mapping of a physically
2486  * contiguous memory region into DMA address space. It is used by all
2487  * mapping functions provided with this IOMMU driver.
2488  * Must be called with the domain lock held.
2489  */
2490 static dma_addr_t __map_single(struct device *dev,
2491 			       struct dma_ops_domain *dma_dom,
2492 			       phys_addr_t paddr,
2493 			       size_t size,
2494 			       int dir,
2495 			       bool align,
2496 			       u64 dma_mask)
2497 {
2498 	dma_addr_t offset = paddr & ~PAGE_MASK;
2499 	dma_addr_t address, start, ret;
2500 	unsigned int pages;
2501 	unsigned long align_mask = 0;
2502 	int i;
2503 
2504 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
2505 	paddr &= PAGE_MASK;
2506 
2507 	INC_STATS_COUNTER(total_map_requests);
2508 
2509 	if (pages > 1)
2510 		INC_STATS_COUNTER(cross_page);
2511 
2512 	if (align)
2513 		align_mask = (1UL << get_order(size)) - 1;
2514 
2515 retry:
2516 	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
2517 					  dma_mask);
2518 	if (unlikely(address == DMA_ERROR_CODE)) {
2519 		/*
2520 		 * Setting next_address here lets the address
2521 		 * allocator scan only the newly allocated range on
2522 		 * its first pass. This is a small optimization.
2523 		 */
2524 		dma_dom->next_address = dma_dom->aperture_size;
2525 
2526 		if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
2527 			goto out;
2528 
2529 		/*
2530 		 * aperture was successfully enlarged by 128 MB, try
2531 		 * allocation again
2532 		 */
2533 		goto retry;
2534 	}
2535 
2536 	start = address;
2537 	for (i = 0; i < pages; ++i) {
2538 		ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
2539 		if (ret == DMA_ERROR_CODE)
2540 			goto out_unmap;
2541 
2542 		paddr += PAGE_SIZE;
2543 		start += PAGE_SIZE;
2544 	}
2545 	address += offset;
2546 
2547 	ADD_STATS_COUNTER(alloced_io_mem, size);
2548 
2549 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
2550 		domain_flush_tlb(&dma_dom->domain);
2551 		dma_dom->need_flush = false;
2552 	} else if (unlikely(amd_iommu_np_cache))
2553 		domain_flush_pages(&dma_dom->domain, address, size);
2554 
2555 out:
2556 	return address;
2557 
2558 out_unmap:
2559 
2560 	for (--i; i >= 0; --i) {
2561 		start -= PAGE_SIZE;
2562 		dma_ops_domain_unmap(dma_dom, start);
2563 	}
2564 
2565 	dma_ops_free_addresses(dma_dom, address, pages);
2566 
2567 	return DMA_ERROR_CODE;
2568 }
2569 
2570 /*
2571  * Does the reverse of the __map_single function. Must be called with
2572  * the domain lock held too
2573  */
2574 static void __unmap_single(struct dma_ops_domain *dma_dom,
2575 			   dma_addr_t dma_addr,
2576 			   size_t size,
2577 			   int dir)
2578 {
2579 	dma_addr_t flush_addr;
2580 	dma_addr_t i, start;
2581 	unsigned int pages;
2582 
2583 	if ((dma_addr == DMA_ERROR_CODE) ||
2584 	    (dma_addr + size > dma_dom->aperture_size))
2585 		return;
2586 
2587 	flush_addr = dma_addr;
2588 	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
2589 	dma_addr &= PAGE_MASK;
2590 	start = dma_addr;
2591 
2592 	for (i = 0; i < pages; ++i) {
2593 		dma_ops_domain_unmap(dma_dom, start);
2594 		start += PAGE_SIZE;
2595 	}
2596 
2597 	SUB_STATS_COUNTER(alloced_io_mem, size);
2598 
2599 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
2600 
2601 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
2602 		domain_flush_pages(&dma_dom->domain, flush_addr, size);
2603 		dma_dom->need_flush = false;
2604 	}
2605 }
2606 
2607 /*
2608  * The exported map_single function for dma_ops.
2609  */
2610 static dma_addr_t map_page(struct device *dev, struct page *page,
2611 			   unsigned long offset, size_t size,
2612 			   enum dma_data_direction dir,
2613 			   struct dma_attrs *attrs)
2614 {
2615 	unsigned long flags;
2616 	struct protection_domain *domain;
2617 	dma_addr_t addr;
2618 	u64 dma_mask;
2619 	phys_addr_t paddr = page_to_phys(page) + offset;
2620 
2621 	INC_STATS_COUNTER(cnt_map_single);
2622 
2623 	domain = get_domain(dev);
2624 	if (PTR_ERR(domain) == -EINVAL)
2625 		return (dma_addr_t)paddr;
2626 	else if (IS_ERR(domain))
2627 		return DMA_ERROR_CODE;
2628 
2629 	dma_mask = *dev->dma_mask;
2630 
2631 	spin_lock_irqsave(&domain->lock, flags);
2632 
2633 	addr = __map_single(dev, domain->priv, paddr, size, dir, false,
2634 			    dma_mask);
2635 	if (addr == DMA_ERROR_CODE)
2636 		goto out;
2637 
2638 	domain_flush_complete(domain);
2639 
2640 out:
2641 	spin_unlock_irqrestore(&domain->lock, flags);
2642 
2643 	return addr;
2644 }
2645 
2646 /*
2647  * The exported unmap_single function for dma_ops.
2648  */
2649 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
2650 		       enum dma_data_direction dir, struct dma_attrs *attrs)
2651 {
2652 	unsigned long flags;
2653 	struct protection_domain *domain;
2654 
2655 	INC_STATS_COUNTER(cnt_unmap_single);
2656 
2657 	domain = get_domain(dev);
2658 	if (IS_ERR(domain))
2659 		return;
2660 
2661 	spin_lock_irqsave(&domain->lock, flags);
2662 
2663 	__unmap_single(domain->priv, dma_addr, size, dir);
2664 
2665 	domain_flush_complete(domain);
2666 
2667 	spin_unlock_irqrestore(&domain->lock, flags);
2668 }
2669 
2670 /*
2671  * This is a special map_sg function which is used if we should map a
2672  * device which is not handled by an AMD IOMMU in the system.
2673  */
2674 static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
2675 			   int nelems, int dir)
2676 {
2677 	struct scatterlist *s;
2678 	int i;
2679 
2680 	for_each_sg(sglist, s, nelems, i) {
2681 		s->dma_address = (dma_addr_t)sg_phys(s);
2682 		s->dma_length  = s->length;
2683 	}
2684 
2685 	return nelems;
2686 }
2687 
2688 /*
2689  * The exported map_sg function for dma_ops (handles scatter-gather
2690  * lists).
2691  */
2692 static int map_sg(struct device *dev, struct scatterlist *sglist,
2693 		  int nelems, enum dma_data_direction dir,
2694 		  struct dma_attrs *attrs)
2695 {
2696 	unsigned long flags;
2697 	struct protection_domain *domain;
2698 	int i;
2699 	struct scatterlist *s;
2700 	phys_addr_t paddr;
2701 	int mapped_elems = 0;
2702 	u64 dma_mask;
2703 
2704 	INC_STATS_COUNTER(cnt_map_sg);
2705 
2706 	domain = get_domain(dev);
2707 	if (PTR_ERR(domain) == -EINVAL)
2708 		return map_sg_no_iommu(dev, sglist, nelems, dir);
2709 	else if (IS_ERR(domain))
2710 		return 0;
2711 
2712 	dma_mask = *dev->dma_mask;
2713 
2714 	spin_lock_irqsave(&domain->lock, flags);
2715 
2716 	for_each_sg(sglist, s, nelems, i) {
2717 		paddr = sg_phys(s);
2718 
2719 		s->dma_address = __map_single(dev, domain->priv,
2720 					      paddr, s->length, dir, false,
2721 					      dma_mask);
2722 
2723 		if (s->dma_address) {
2724 			s->dma_length = s->length;
2725 			mapped_elems++;
2726 		} else
2727 			goto unmap;
2728 	}
2729 
2730 	domain_flush_complete(domain);
2731 
2732 out:
2733 	spin_unlock_irqrestore(&domain->lock, flags);
2734 
2735 	return mapped_elems;
2736 unmap:
2737 	for_each_sg(sglist, s, mapped_elems, i) {
2738 		if (s->dma_address)
2739 			__unmap_single(domain->priv, s->dma_address,
2740 				       s->dma_length, dir);
2741 		s->dma_address = s->dma_length = 0;
2742 	}
2743 
2744 	mapped_elems = 0;
2745 
2746 	goto out;
2747 }
2748 
2749 /*
2750  * The exported unmap_sg function for dma_ops (handles scatter-gather
2751  * lists).
2752  */
2753 static void unmap_sg(struct device *dev, struct scatterlist *sglist,
2754 		     int nelems, enum dma_data_direction dir,
2755 		     struct dma_attrs *attrs)
2756 {
2757 	unsigned long flags;
2758 	struct protection_domain *domain;
2759 	struct scatterlist *s;
2760 	int i;
2761 
2762 	INC_STATS_COUNTER(cnt_unmap_sg);
2763 
2764 	domain = get_domain(dev);
2765 	if (IS_ERR(domain))
2766 		return;
2767 
2768 	spin_lock_irqsave(&domain->lock, flags);
2769 
2770 	for_each_sg(sglist, s, nelems, i) {
2771 		__unmap_single(domain->priv, s->dma_address,
2772 			       s->dma_length, dir);
2773 		s->dma_address = s->dma_length = 0;
2774 	}
2775 
2776 	domain_flush_complete(domain);
2777 
2778 	spin_unlock_irqrestore(&domain->lock, flags);
2779 }
2780 
2781 /*
2782  * The exported alloc_coherent function for dma_ops.
2783  */
2784 static void *alloc_coherent(struct device *dev, size_t size,
2785 			    dma_addr_t *dma_addr, gfp_t flag,
2786 			    struct dma_attrs *attrs)
2787 {
2788 	unsigned long flags;
2789 	void *virt_addr;
2790 	struct protection_domain *domain;
2791 	phys_addr_t paddr;
2792 	u64 dma_mask = dev->coherent_dma_mask;
2793 
2794 	INC_STATS_COUNTER(cnt_alloc_coherent);
2795 
2796 	domain = get_domain(dev);
2797 	if (PTR_ERR(domain) == -EINVAL) {
2798 		virt_addr = (void *)__get_free_pages(flag, get_order(size));
2799 		*dma_addr = __pa(virt_addr);
2800 		return virt_addr;
2801 	} else if (IS_ERR(domain))
2802 		return NULL;
2803 
2804 	dma_mask  = dev->coherent_dma_mask;
2805 	flag     &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2806 	flag     |= __GFP_ZERO;
2807 
2808 	virt_addr = (void *)__get_free_pages(flag, get_order(size));
2809 	if (!virt_addr)
2810 		return NULL;
2811 
2812 	paddr = virt_to_phys(virt_addr);
2813 
2814 	if (!dma_mask)
2815 		dma_mask = *dev->dma_mask;
2816 
2817 	spin_lock_irqsave(&domain->lock, flags);
2818 
2819 	*dma_addr = __map_single(dev, domain->priv, paddr,
2820 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
2821 
2822 	if (*dma_addr == DMA_ERROR_CODE) {
2823 		spin_unlock_irqrestore(&domain->lock, flags);
2824 		goto out_free;
2825 	}
2826 
2827 	domain_flush_complete(domain);
2828 
2829 	spin_unlock_irqrestore(&domain->lock, flags);
2830 
2831 	return virt_addr;
2832 
2833 out_free:
2834 
2835 	free_pages((unsigned long)virt_addr, get_order(size));
2836 
2837 	return NULL;
2838 }
2839 
2840 /*
2841  * The exported free_coherent function for dma_ops.
2842  */
2843 static void free_coherent(struct device *dev, size_t size,
2844 			  void *virt_addr, dma_addr_t dma_addr,
2845 			  struct dma_attrs *attrs)
2846 {
2847 	unsigned long flags;
2848 	struct protection_domain *domain;
2849 
2850 	INC_STATS_COUNTER(cnt_free_coherent);
2851 
2852 	domain = get_domain(dev);
2853 	if (IS_ERR(domain))
2854 		goto free_mem;
2855 
2856 	spin_lock_irqsave(&domain->lock, flags);
2857 
2858 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2859 
2860 	domain_flush_complete(domain);
2861 
2862 	spin_unlock_irqrestore(&domain->lock, flags);
2863 
2864 free_mem:
2865 	free_pages((unsigned long)virt_addr, get_order(size));
2866 }
2867 
2868 /*
2869  * This function is called by the DMA layer to find out if we can handle a
2870  * particular device. It is part of the dma_ops.
2871  */
2872 static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2873 {
2874 	return check_device(dev);
2875 }
2876 
2877 /*
2878  * The function for pre-allocating protection domains.
2879  *
2880  * Once the driver core informs the DMA layer when a driver grabs a
2881  * device we won't need to preallocate the protection domains anymore.
2882  * For now we have to.
2883  */
2884 static void __init prealloc_protection_domains(void)
2885 {
2886 	struct iommu_dev_data *dev_data;
2887 	struct dma_ops_domain *dma_dom;
2888 	struct pci_dev *dev = NULL;
2889 	u16 devid;
2890 
2891 	for_each_pci_dev(dev) {
2892 
2893 		/* Do we handle this device? */
2894 		if (!check_device(&dev->dev))
2895 			continue;
2896 
2897 		dev_data = get_dev_data(&dev->dev);
2898 		if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
2899 			/* Make sure passthrough domain is allocated */
2900 			alloc_passthrough_domain();
2901 			dev_data->passthrough = true;
2902 			attach_device(&dev->dev, pt_domain);
2903 			pr_info("AMD-Vi: Using passthrough domain for device %s\n",
2904 				dev_name(&dev->dev));
2905 		}
2906 
2907 		/* Is there already any domain for it? */
2908 		if (domain_for_device(&dev->dev))
2909 			continue;
2910 
2911 		devid = get_device_id(&dev->dev);
2912 
2913 		dma_dom = dma_ops_domain_alloc();
2914 		if (!dma_dom)
2915 			continue;
2916 		init_unity_mappings_for_device(dma_dom, devid);
2917 		dma_dom->target_dev = devid;
2918 
2919 		attach_device(&dev->dev, &dma_dom->domain);
2920 
2921 		list_add_tail(&dma_dom->list, &iommu_pd_list);
2922 	}
2923 }
2924 
2925 static struct dma_map_ops amd_iommu_dma_ops = {
2926 	.alloc = alloc_coherent,
2927 	.free = free_coherent,
2928 	.map_page = map_page,
2929 	.unmap_page = unmap_page,
2930 	.map_sg = map_sg,
2931 	.unmap_sg = unmap_sg,
2932 	.dma_supported = amd_iommu_dma_supported,
2933 };
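/*
 * Illustrative only: drivers never call the functions above directly. A
 * mapping request like the sketch below is dispatched through the
 * per-device archdata.dma_ops set up in device_dma_ops_init() and ends up
 * in map_page()/unmap_page(); pdev and page are assumed to be provided by
 * the caller.
 *
 *	dma_addr_t handle = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE,
 *					 DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, handle))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_page(&pdev->dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
 */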
2934 
2935 static unsigned device_dma_ops_init(void)
2936 {
2937 	struct iommu_dev_data *dev_data;
2938 	struct pci_dev *pdev = NULL;
2939 	unsigned unhandled = 0;
2940 
2941 	for_each_pci_dev(pdev) {
2942 		if (!check_device(&pdev->dev)) {
2943 
2944 			iommu_ignore_device(&pdev->dev);
2945 
2946 			unhandled += 1;
2947 			continue;
2948 		}
2949 
2950 		dev_data = get_dev_data(&pdev->dev);
2951 
2952 		if (!dev_data->passthrough)
2953 			pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
2954 		else
2955 			pdev->dev.archdata.dma_ops = &nommu_dma_ops;
2956 	}
2957 
2958 	return unhandled;
2959 }
2960 
2961 /*
2962  * The function which glues the AMD IOMMU driver into dma_ops.
2963  */
2964 
2965 void __init amd_iommu_init_api(void)
2966 {
2967 	bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
2968 }
2969 
2970 int __init amd_iommu_init_dma_ops(void)
2971 {
2972 	struct amd_iommu *iommu;
2973 	int ret, unhandled;
2974 
2975 	/*
2976 	 * first allocate a default protection domain for every IOMMU we
2977 	 * found in the system. Devices not assigned to any other
2978 	 * protection domain will be assigned to the default one.
2979 	 */
2980 	for_each_iommu(iommu) {
2981 		iommu->default_dom = dma_ops_domain_alloc();
2982 		if (iommu->default_dom == NULL)
2983 			return -ENOMEM;
2984 		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2985 		ret = iommu_init_unity_mappings(iommu);
2986 		if (ret)
2987 			goto free_domains;
2988 	}
2989 
2990 	/*
2991 	 * Pre-allocate the protection domains for each device.
2992 	 */
2993 	prealloc_protection_domains();
2994 
2995 	iommu_detected = 1;
2996 	swiotlb = 0;
2997 
2998 	/* Finally make the dma_ops visible to the device drivers */
2999 	unhandled = device_dma_ops_init();
3000 	if (unhandled && max_pfn > MAX_DMA32_PFN) {
3001 		/* There are unhandled devices - initialize swiotlb for them */
3002 		swiotlb = 1;
3003 	}
3004 
3005 	amd_iommu_stats_init();
3006 
3007 	return 0;
3008 
3009 free_domains:
3010 
3011 	for_each_iommu(iommu) {
3012 		if (iommu->default_dom)
3013 			dma_ops_domain_free(iommu->default_dom);
3014 	}
3015 
3016 	return ret;
3017 }
3018 
3019 /*****************************************************************************
3020  *
3021  * The following functions belong to the exported interface of AMD IOMMU
3022  *
3023  * This interface allows access to lower level functions of the IOMMU
3024  * like protection domain handling and assignment of devices to domains
3025  * which is not possible with the dma_ops interface.
3026  *
3027  *****************************************************************************/
3028 
3029 static void cleanup_domain(struct protection_domain *domain)
3030 {
3031 	struct iommu_dev_data *dev_data, *next;
3032 	unsigned long flags;
3033 
3034 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
3035 
3036 	list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
3037 		__detach_device(dev_data);
3038 		atomic_set(&dev_data->bind, 0);
3039 	}
3040 
3041 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
3042 }
3043 
3044 static void protection_domain_free(struct protection_domain *domain)
3045 {
3046 	if (!domain)
3047 		return;
3048 
3049 	del_domain_from_list(domain);
3050 
3051 	if (domain->id)
3052 		domain_id_free(domain->id);
3053 
3054 	kfree(domain);
3055 }
3056 
3057 static struct protection_domain *protection_domain_alloc(void)
3058 {
3059 	struct protection_domain *domain;
3060 
3061 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
3062 	if (!domain)
3063 		return NULL;
3064 
3065 	spin_lock_init(&domain->lock);
3066 	mutex_init(&domain->api_lock);
3067 	domain->id = domain_id_alloc();
3068 	if (!domain->id)
3069 		goto out_err;
3070 	INIT_LIST_HEAD(&domain->dev_list);
3071 
3072 	add_domain_to_list(domain);
3073 
3074 	return domain;
3075 
3076 out_err:
3077 	kfree(domain);
3078 
3079 	return NULL;
3080 }
3081 
3082 static int __init alloc_passthrough_domain(void)
3083 {
3084 	if (pt_domain != NULL)
3085 		return 0;
3086 
3087 	/* allocate passthrough domain */
3088 	pt_domain = protection_domain_alloc();
3089 	if (!pt_domain)
3090 		return -ENOMEM;
3091 
3092 	pt_domain->mode = PAGE_MODE_NONE;
3093 
3094 	return 0;
3095 }
3096 static int amd_iommu_domain_init(struct iommu_domain *dom)
3097 {
3098 	struct protection_domain *domain;
3099 
3100 	domain = protection_domain_alloc();
3101 	if (!domain)
3102 		goto out_free;
3103 
3104 	domain->mode    = PAGE_MODE_3_LEVEL;
3105 	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
3106 	if (!domain->pt_root)
3107 		goto out_free;
3108 
3109 	domain->iommu_domain = dom;
3110 
3111 	dom->priv = domain;
3112 
3113 	return 0;
3114 
3115 out_free:
3116 	protection_domain_free(domain);
3117 
3118 	return -ENOMEM;
3119 }
3120 
3121 static void amd_iommu_domain_destroy(struct iommu_domain *dom)
3122 {
3123 	struct protection_domain *domain = dom->priv;
3124 
3125 	if (!domain)
3126 		return;
3127 
3128 	if (domain->dev_cnt > 0)
3129 		cleanup_domain(domain);
3130 
3131 	BUG_ON(domain->dev_cnt != 0);
3132 
3133 	if (domain->mode != PAGE_MODE_NONE)
3134 		free_pagetable(domain);
3135 
3136 	if (domain->flags & PD_IOMMUV2_MASK)
3137 		free_gcr3_table(domain);
3138 
3139 	protection_domain_free(domain);
3140 
3141 	dom->priv = NULL;
3142 }
3143 
3144 static void amd_iommu_detach_device(struct iommu_domain *dom,
3145 				    struct device *dev)
3146 {
3147 	struct iommu_dev_data *dev_data = dev->archdata.iommu;
3148 	struct amd_iommu *iommu;
3149 	u16 devid;
3150 
3151 	if (!check_device(dev))
3152 		return;
3153 
3154 	devid = get_device_id(dev);
3155 
3156 	if (dev_data->domain != NULL)
3157 		detach_device(dev);
3158 
3159 	iommu = amd_iommu_rlookup_table[devid];
3160 	if (!iommu)
3161 		return;
3162 
3163 	iommu_completion_wait(iommu);
3164 }
3165 
3166 static int amd_iommu_attach_device(struct iommu_domain *dom,
3167 				   struct device *dev)
3168 {
3169 	struct protection_domain *domain = dom->priv;
3170 	struct iommu_dev_data *dev_data;
3171 	struct amd_iommu *iommu;
3172 	int ret;
3173 
3174 	if (!check_device(dev))
3175 		return -EINVAL;
3176 
3177 	dev_data = dev->archdata.iommu;
3178 
3179 	iommu = amd_iommu_rlookup_table[dev_data->devid];
3180 	if (!iommu)
3181 		return -EINVAL;
3182 
3183 	if (dev_data->domain)
3184 		detach_device(dev);
3185 
3186 	ret = attach_device(dev, domain);
3187 
3188 	iommu_completion_wait(iommu);
3189 
3190 	return ret;
3191 }
3192 
3193 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
3194 			 phys_addr_t paddr, size_t page_size, int iommu_prot)
3195 {
3196 	struct protection_domain *domain = dom->priv;
3197 	int prot = 0;
3198 	int ret;
3199 
3200 	if (domain->mode == PAGE_MODE_NONE)
3201 		return -EINVAL;
3202 
3203 	if (iommu_prot & IOMMU_READ)
3204 		prot |= IOMMU_PROT_IR;
3205 	if (iommu_prot & IOMMU_WRITE)
3206 		prot |= IOMMU_PROT_IW;
3207 
3208 	mutex_lock(&domain->api_lock);
3209 	ret = iommu_map_page(domain, iova, paddr, prot, page_size);
3210 	mutex_unlock(&domain->api_lock);
3211 
3212 	return ret;
3213 }
3214 
3215 static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
3216 			   size_t page_size)
3217 {
3218 	struct protection_domain *domain = dom->priv;
3219 	size_t unmap_size;
3220 
3221 	if (domain->mode == PAGE_MODE_NONE)
3222 		return -EINVAL;
3223 
3224 	mutex_lock(&domain->api_lock);
3225 	unmap_size = iommu_unmap_page(domain, iova, page_size);
3226 	mutex_unlock(&domain->api_lock);
3227 
3228 	domain_flush_tlb_pde(domain);
3229 
3230 	return unmap_size;
3231 }
3232 
3233 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
3234 					  unsigned long iova)
3235 {
3236 	struct protection_domain *domain = dom->priv;
3237 	unsigned long offset_mask;
3238 	phys_addr_t paddr;
3239 	u64 *pte, __pte;
3240 
3241 	if (domain->mode == PAGE_MODE_NONE)
3242 		return iova;
3243 
3244 	pte = fetch_pte(domain, iova);
3245 
3246 	if (!pte || !IOMMU_PTE_PRESENT(*pte))
3247 		return 0;
3248 
3249 	if (PM_PTE_LEVEL(*pte) == 0)
3250 		offset_mask = PAGE_SIZE - 1;
3251 	else
3252 		offset_mask = PTE_PAGE_SIZE(*pte) - 1;
3253 
3254 	__pte = *pte & PM_ADDR_MASK;
3255 	paddr = (__pte & ~offset_mask) | (iova & offset_mask);
3256 
3257 	return paddr;
3258 }
3259 
3260 static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
3261 				    unsigned long cap)
3262 {
3263 	switch (cap) {
3264 	case IOMMU_CAP_CACHE_COHERENCY:
3265 		return 1;
3266 	}
3267 
3268 	return 0;
3269 }
3270 
3271 static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
3272 {
3273 	struct iommu_dev_data *dev_data = dev->archdata.iommu;
3274 	struct pci_dev *pdev = to_pci_dev(dev);
3275 	u16 devid;
3276 
3277 	if (!dev_data)
3278 		return -ENODEV;
3279 
3280 	if (pdev->is_virtfn || !iommu_group_mf)
3281 		devid = dev_data->devid;
3282 	else
3283 		devid = calc_devid(pdev->bus->number,
3284 				   PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
3285 
3286 	*groupid = amd_iommu_alias_table[devid];
3287 
3288 	return 0;
3289 }
3290 
3291 static struct iommu_ops amd_iommu_ops = {
3292 	.domain_init = amd_iommu_domain_init,
3293 	.domain_destroy = amd_iommu_domain_destroy,
3294 	.attach_dev = amd_iommu_attach_device,
3295 	.detach_dev = amd_iommu_detach_device,
3296 	.map = amd_iommu_map,
3297 	.unmap = amd_iommu_unmap,
3298 	.iova_to_phys = amd_iommu_iova_to_phys,
3299 	.domain_has_cap = amd_iommu_domain_has_cap,
3300 	.device_group = amd_iommu_device_group,
3301 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
3302 };
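/*
 * A minimal sketch of how these callbacks are exercised through the generic
 * IOMMU API (illustrative only; error handling omitted, pdev/iova/paddr are
 * assumed to come from the caller):
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map(dom, iova, paddr, PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, PAGE_SIZE);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 */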
3303 
3304 /*****************************************************************************
3305  *
3306  * The next functions do a basic initialization of IOMMU for pass through
3307  * mode
3308  *
3309  * In passthrough mode the IOMMU is initialized and enabled but not used for
3310  * DMA-API translation.
3311  *
3312  *****************************************************************************/
3313 
3314 int __init amd_iommu_init_passthrough(void)
3315 {
3316 	struct iommu_dev_data *dev_data;
3317 	struct pci_dev *dev = NULL;
3318 	struct amd_iommu *iommu;
3319 	u16 devid;
3320 	int ret;
3321 
3322 	ret = alloc_passthrough_domain();
3323 	if (ret)
3324 		return ret;
3325 
3326 	for_each_pci_dev(dev) {
3327 		if (!check_device(&dev->dev))
3328 			continue;
3329 
3330 		dev_data = get_dev_data(&dev->dev);
3331 		dev_data->passthrough = true;
3332 
3333 		devid = get_device_id(&dev->dev);
3334 
3335 		iommu = amd_iommu_rlookup_table[devid];
3336 		if (!iommu)
3337 			continue;
3338 
3339 		attach_device(&dev->dev, pt_domain);
3340 	}
3341 
3342 	amd_iommu_stats_init();
3343 
3344 	pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
3345 
3346 	return 0;
3347 }
3348 
3349 /* IOMMUv2 specific functions */
3350 int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
3351 {
3352 	return atomic_notifier_chain_register(&ppr_notifier, nb);
3353 }
3354 EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
3355 
3356 int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
3357 {
3358 	return atomic_notifier_chain_unregister(&ppr_notifier, nb);
3359 }
3360 EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
3361 
3362 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
3363 {
3364 	struct protection_domain *domain = dom->priv;
3365 	unsigned long flags;
3366 
3367 	spin_lock_irqsave(&domain->lock, flags);
3368 
3369 	/* Update data structure */
3370 	domain->mode    = PAGE_MODE_NONE;
3371 	domain->updated = true;
3372 
3373 	/* Make changes visible to IOMMUs */
3374 	update_domain(domain);
3375 
3376 	/* Page-table is not visible to IOMMU anymore, so free it */
3377 	free_pagetable(domain);
3378 
3379 	spin_unlock_irqrestore(&domain->lock, flags);
3380 }
3381 EXPORT_SYMBOL(amd_iommu_domain_direct_map);
3382 
3383 int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
3384 {
3385 	struct protection_domain *domain = dom->priv;
3386 	unsigned long flags;
3387 	int levels, ret;
3388 
3389 	if (pasids <= 0 || pasids > (PASID_MASK + 1))
3390 		return -EINVAL;
3391 
3392 	/* Number of GCR3 table levels required */
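	/*
	 * Worked examples: pasids <= 512 fit into a single 512-entry table
	 * (levels = 0), pasids = 1 << 16 needs one extra level (levels = 1),
	 * and the maximum of 1 << 20 PASIDs needs two extra levels
	 * (levels = 2).
	 */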
3393 	for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
3394 		levels += 1;
3395 
3396 	if (levels > amd_iommu_max_glx_val)
3397 		return -EINVAL;
3398 
3399 	spin_lock_irqsave(&domain->lock, flags);
3400 
3401 	/*
3402 	 * Save ourselves all the sanity checks of whether the devices
3403 	 * already in the domain support IOMMUv2. Just force that the domain
3404 	 * has no devices attached when it is switched into IOMMUv2 mode.
3405 	 */
3406 	ret = -EBUSY;
3407 	if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
3408 		goto out;
3409 
3410 	ret = -ENOMEM;
3411 	domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
3412 	if (domain->gcr3_tbl == NULL)
3413 		goto out;
3414 
3415 	domain->glx      = levels;
3416 	domain->flags   |= PD_IOMMUV2_MASK;
3417 	domain->updated  = true;
3418 
3419 	update_domain(domain);
3420 
3421 	ret = 0;
3422 
3423 out:
3424 	spin_unlock_irqrestore(&domain->lock, flags);
3425 
3426 	return ret;
3427 }
3428 EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
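/*
 * Typical IOMMUv2 setup sequence for a domain, sketched for illustration;
 * the pasid and cr3 values are assumed to be supplied by the caller (for
 * example the amd_iommu_v2 driver):
 *
 *	amd_iommu_domain_direct_map(dom);
 *	amd_iommu_domain_enable_v2(dom, pasids);
 *	amd_iommu_domain_set_gcr3(dom, pasid, cr3);
 *	...
 *	amd_iommu_flush_tlb(dom, pasid);
 *	amd_iommu_domain_clear_gcr3(dom, pasid);
 */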
3429 
3430 static int __flush_pasid(struct protection_domain *domain, int pasid,
3431 			 u64 address, bool size)
3432 {
3433 	struct iommu_dev_data *dev_data;
3434 	struct iommu_cmd cmd;
3435 	int i, ret;
3436 
3437 	if (!(domain->flags & PD_IOMMUV2_MASK))
3438 		return -EINVAL;
3439 
3440 	build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
3441 
3442 	/*
3443 	 * IOMMU TLB needs to be flushed before Device TLB to
3444 	 * prevent device TLB refill from IOMMU TLB
3445 	 */
3446 	for (i = 0; i < amd_iommus_present; ++i) {
3447 		if (domain->dev_iommu[i] == 0)
3448 			continue;
3449 
3450 		ret = iommu_queue_command(amd_iommus[i], &cmd);
3451 		if (ret != 0)
3452 			goto out;
3453 	}
3454 
3455 	/* Wait until IOMMU TLB flushes are complete */
3456 	domain_flush_complete(domain);
3457 
3458 	/* Now flush device TLBs */
3459 	list_for_each_entry(dev_data, &domain->dev_list, list) {
3460 		struct amd_iommu *iommu;
3461 		int qdep;
3462 
3463 		BUG_ON(!dev_data->ats.enabled);
3464 
3465 		qdep  = dev_data->ats.qdep;
3466 		iommu = amd_iommu_rlookup_table[dev_data->devid];
3467 
3468 		build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
3469 				      qdep, address, size);
3470 
3471 		ret = iommu_queue_command(iommu, &cmd);
3472 		if (ret != 0)
3473 			goto out;
3474 	}
3475 
3476 	/* Wait until all device TLBs are flushed */
3477 	domain_flush_complete(domain);
3478 
3479 	ret = 0;
3480 
3481 out:
3482 
3483 	return ret;
3484 }
3485 
3486 static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
3487 				  u64 address)
3488 {
3489 	INC_STATS_COUNTER(invalidate_iotlb);
3490 
3491 	return __flush_pasid(domain, pasid, address, false);
3492 }
3493 
3494 int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
3495 			 u64 address)
3496 {
3497 	struct protection_domain *domain = dom->priv;
3498 	unsigned long flags;
3499 	int ret;
3500 
3501 	spin_lock_irqsave(&domain->lock, flags);
3502 	ret = __amd_iommu_flush_page(domain, pasid, address);
3503 	spin_unlock_irqrestore(&domain->lock, flags);
3504 
3505 	return ret;
3506 }
3507 EXPORT_SYMBOL(amd_iommu_flush_page);
3508 
3509 static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
3510 {
3511 	INC_STATS_COUNTER(invalidate_iotlb_all);
3512 
3513 	return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
3514 			     true);
3515 }
3516 
3517 int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
3518 {
3519 	struct protection_domain *domain = dom->priv;
3520 	unsigned long flags;
3521 	int ret;
3522 
3523 	spin_lock_irqsave(&domain->lock, flags);
3524 	ret = __amd_iommu_flush_tlb(domain, pasid);
3525 	spin_unlock_irqrestore(&domain->lock, flags);
3526 
3527 	return ret;
3528 }
3529 EXPORT_SYMBOL(amd_iommu_flush_tlb);
3530 
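/*
 * Walk (and optionally build) the GCR3 table for @pasid: each level consumes
 * 9 bits of the PASID, missing intermediate tables are allocated with
 * GFP_ATOMIC when @alloc is true, and a pointer to the level-0 entry is
 * returned (or NULL if the walk cannot be completed).
 */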
3531 static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
3532 {
3533 	int index;
3534 	u64 *pte;
3535 
3536 	while (true) {
3537 
3538 		index = (pasid >> (9 * level)) & 0x1ff;
3539 		pte   = &root[index];
3540 
3541 		if (level == 0)
3542 			break;
3543 
3544 		if (!(*pte & GCR3_VALID)) {
3545 			if (!alloc)
3546 				return NULL;
3547 
3548 			root = (void *)get_zeroed_page(GFP_ATOMIC);
3549 			if (root == NULL)
3550 				return NULL;
3551 
3552 			*pte = __pa(root) | GCR3_VALID;
3553 		}
3554 
3555 		root = __va(*pte & PAGE_MASK);
3556 
3557 		level -= 1;
3558 	}
3559 
3560 	return pte;
3561 }
3562 
3563 static int __set_gcr3(struct protection_domain *domain, int pasid,
3564 		      unsigned long cr3)
3565 {
3566 	u64 *pte;
3567 
3568 	if (domain->mode != PAGE_MODE_NONE)
3569 		return -EINVAL;
3570 
3571 	pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
3572 	if (pte == NULL)
3573 		return -ENOMEM;
3574 
3575 	*pte = (cr3 & PAGE_MASK) | GCR3_VALID;
3576 
3577 	return __amd_iommu_flush_tlb(domain, pasid);
3578 }
3579 
3580 static int __clear_gcr3(struct protection_domain *domain, int pasid)
3581 {
3582 	u64 *pte;
3583 
3584 	if (domain->mode != PAGE_MODE_NONE)
3585 		return -EINVAL;
3586 
3587 	pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
3588 	if (pte == NULL)
3589 		return 0;
3590 
3591 	*pte = 0;
3592 
3593 	return __amd_iommu_flush_tlb(domain, pasid);
3594 }
3595 
3596 int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
3597 			      unsigned long cr3)
3598 {
3599 	struct protection_domain *domain = dom->priv;
3600 	unsigned long flags;
3601 	int ret;
3602 
3603 	spin_lock_irqsave(&domain->lock, flags);
3604 	ret = __set_gcr3(domain, pasid, cr3);
3605 	spin_unlock_irqrestore(&domain->lock, flags);
3606 
3607 	return ret;
3608 }
3609 EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
3610 
3611 int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
3612 {
3613 	struct protection_domain *domain = dom->priv;
3614 	unsigned long flags;
3615 	int ret;
3616 
3617 	spin_lock_irqsave(&domain->lock, flags);
3618 	ret = __clear_gcr3(domain, pasid);
3619 	spin_unlock_irqrestore(&domain->lock, flags);
3620 
3621 	return ret;
3622 }
3623 EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
3624 
3625 int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
3626 			   int status, int tag)
3627 {
3628 	struct iommu_dev_data *dev_data;
3629 	struct amd_iommu *iommu;
3630 	struct iommu_cmd cmd;
3631 
3632 	INC_STATS_COUNTER(complete_ppr);
3633 
3634 	dev_data = get_dev_data(&pdev->dev);
3635 	iommu    = amd_iommu_rlookup_table[dev_data->devid];
3636 
3637 	build_complete_ppr(&cmd, dev_data->devid, pasid, status,
3638 			   tag, dev_data->pri_tlp);
3639 
3640 	return iommu_queue_command(iommu, &cmd);
3641 }
3642 EXPORT_SYMBOL(amd_iommu_complete_ppr);
3643 
3644 struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
3645 {
3646 	struct protection_domain *domain;
3647 
3648 	domain = get_domain(&pdev->dev);
3649 	if (IS_ERR(domain))
3650 		return NULL;
3651 
3652 	/* Only return IOMMUv2 domains */
3653 	if (!(domain->flags & PD_IOMMUV2_MASK))
3654 		return NULL;
3655 
3656 	return domain->iommu_domain;
3657 }
3658 EXPORT_SYMBOL(amd_iommu_get_v2_domain);
3659 
3660 void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
3661 {
3662 	struct iommu_dev_data *dev_data;
3663 
3664 	if (!amd_iommu_v2_supported())
3665 		return;
3666 
3667 	dev_data = get_dev_data(&pdev->dev);
3668 	dev_data->errata |= (1 << erratum);
3669 }
3670 EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
3671 
3672 int amd_iommu_device_info(struct pci_dev *pdev,
3673                           struct amd_iommu_device_info *info)
3674 {
3675 	int max_pasids;
3676 	int pos;
3677 
3678 	if (pdev == NULL || info == NULL)
3679 		return -EINVAL;
3680 
3681 	if (!amd_iommu_v2_supported())
3682 		return -EINVAL;
3683 
3684 	memset(info, 0, sizeof(*info));
3685 
3686 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
3687 	if (pos)
3688 		info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
3689 
3690 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
3691 	if (pos)
3692 		info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
3693 
3694 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
3695 	if (pos) {
3696 		int features;
3697 
3698 		max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
3699 		max_pasids = min(max_pasids, (1 << 20));
3700 
3701 		info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
3702 		info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
3703 
3704 		features = pci_pasid_features(pdev);
3705 		if (features & PCI_PASID_CAP_EXEC)
3706 			info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
3707 		if (features & PCI_PASID_CAP_PRIV)
3708 			info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
3709 	}
3710 
3711 	return 0;
3712 }
3713 EXPORT_SYMBOL(amd_iommu_device_info);
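/*
 * Illustrative check a caller (such as the amd_iommu_v2 driver) might do
 * before relying on IOMMUv2 features of a device; this is a sketch, not
 * copied from an in-tree user:
 *
 *	struct amd_iommu_device_info info;
 *
 *	if (amd_iommu_device_info(pdev, &info))
 *		return -ENODEV;
 *	if (!(info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) ||
 *	    !(info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP))
 *		return -ENODEV;
 */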
3714