1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23 
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/export.h>
28 #include <linux/slab.h>
29 #include <linux/irq.h>
30 #include <linux/interrupt.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <asm/cacheflush.h>
46 #include <asm/iommu.h>
47 
48 #define ROOT_SIZE		VTD_PAGE_SIZE
49 #define CONTEXT_SIZE		VTD_PAGE_SIZE
50 
51 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
52 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
53 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
54 
55 #define IOAPIC_RANGE_START	(0xfee00000)
56 #define IOAPIC_RANGE_END	(0xfeefffff)
57 #define IOVA_START_ADDR		(0x1000)
58 
59 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
60 
61 #define MAX_AGAW_WIDTH 64
62 
63 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
64 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
65 
66 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
67    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
68 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
69 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
70 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
71 
72 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
73 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
74 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
75 
76 /* page table handling */
77 #define LEVEL_STRIDE		(9)
78 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
79 
80 /*
81  * This bitmap is used to advertise the page sizes our hardware supports
82  * to the IOMMU core, which will then use this information to split
83  * physically contiguous memory regions it is mapping into page sizes
84  * that we support.
85  *
86  * Traditionally the IOMMU core just handed us the mappings directly,
87  * after making sure the size is an order of a 4KiB page and that the
88  * mapping has natural alignment.
89  *
90  * To retain this behavior, we currently advertise that we support
91  * all page sizes that are an order of 4KiB.
92  *
93  * If at some point we'd like to utilize the IOMMU core's new behavior,
94  * we could change this to advertise the real page sizes we support.
95  */
96 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
97 
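/*
 * Illustrative note (derived from the helpers below, not a statement from
 * the original authors): with LEVEL_STRIDE == 9, each page-table level
 * resolves 9 bits of the address, so an adjusted guest address width
 * (agaw) of 2 corresponds to a 4-level table (agaw + 2) covering
 * 30 + 2 * 9 = 48 bits, i.e. DEFAULT_DOMAIN_ADDRESS_WIDTH above.
 */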
98 static inline int agaw_to_level(int agaw)
99 {
100 	return agaw + 2;
101 }
102 
103 static inline int agaw_to_width(int agaw)
104 {
105 	return 30 + agaw * LEVEL_STRIDE;
106 }
107 
108 static inline int width_to_agaw(int width)
109 {
110 	return (width - 30) / LEVEL_STRIDE;
111 }
112 
113 static inline unsigned int level_to_offset_bits(int level)
114 {
115 	return (level - 1) * LEVEL_STRIDE;
116 }
117 
118 static inline int pfn_level_offset(unsigned long pfn, int level)
119 {
120 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
121 }
122 
123 static inline unsigned long level_mask(int level)
124 {
125 	return -1UL << level_to_offset_bits(level);
126 }
127 
128 static inline unsigned long level_size(int level)
129 {
130 	return 1UL << level_to_offset_bits(level);
131 }
132 
133 static inline unsigned long align_to_level(unsigned long pfn, int level)
134 {
135 	return (pfn + level_size(level) - 1) & level_mask(level);
136 }
137 
138 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
139 {
140 	return  1 << ((lvl - 1) * LEVEL_STRIDE);
141 }
142 
143 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
144    are never going to work. */
145 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
146 {
147 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
148 }
149 
150 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
151 {
152 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
153 }
154 static inline unsigned long page_to_dma_pfn(struct page *pg)
155 {
156 	return mm_to_dma_pfn(page_to_pfn(pg));
157 }
158 static inline unsigned long virt_to_dma_pfn(void *p)
159 {
160 	return page_to_dma_pfn(virt_to_page(p));
161 }
162 
163 /* global iommu list, set NULL for ignored DMAR units */
164 static struct intel_iommu **g_iommus;
165 
166 static void __init check_tylersburg_isoch(void);
167 static int rwbf_quirk;
168 
169 /*
170  * set to 1 to panic the kernel if VT-d can't be successfully enabled
171  * (used when the kernel is launched w/ TXT)
172  */
173 static int force_on = 0;
174 
175 /*
176  * 0: Present
177  * 1-11: Reserved
178  * 12-63: Context Ptr (12 - (haw-1))
179  * 64-127: Reserved
180  */
181 struct root_entry {
182 	u64	val;
183 	u64	rsvd1;
184 };
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
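/*
 * Clarifying note (derived from the code below): the root table has
 * ROOT_ENTRY_NR == 256 entries, one per PCI bus number; each present
 * root entry points to a 4KiB context table with 256 entries, one per
 * devfn (see device_to_context_entry() further down).
 */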
186 static inline bool root_present(struct root_entry *root)
187 {
188 	return (root->val & 1);
189 }
190 static inline void set_root_present(struct root_entry *root)
191 {
192 	root->val |= 1;
193 }
194 static inline void set_root_value(struct root_entry *root, unsigned long value)
195 {
196 	root->val |= value & VTD_PAGE_MASK;
197 }
198 
199 static inline struct context_entry *
200 get_context_addr_from_root(struct root_entry *root)
201 {
202 	return (struct context_entry *)
203 		(root_present(root)?phys_to_virt(
204 		root->val & VTD_PAGE_MASK) :
205 		NULL);
206 }
207 
208 /*
209  * low 64 bits:
210  * 0: present
211  * 1: fault processing disable
212  * 2-3: translation type
213  * 12-63: address space root
214  * high 64 bits:
215  * 0-2: address width
216  * 3-6: aval
217  * 8-23: domain id
218  */
219 struct context_entry {
220 	u64 lo;
221 	u64 hi;
222 };
223 
224 static inline bool context_present(struct context_entry *context)
225 {
226 	return (context->lo & 1);
227 }
228 static inline void context_set_present(struct context_entry *context)
229 {
230 	context->lo |= 1;
231 }
232 
233 static inline void context_set_fault_enable(struct context_entry *context)
234 {
235 	context->lo &= (((u64)-1) << 2) | 1;
236 }
237 
238 static inline void context_set_translation_type(struct context_entry *context,
239 						unsigned long value)
240 {
241 	context->lo &= (((u64)-1) << 4) | 3;
242 	context->lo |= (value & 3) << 2;
243 }
244 
245 static inline void context_set_address_root(struct context_entry *context,
246 					    unsigned long value)
247 {
248 	context->lo |= value & VTD_PAGE_MASK;
249 }
250 
251 static inline void context_set_address_width(struct context_entry *context,
252 					     unsigned long value)
253 {
254 	context->hi |= value & 7;
255 }
256 
257 static inline void context_set_domain_id(struct context_entry *context,
258 					 unsigned long value)
259 {
260 	context->hi |= (value & ((1 << 16) - 1)) << 8;
261 }
262 
263 static inline void context_clear_entry(struct context_entry *context)
264 {
265 	context->lo = 0;
266 	context->hi = 0;
267 }
268 
269 /*
270  * 0: readable
271  * 1: writable
272  * 2-6: reserved
273  * 7: super page
274  * 8-10: available
275  * 11: snoop behavior
276  * 12-63: Host physical address
277  */
278 struct dma_pte {
279 	u64 val;
280 };
281 
282 static inline void dma_clear_pte(struct dma_pte *pte)
283 {
284 	pte->val = 0;
285 }
286 
287 static inline void dma_set_pte_readable(struct dma_pte *pte)
288 {
289 	pte->val |= DMA_PTE_READ;
290 }
291 
292 static inline void dma_set_pte_writable(struct dma_pte *pte)
293 {
294 	pte->val |= DMA_PTE_WRITE;
295 }
296 
297 static inline void dma_set_pte_snp(struct dma_pte *pte)
298 {
299 	pte->val |= DMA_PTE_SNP;
300 }
301 
302 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
303 {
304 	pte->val = (pte->val & ~3) | (prot & 3);
305 }
306 
307 static inline u64 dma_pte_addr(struct dma_pte *pte)
308 {
309 #ifdef CONFIG_64BIT
310 	return pte->val & VTD_PAGE_MASK;
311 #else
312 	/* Must have a full atomic 64-bit read */
313 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
314 #endif
315 }
316 
317 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
318 {
319 	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
320 }
321 
322 static inline bool dma_pte_present(struct dma_pte *pte)
323 {
324 	return (pte->val & 3) != 0;
325 }
326 
327 static inline bool dma_pte_superpage(struct dma_pte *pte)
328 {
329 	return (pte->val & (1 << 7));
330 }
331 
332 static inline int first_pte_in_page(struct dma_pte *pte)
333 {
334 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
335 }
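/*
 * Clarifying note (derived from the code above): a page-table page holds
 * 512 dma_pte entries (4KiB / 8 bytes), so first_pte_in_page() is true
 * exactly when a pte pointer sits at the start of a page-table page;
 * callers use it to detect that pte++ has walked off the previous page.
 */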
336 
337 /*
338  * This domain is a statically identity mapping domain.
339  *	1. This domain creates a static 1:1 mapping to all usable memory.
340  * 	2. It maps to each iommu if successful.
341  *	3. Each iommu maps to this domain if successful.
342  */
343 static struct dmar_domain *si_domain;
344 static int hw_pass_through = 1;
345 
346 /* devices under the same p2p bridge are owned by one domain */
347 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
348 
349 /* domain represents a virtual machine; more than one device
350  * across iommus may be owned by one domain, e.g. a kvm guest.
351  */
352 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
353 
354 /* si_domain contains multiple devices */
355 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
356 
357 /* define the limit of IOMMUs supported in each domain */
358 #ifdef	CONFIG_X86
359 # define	IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
360 #else
361 # define	IOMMU_UNITS_SUPPORTED	64
362 #endif
363 
364 struct dmar_domain {
365 	int	id;			/* domain id */
366 	int	nid;			/* node id */
367 	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
368 					/* bitmap of iommus this domain uses*/
369 
370 	struct list_head devices; 	/* all devices' list */
371 	struct iova_domain iovad;	/* iova's that belong to this domain */
372 
373 	struct dma_pte	*pgd;		/* virtual address */
374 	int		gaw;		/* max guest address width */
375 
376 	/* adjusted guest address width, 0 is level 2 30-bit */
377 	int		agaw;
378 
379 	int		flags;		/* flags to find out type of domain */
380 
381 	int		iommu_coherency;/* indicate coherency of iommu access */
382 	int		iommu_snooping; /* indicate snooping control feature*/
383 	int		iommu_count;	/* reference count of iommu */
384 	int		iommu_superpage;/* Level of superpages supported:
385 					   0 == 4KiB (no superpages), 1 == 2MiB,
386 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
387 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
388 	u64		max_addr;	/* maximum mapped address */
389 };
390 
391 /* PCI domain-device relationship */
392 struct device_domain_info {
393 	struct list_head link;	/* link to domain siblings */
394 	struct list_head global; /* link to global list */
395 	int segment;		/* PCI domain */
396 	u8 bus;			/* PCI bus number */
397 	u8 devfn;		/* PCI devfn number */
398 	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
399 	struct intel_iommu *iommu; /* IOMMU used by this device */
400 	struct dmar_domain *domain; /* pointer to domain */
401 };
402 
403 static void flush_unmaps_timeout(unsigned long data);
404 
405 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
406 
407 #define HIGH_WATER_MARK 250
408 struct deferred_flush_tables {
409 	int next;
410 	struct iova *iova[HIGH_WATER_MARK];
411 	struct dmar_domain *domain[HIGH_WATER_MARK];
412 };
413 
414 static struct deferred_flush_tables *deferred_flush;
415 
416 /* number of registered iommus; sizes g_iommus and the per-domain iommu bitmaps */
417 static int g_num_of_iommus;
418 
419 static DEFINE_SPINLOCK(async_umap_flush_lock);
420 static LIST_HEAD(unmaps_to_do);
421 
422 static int timer_on;
423 static long list_size;
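/*
 * Clarifying note: with lazy (non-strict) unmapping, freed IOVAs are
 * queued in deferred_flush[] and their IOTLB entries are invalidated in
 * batches, driven by unmap_timer/flush_unmaps_timeout() or once roughly
 * HIGH_WATER_MARK entries have accumulated, rather than issuing one
 * flush per unmap.
 */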
424 
425 static void domain_remove_dev_info(struct dmar_domain *domain);
426 
427 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
428 int dmar_disabled = 0;
429 #else
430 int dmar_disabled = 1;
431 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
432 
433 int intel_iommu_enabled = 0;
434 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
435 
436 static int dmar_map_gfx = 1;
437 static int dmar_forcedac;
438 static int intel_iommu_strict;
439 static int intel_iommu_superpage = 1;
440 
441 int intel_iommu_gfx_mapped;
442 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
443 
444 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
445 static DEFINE_SPINLOCK(device_domain_lock);
446 static LIST_HEAD(device_domain_list);
447 
448 static struct iommu_ops intel_iommu_ops;
449 
450 static int __init intel_iommu_setup(char *str)
451 {
452 	if (!str)
453 		return -EINVAL;
454 	while (*str) {
455 		if (!strncmp(str, "on", 2)) {
456 			dmar_disabled = 0;
457 			printk(KERN_INFO "Intel-IOMMU: enabled\n");
458 		} else if (!strncmp(str, "off", 3)) {
459 			dmar_disabled = 1;
460 			printk(KERN_INFO "Intel-IOMMU: disabled\n");
461 		} else if (!strncmp(str, "igfx_off", 8)) {
462 			dmar_map_gfx = 0;
463 			printk(KERN_INFO
464 				"Intel-IOMMU: disable GFX device mapping\n");
465 		} else if (!strncmp(str, "forcedac", 8)) {
466 			printk(KERN_INFO
467 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
468 			dmar_forcedac = 1;
469 		} else if (!strncmp(str, "strict", 6)) {
470 			printk(KERN_INFO
471 				"Intel-IOMMU: disable batched IOTLB flush\n");
472 			intel_iommu_strict = 1;
473 		} else if (!strncmp(str, "sp_off", 6)) {
474 			printk(KERN_INFO
475 				"Intel-IOMMU: disable supported super page\n");
476 			intel_iommu_superpage = 0;
477 		}
478 
479 		str += strcspn(str, ",");
480 		while (*str == ',')
481 			str++;
482 	}
483 	return 0;
484 }
485 __setup("intel_iommu=", intel_iommu_setup);
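/*
 * Illustrative usage: options are comma-separated on the kernel command
 * line, e.g.
 *	intel_iommu=on,strict,sp_off
 * which enables the IOMMU, disables batched IOTLB flushing, and disables
 * superpage support.
 */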
486 
487 static struct kmem_cache *iommu_domain_cache;
488 static struct kmem_cache *iommu_devinfo_cache;
489 static struct kmem_cache *iommu_iova_cache;
490 
491 static inline void *alloc_pgtable_page(int node)
492 {
493 	struct page *page;
494 	void *vaddr = NULL;
495 
496 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
497 	if (page)
498 		vaddr = page_address(page);
499 	return vaddr;
500 }
501 
502 static inline void free_pgtable_page(void *vaddr)
503 {
504 	free_page((unsigned long)vaddr);
505 }
506 
507 static inline void *alloc_domain_mem(void)
508 {
509 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
510 }
511 
512 static void free_domain_mem(void *vaddr)
513 {
514 	kmem_cache_free(iommu_domain_cache, vaddr);
515 }
516 
517 static inline void * alloc_devinfo_mem(void)
518 {
519 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
520 }
521 
522 static inline void free_devinfo_mem(void *vaddr)
523 {
524 	kmem_cache_free(iommu_devinfo_cache, vaddr);
525 }
526 
527 struct iova *alloc_iova_mem(void)
528 {
529 	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
530 }
531 
532 void free_iova_mem(struct iova *iova)
533 {
534 	kmem_cache_free(iommu_iova_cache, iova);
535 }
536 
537 
538 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
539 {
540 	unsigned long sagaw;
541 	int agaw = -1;
542 
543 	sagaw = cap_sagaw(iommu->cap);
544 	for (agaw = width_to_agaw(max_gaw);
545 	     agaw >= 0; agaw--) {
546 		if (test_bit(agaw, &sagaw))
547 			break;
548 	}
549 
550 	return agaw;
551 }
552 
553 /*
554  * Calculate max SAGAW for each iommu.
555  */
556 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
557 {
558 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
559 }
560 
561 /*
562  * calculate agaw for each iommu.
563  * "SAGAW" may be different across iommus, use a default agaw, and
564  * get a supported less agaw for iommus that don't support the default agaw.
565  */
566 int iommu_calculate_agaw(struct intel_iommu *iommu)
567 {
568 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
569 }
570 
571 /* This function only returns a single iommu in a domain */
572 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
573 {
574 	int iommu_id;
575 
576 	/* si_domain and vm domain should not get here. */
577 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
578 	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
579 
580 	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
581 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
582 		return NULL;
583 
584 	return g_iommus[iommu_id];
585 }
586 
587 static void domain_update_iommu_coherency(struct dmar_domain *domain)
588 {
589 	int i;
590 
591 	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
592 
593 	domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
594 
595 	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
596 		if (!ecap_coherent(g_iommus[i]->ecap)) {
597 			domain->iommu_coherency = 0;
598 			break;
599 		}
600 	}
601 }
602 
603 static void domain_update_iommu_snooping(struct dmar_domain *domain)
604 {
605 	int i;
606 
607 	domain->iommu_snooping = 1;
608 
609 	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
610 		if (!ecap_sc_support(g_iommus[i]->ecap)) {
611 			domain->iommu_snooping = 0;
612 			break;
613 		}
614 	}
615 }
616 
617 static void domain_update_iommu_superpage(struct dmar_domain *domain)
618 {
619 	struct dmar_drhd_unit *drhd;
620 	struct intel_iommu *iommu = NULL;
621 	int mask = 0xf;
622 
623 	if (!intel_iommu_superpage) {
624 		domain->iommu_superpage = 0;
625 		return;
626 	}
627 
628 	/* set iommu_superpage to the smallest common denominator */
629 	for_each_active_iommu(iommu, drhd) {
630 		mask &= cap_super_page_val(iommu->cap);
631 		if (!mask) {
632 			break;
633 		}
634 	}
635 	domain->iommu_superpage = fls(mask);
636 }
637 
638 /* Some capabilities may be different across iommus */
639 static void domain_update_iommu_cap(struct dmar_domain *domain)
640 {
641 	domain_update_iommu_coherency(domain);
642 	domain_update_iommu_snooping(domain);
643 	domain_update_iommu_superpage(domain);
644 }
645 
646 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
647 {
648 	struct dmar_drhd_unit *drhd = NULL;
649 	int i;
650 
651 	for_each_drhd_unit(drhd) {
652 		if (drhd->ignored)
653 			continue;
654 		if (segment != drhd->segment)
655 			continue;
656 
657 		for (i = 0; i < drhd->devices_cnt; i++) {
658 			if (drhd->devices[i] &&
659 			    drhd->devices[i]->bus->number == bus &&
660 			    drhd->devices[i]->devfn == devfn)
661 				return drhd->iommu;
662 			if (drhd->devices[i] &&
663 			    drhd->devices[i]->subordinate &&
664 			    drhd->devices[i]->subordinate->number <= bus &&
665 			    drhd->devices[i]->subordinate->subordinate >= bus)
666 				return drhd->iommu;
667 		}
668 
669 		if (drhd->include_all)
670 			return drhd->iommu;
671 	}
672 
673 	return NULL;
674 }
675 
676 static void domain_flush_cache(struct dmar_domain *domain,
677 			       void *addr, int size)
678 {
679 	if (!domain->iommu_coherency)
680 		clflush_cache_range(addr, size);
681 }
682 
683 /* Gets context entry for a given bus and devfn */
684 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
685 		u8 bus, u8 devfn)
686 {
687 	struct root_entry *root;
688 	struct context_entry *context;
689 	unsigned long phy_addr;
690 	unsigned long flags;
691 
692 	spin_lock_irqsave(&iommu->lock, flags);
693 	root = &iommu->root_entry[bus];
694 	context = get_context_addr_from_root(root);
695 	if (!context) {
696 		context = (struct context_entry *)
697 				alloc_pgtable_page(iommu->node);
698 		if (!context) {
699 			spin_unlock_irqrestore(&iommu->lock, flags);
700 			return NULL;
701 		}
702 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
703 		phy_addr = virt_to_phys((void *)context);
704 		set_root_value(root, phy_addr);
705 		set_root_present(root);
706 		__iommu_flush_cache(iommu, root, sizeof(*root));
707 	}
708 	spin_unlock_irqrestore(&iommu->lock, flags);
709 	return &context[devfn];
710 }
711 
712 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
713 {
714 	struct root_entry *root;
715 	struct context_entry *context;
716 	int ret;
717 	unsigned long flags;
718 
719 	spin_lock_irqsave(&iommu->lock, flags);
720 	root = &iommu->root_entry[bus];
721 	context = get_context_addr_from_root(root);
722 	if (!context) {
723 		ret = 0;
724 		goto out;
725 	}
726 	ret = context_present(&context[devfn]);
727 out:
728 	spin_unlock_irqrestore(&iommu->lock, flags);
729 	return ret;
730 }
731 
732 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
733 {
734 	struct root_entry *root;
735 	struct context_entry *context;
736 	unsigned long flags;
737 
738 	spin_lock_irqsave(&iommu->lock, flags);
739 	root = &iommu->root_entry[bus];
740 	context = get_context_addr_from_root(root);
741 	if (context) {
742 		context_clear_entry(&context[devfn]);
743 		__iommu_flush_cache(iommu, &context[devfn], \
744 			sizeof(*context));
745 	}
746 	spin_unlock_irqrestore(&iommu->lock, flags);
747 }
748 
749 static void free_context_table(struct intel_iommu *iommu)
750 {
751 	struct root_entry *root;
752 	int i;
753 	unsigned long flags;
754 	struct context_entry *context;
755 
756 	spin_lock_irqsave(&iommu->lock, flags);
757 	if (!iommu->root_entry) {
758 		goto out;
759 	}
760 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
761 		root = &iommu->root_entry[i];
762 		context = get_context_addr_from_root(root);
763 		if (context)
764 			free_pgtable_page(context);
765 	}
766 	free_pgtable_page(iommu->root_entry);
767 	iommu->root_entry = NULL;
768 out:
769 	spin_unlock_irqrestore(&iommu->lock, flags);
770 }
771 
772 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
773 				      unsigned long pfn, int target_level)
774 {
775 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
776 	struct dma_pte *parent, *pte = NULL;
777 	int level = agaw_to_level(domain->agaw);
778 	int offset;
779 
780 	BUG_ON(!domain->pgd);
781 
782 	if (addr_width < BITS_PER_LONG && pfn >> addr_width)
783 		/* Address beyond IOMMU's addressing capabilities. */
784 		return NULL;
785 
786 	parent = domain->pgd;
787 
788 	while (level > 0) {
789 		void *tmp_page;
790 
791 		offset = pfn_level_offset(pfn, level);
792 		pte = &parent[offset];
793 		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
794 			break;
795 		if (level == target_level)
796 			break;
797 
798 		if (!dma_pte_present(pte)) {
799 			uint64_t pteval;
800 
801 			tmp_page = alloc_pgtable_page(domain->nid);
802 
803 			if (!tmp_page)
804 				return NULL;
805 
806 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
807 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
808 			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
809 				/* Someone else set it while we were thinking; use theirs. */
810 				free_pgtable_page(tmp_page);
811 			} else {
812 				dma_pte_addr(pte);
813 				domain_flush_cache(domain, pte, sizeof(*pte));
814 			}
815 		}
816 		parent = phys_to_virt(dma_pte_addr(pte));
817 		level--;
818 	}
819 
820 	return pte;
821 }
822 
823 
824 /* return the pte of an address at a specific level */
825 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
826 					 unsigned long pfn,
827 					 int level, int *large_page)
828 {
829 	struct dma_pte *parent, *pte = NULL;
830 	int total = agaw_to_level(domain->agaw);
831 	int offset;
832 
833 	parent = domain->pgd;
834 	while (level <= total) {
835 		offset = pfn_level_offset(pfn, total);
836 		pte = &parent[offset];
837 		if (level == total)
838 			return pte;
839 
840 		if (!dma_pte_present(pte)) {
841 			*large_page = total;
842 			break;
843 		}
844 
845 		if (pte->val & DMA_PTE_LARGE_PAGE) {
846 			*large_page = total;
847 			return pte;
848 		}
849 
850 		parent = phys_to_virt(dma_pte_addr(pte));
851 		total--;
852 	}
853 	return NULL;
854 }
855 
856 /* clear last level pte; this should be followed by a tlb flush */
857 static int dma_pte_clear_range(struct dmar_domain *domain,
858 				unsigned long start_pfn,
859 				unsigned long last_pfn)
860 {
861 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
862 	unsigned int large_page = 1;
863 	struct dma_pte *first_pte, *pte;
864 	int order;
865 
866 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
867 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
868 	BUG_ON(start_pfn > last_pfn);
869 
870 	/* we don't need lock here; nobody else touches the iova range */
871 	do {
872 		large_page = 1;
873 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
874 		if (!pte) {
875 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
876 			continue;
877 		}
878 		do {
879 			dma_clear_pte(pte);
880 			start_pfn += lvl_to_nr_pages(large_page);
881 			pte++;
882 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
883 
884 		domain_flush_cache(domain, first_pte,
885 				   (void *)pte - (void *)first_pte);
886 
887 	} while (start_pfn && start_pfn <= last_pfn);
888 
889 	order = (large_page - 1) * 9;
890 	return order;
891 }
892 
893 static void dma_pte_free_level(struct dmar_domain *domain, int level,
894 			       struct dma_pte *pte, unsigned long pfn,
895 			       unsigned long start_pfn, unsigned long last_pfn)
896 {
897 	pfn = max(start_pfn, pfn);
898 	pte = &pte[pfn_level_offset(pfn, level)];
899 
900 	do {
901 		unsigned long level_pfn;
902 		struct dma_pte *level_pte;
903 
904 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
905 			goto next;
906 
907 		level_pfn = pfn & level_mask(level - 1);
908 		level_pte = phys_to_virt(dma_pte_addr(pte));
909 
910 		if (level > 2)
911 			dma_pte_free_level(domain, level - 1, level_pte,
912 					   level_pfn, start_pfn, last_pfn);
913 
914 		/* If range covers entire pagetable, free it */
915 		if (!(start_pfn > level_pfn ||
916 		      last_pfn < level_pfn + level_size(level) - 1)) {
917 			dma_clear_pte(pte);
918 			domain_flush_cache(domain, pte, sizeof(*pte));
919 			free_pgtable_page(level_pte);
920 		}
921 next:
922 		pfn += level_size(level);
923 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
924 }
925 
926 /* free page table pages. last level pte should already be cleared */
927 static void dma_pte_free_pagetable(struct dmar_domain *domain,
928 				   unsigned long start_pfn,
929 				   unsigned long last_pfn)
930 {
931 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
932 
933 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
934 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
935 	BUG_ON(start_pfn > last_pfn);
936 
937 	/* We don't need lock here; nobody else touches the iova range */
938 	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
939 			   domain->pgd, 0, start_pfn, last_pfn);
940 
941 	/* free pgd */
942 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
943 		free_pgtable_page(domain->pgd);
944 		domain->pgd = NULL;
945 	}
946 }
947 
948 /* iommu handling */
949 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
950 {
951 	struct root_entry *root;
952 	unsigned long flags;
953 
954 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
955 	if (!root)
956 		return -ENOMEM;
957 
958 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
959 
960 	spin_lock_irqsave(&iommu->lock, flags);
961 	iommu->root_entry = root;
962 	spin_unlock_irqrestore(&iommu->lock, flags);
963 
964 	return 0;
965 }
966 
967 static void iommu_set_root_entry(struct intel_iommu *iommu)
968 {
969 	void *addr;
970 	u32 sts;
971 	unsigned long flag;
972 
973 	addr = iommu->root_entry;
974 
975 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
976 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
977 
978 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
979 
980 	/* Make sure hardware completes it */
981 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
982 		      readl, (sts & DMA_GSTS_RTPS), sts);
983 
984 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
985 }
986 
987 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
988 {
989 	u32 val;
990 	unsigned long flag;
991 
992 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
993 		return;
994 
995 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
996 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
997 
998 	/* Make sure hardware completes it */
999 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1000 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1001 
1002 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1003 }
1004 
1005 /* return value determines if we need a write buffer flush */
1006 static void __iommu_flush_context(struct intel_iommu *iommu,
1007 				  u16 did, u16 source_id, u8 function_mask,
1008 				  u64 type)
1009 {
1010 	u64 val = 0;
1011 	unsigned long flag;
1012 
1013 	switch (type) {
1014 	case DMA_CCMD_GLOBAL_INVL:
1015 		val = DMA_CCMD_GLOBAL_INVL;
1016 		break;
1017 	case DMA_CCMD_DOMAIN_INVL:
1018 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1019 		break;
1020 	case DMA_CCMD_DEVICE_INVL:
1021 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1022 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1023 		break;
1024 	default:
1025 		BUG();
1026 	}
1027 	val |= DMA_CCMD_ICC;
1028 
1029 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1030 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1031 
1032 	/* Make sure hardware completes it */
1033 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1034 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1035 
1036 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1037 }
1038 
1039 /* return value determines if we need a write buffer flush */
1040 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1041 				u64 addr, unsigned int size_order, u64 type)
1042 {
1043 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1044 	u64 val = 0, val_iva = 0;
1045 	unsigned long flag;
1046 
1047 	switch (type) {
1048 	case DMA_TLB_GLOBAL_FLUSH:
1049 		/* global flush doesn't need to set IVA_REG */
1050 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1051 		break;
1052 	case DMA_TLB_DSI_FLUSH:
1053 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1054 		break;
1055 	case DMA_TLB_PSI_FLUSH:
1056 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1057 		/* Note: always flush non-leaf currently */
1058 		val_iva = size_order | addr;
1059 		break;
1060 	default:
1061 		BUG();
1062 	}
1063 	/* Note: set drain read/write */
1064 #if 0
1065 	/*
1066 	 * This is probably just for extra safety; it looks like we can
1067 	 * ignore it without any impact.
1068 	 */
1069 	if (cap_read_drain(iommu->cap))
1070 		val |= DMA_TLB_READ_DRAIN;
1071 #endif
1072 	if (cap_write_drain(iommu->cap))
1073 		val |= DMA_TLB_WRITE_DRAIN;
1074 
1075 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1076 	/* Note: Only uses first TLB reg currently */
1077 	if (val_iva)
1078 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1079 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1080 
1081 	/* Make sure hardware completes it */
1082 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1083 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1084 
1085 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1086 
1087 	/* check IOTLB invalidation granularity */
1088 	if (DMA_TLB_IAIG(val) == 0)
1089 		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1090 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1091 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1092 			(unsigned long long)DMA_TLB_IIRG(type),
1093 			(unsigned long long)DMA_TLB_IAIG(val));
1094 }
1095 
1096 static struct device_domain_info *iommu_support_dev_iotlb(
1097 	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1098 {
1099 	int found = 0;
1100 	unsigned long flags;
1101 	struct device_domain_info *info;
1102 	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1103 
1104 	if (!ecap_dev_iotlb_support(iommu->ecap))
1105 		return NULL;
1106 
1107 	if (!iommu->qi)
1108 		return NULL;
1109 
1110 	spin_lock_irqsave(&device_domain_lock, flags);
1111 	list_for_each_entry(info, &domain->devices, link)
1112 		if (info->bus == bus && info->devfn == devfn) {
1113 			found = 1;
1114 			break;
1115 		}
1116 	spin_unlock_irqrestore(&device_domain_lock, flags);
1117 
1118 	if (!found || !info->dev)
1119 		return NULL;
1120 
1121 	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1122 		return NULL;
1123 
1124 	if (!dmar_find_matched_atsr_unit(info->dev))
1125 		return NULL;
1126 
1127 	info->iommu = iommu;
1128 
1129 	return info;
1130 }
1131 
1132 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1133 {
1134 	if (!info)
1135 		return;
1136 
1137 	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1138 }
1139 
1140 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1141 {
1142 	if (!info->dev || !pci_ats_enabled(info->dev))
1143 		return;
1144 
1145 	pci_disable_ats(info->dev);
1146 }
1147 
1148 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1149 				  u64 addr, unsigned mask)
1150 {
1151 	u16 sid, qdep;
1152 	unsigned long flags;
1153 	struct device_domain_info *info;
1154 
1155 	spin_lock_irqsave(&device_domain_lock, flags);
1156 	list_for_each_entry(info, &domain->devices, link) {
1157 		if (!info->dev || !pci_ats_enabled(info->dev))
1158 			continue;
1159 
1160 		sid = info->bus << 8 | info->devfn;
1161 		qdep = pci_ats_queue_depth(info->dev);
1162 		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1163 	}
1164 	spin_unlock_irqrestore(&device_domain_lock, flags);
1165 }
1166 
1167 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1168 				  unsigned long pfn, unsigned int pages, int map)
1169 {
1170 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1171 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1172 
1173 	BUG_ON(pages == 0);
1174 
1175 	/*
1176 	 * Fall back to domain-selective flush if there is no PSI support or the
1177 	 * size is too big.
1178 	 * PSI requires the page size to be a power of two, and the base address
1179 	 * to be naturally aligned to that size.
1180 	 */
1181 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1182 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1183 						DMA_TLB_DSI_FLUSH);
1184 	else
1185 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
1186 						DMA_TLB_PSI_FLUSH);
1187 
1188 	/*
1189 	 * In caching mode, changes of pages from non-present to present require
1190 	 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1191 	 */
1192 	if (!cap_caching_mode(iommu->cap) || !map)
1193 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1194 }
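/*
 * Worked example (illustrative): invalidating 16 pages gives
 * mask = ilog2(__roundup_pow_of_two(16)) = 4, so hardware flushes a
 * naturally aligned 2^4-page (64KiB) region around addr with a
 * page-selective (PSI) invalidation, provided the IOMMU supports PSI
 * and mask does not exceed cap_max_amask_val().
 */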
1195 
1196 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1197 {
1198 	u32 pmen;
1199 	unsigned long flags;
1200 
1201 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1202 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1203 	pmen &= ~DMA_PMEN_EPM;
1204 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1205 
1206 	/* wait for the protected region status bit to clear */
1207 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1208 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1209 
1210 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1211 }
1212 
1213 static int iommu_enable_translation(struct intel_iommu *iommu)
1214 {
1215 	u32 sts;
1216 	unsigned long flags;
1217 
1218 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1219 	iommu->gcmd |= DMA_GCMD_TE;
1220 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1221 
1222 	/* Make sure hardware completes it */
1223 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1224 		      readl, (sts & DMA_GSTS_TES), sts);
1225 
1226 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1227 	return 0;
1228 }
1229 
1230 static int iommu_disable_translation(struct intel_iommu *iommu)
1231 {
1232 	u32 sts;
1233 	unsigned long flag;
1234 
1235 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1236 	iommu->gcmd &= ~DMA_GCMD_TE;
1237 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1238 
1239 	/* Make sure hardware completes it */
1240 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1241 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1242 
1243 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1244 	return 0;
1245 }
1246 
1247 
1248 static int iommu_init_domains(struct intel_iommu *iommu)
1249 {
1250 	unsigned long ndomains;
1251 	unsigned long nlongs;
1252 
1253 	ndomains = cap_ndoms(iommu->cap);
1254 	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1255 			ndomains);
1256 	nlongs = BITS_TO_LONGS(ndomains);
1257 
1258 	spin_lock_init(&iommu->lock);
1259 
1260 	/* TBD: there might be 64K domains,
1261 	 * consider another allocation scheme for future chips
1262 	 */
1263 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1264 	if (!iommu->domain_ids) {
1265 		printk(KERN_ERR "Allocating domain id array failed\n");
1266 		return -ENOMEM;
1267 	}
1268 	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1269 			GFP_KERNEL);
1270 	if (!iommu->domains) {
1271 		printk(KERN_ERR "Allocating domain array failed\n");
1272 		return -ENOMEM;
1273 	}
1274 
1275 	/*
1276 	 * if Caching mode is set, then invalid translations are tagged
1277 	 * with domain id 0. Hence we need to pre-allocate it.
1278 	 */
1279 	if (cap_caching_mode(iommu->cap))
1280 		set_bit(0, iommu->domain_ids);
1281 	return 0;
1282 }
1283 
1284 
1285 static void domain_exit(struct dmar_domain *domain);
1286 static void vm_domain_exit(struct dmar_domain *domain);
1287 
1288 void free_dmar_iommu(struct intel_iommu *iommu)
1289 {
1290 	struct dmar_domain *domain;
1291 	int i;
1292 	unsigned long flags;
1293 
1294 	if ((iommu->domains) && (iommu->domain_ids)) {
1295 		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1296 			domain = iommu->domains[i];
1297 			clear_bit(i, iommu->domain_ids);
1298 
1299 			spin_lock_irqsave(&domain->iommu_lock, flags);
1300 			if (--domain->iommu_count == 0) {
1301 				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1302 					vm_domain_exit(domain);
1303 				else
1304 					domain_exit(domain);
1305 			}
1306 			spin_unlock_irqrestore(&domain->iommu_lock, flags);
1307 		}
1308 	}
1309 
1310 	if (iommu->gcmd & DMA_GCMD_TE)
1311 		iommu_disable_translation(iommu);
1312 
1313 	if (iommu->irq) {
1314 		irq_set_handler_data(iommu->irq, NULL);
1315 		/* This will mask the irq */
1316 		free_irq(iommu->irq, iommu);
1317 		destroy_irq(iommu->irq);
1318 	}
1319 
1320 	kfree(iommu->domains);
1321 	kfree(iommu->domain_ids);
1322 
1323 	g_iommus[iommu->seq_id] = NULL;
1324 
1325 	/* if all iommus are freed, free g_iommus */
1326 	for (i = 0; i < g_num_of_iommus; i++) {
1327 		if (g_iommus[i])
1328 			break;
1329 	}
1330 
1331 	if (i == g_num_of_iommus)
1332 		kfree(g_iommus);
1333 
1334 	/* free context mapping */
1335 	free_context_table(iommu);
1336 }
1337 
1338 static struct dmar_domain *alloc_domain(void)
1339 {
1340 	struct dmar_domain *domain;
1341 
1342 	domain = alloc_domain_mem();
1343 	if (!domain)
1344 		return NULL;
1345 
1346 	domain->nid = -1;
1347 	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1348 	domain->flags = 0;
1349 
1350 	return domain;
1351 }
1352 
1353 static int iommu_attach_domain(struct dmar_domain *domain,
1354 			       struct intel_iommu *iommu)
1355 {
1356 	int num;
1357 	unsigned long ndomains;
1358 	unsigned long flags;
1359 
1360 	ndomains = cap_ndoms(iommu->cap);
1361 
1362 	spin_lock_irqsave(&iommu->lock, flags);
1363 
1364 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1365 	if (num >= ndomains) {
1366 		spin_unlock_irqrestore(&iommu->lock, flags);
1367 		printk(KERN_ERR "IOMMU: no free domain ids\n");
1368 		return -ENOMEM;
1369 	}
1370 
1371 	domain->id = num;
1372 	set_bit(num, iommu->domain_ids);
1373 	set_bit(iommu->seq_id, domain->iommu_bmp);
1374 	iommu->domains[num] = domain;
1375 	spin_unlock_irqrestore(&iommu->lock, flags);
1376 
1377 	return 0;
1378 }
1379 
1380 static void iommu_detach_domain(struct dmar_domain *domain,
1381 				struct intel_iommu *iommu)
1382 {
1383 	unsigned long flags;
1384 	int num, ndomains;
1385 	int found = 0;
1386 
1387 	spin_lock_irqsave(&iommu->lock, flags);
1388 	ndomains = cap_ndoms(iommu->cap);
1389 	for_each_set_bit(num, iommu->domain_ids, ndomains) {
1390 		if (iommu->domains[num] == domain) {
1391 			found = 1;
1392 			break;
1393 		}
1394 	}
1395 
1396 	if (found) {
1397 		clear_bit(num, iommu->domain_ids);
1398 		clear_bit(iommu->seq_id, domain->iommu_bmp);
1399 		iommu->domains[num] = NULL;
1400 	}
1401 	spin_unlock_irqrestore(&iommu->lock, flags);
1402 }
1403 
1404 static struct iova_domain reserved_iova_list;
1405 static struct lock_class_key reserved_rbtree_key;
1406 
1407 static int dmar_init_reserved_ranges(void)
1408 {
1409 	struct pci_dev *pdev = NULL;
1410 	struct iova *iova;
1411 	int i;
1412 
1413 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1414 
1415 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1416 		&reserved_rbtree_key);
1417 
1418 	/* IOAPIC ranges shouldn't be accessed by DMA */
1419 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1420 		IOVA_PFN(IOAPIC_RANGE_END));
1421 	if (!iova) {
1422 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1423 		return -ENODEV;
1424 	}
1425 
1426 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1427 	for_each_pci_dev(pdev) {
1428 		struct resource *r;
1429 
1430 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1431 			r = &pdev->resource[i];
1432 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1433 				continue;
1434 			iova = reserve_iova(&reserved_iova_list,
1435 					    IOVA_PFN(r->start),
1436 					    IOVA_PFN(r->end));
1437 			if (!iova) {
1438 				printk(KERN_ERR "Reserve iova failed\n");
1439 				return -ENODEV;
1440 			}
1441 		}
1442 	}
1443 	return 0;
1444 }
1445 
1446 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1447 {
1448 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1449 }
1450 
1451 static inline int guestwidth_to_adjustwidth(int gaw)
1452 {
1453 	int agaw;
1454 	int r = (gaw - 12) % 9;
1455 
1456 	if (r == 0)
1457 		agaw = gaw;
1458 	else
1459 		agaw = gaw + 9 - r;
1460 	if (agaw > 64)
1461 		agaw = 64;
1462 	return agaw;
1463 }
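/*
 * Worked example (illustrative): gaw 36 gives r = (36 - 12) % 9 = 6, so
 * the adjusted width is 36 + 9 - 6 = 39 bits; widths that already satisfy
 * (gaw - 12) % 9 == 0, such as 39 or 48, are returned unchanged, and the
 * result is capped at 64.
 */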
1464 
1465 static int domain_init(struct dmar_domain *domain, int guest_width)
1466 {
1467 	struct intel_iommu *iommu;
1468 	int adjust_width, agaw;
1469 	unsigned long sagaw;
1470 
1471 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1472 	spin_lock_init(&domain->iommu_lock);
1473 
1474 	domain_reserve_special_ranges(domain);
1475 
1476 	/* calculate AGAW */
1477 	iommu = domain_get_iommu(domain);
1478 	if (guest_width > cap_mgaw(iommu->cap))
1479 		guest_width = cap_mgaw(iommu->cap);
1480 	domain->gaw = guest_width;
1481 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1482 	agaw = width_to_agaw(adjust_width);
1483 	sagaw = cap_sagaw(iommu->cap);
1484 	if (!test_bit(agaw, &sagaw)) {
1485 		/* hardware doesn't support it, choose a bigger one */
1486 		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1487 		agaw = find_next_bit(&sagaw, 5, agaw);
1488 		if (agaw >= 5)
1489 			return -ENODEV;
1490 	}
1491 	domain->agaw = agaw;
1492 	INIT_LIST_HEAD(&domain->devices);
1493 
1494 	if (ecap_coherent(iommu->ecap))
1495 		domain->iommu_coherency = 1;
1496 	else
1497 		domain->iommu_coherency = 0;
1498 
1499 	if (ecap_sc_support(iommu->ecap))
1500 		domain->iommu_snooping = 1;
1501 	else
1502 		domain->iommu_snooping = 0;
1503 
1504 	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1505 	domain->iommu_count = 1;
1506 	domain->nid = iommu->node;
1507 
1508 	/* always allocate the top pgd */
1509 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1510 	if (!domain->pgd)
1511 		return -ENOMEM;
1512 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1513 	return 0;
1514 }
1515 
1516 static void domain_exit(struct dmar_domain *domain)
1517 {
1518 	struct dmar_drhd_unit *drhd;
1519 	struct intel_iommu *iommu;
1520 
1521 	/* Domain 0 is reserved, so don't process it */
1522 	if (!domain)
1523 		return;
1524 
1525 	/* Flush any lazy unmaps that may reference this domain */
1526 	if (!intel_iommu_strict)
1527 		flush_unmaps_timeout(0);
1528 
1529 	domain_remove_dev_info(domain);
1530 	/* destroy iovas */
1531 	put_iova_domain(&domain->iovad);
1532 
1533 	/* clear ptes */
1534 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1535 
1536 	/* free page tables */
1537 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1538 
1539 	for_each_active_iommu(iommu, drhd)
1540 		if (test_bit(iommu->seq_id, domain->iommu_bmp))
1541 			iommu_detach_domain(domain, iommu);
1542 
1543 	free_domain_mem(domain);
1544 }
1545 
1546 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1547 				 u8 bus, u8 devfn, int translation)
1548 {
1549 	struct context_entry *context;
1550 	unsigned long flags;
1551 	struct intel_iommu *iommu;
1552 	struct dma_pte *pgd;
1553 	unsigned long num;
1554 	unsigned long ndomains;
1555 	int id;
1556 	int agaw;
1557 	struct device_domain_info *info = NULL;
1558 
1559 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1560 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1561 
1562 	BUG_ON(!domain->pgd);
1563 	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1564 	       translation != CONTEXT_TT_MULTI_LEVEL);
1565 
1566 	iommu = device_to_iommu(segment, bus, devfn);
1567 	if (!iommu)
1568 		return -ENODEV;
1569 
1570 	context = device_to_context_entry(iommu, bus, devfn);
1571 	if (!context)
1572 		return -ENOMEM;
1573 	spin_lock_irqsave(&iommu->lock, flags);
1574 	if (context_present(context)) {
1575 		spin_unlock_irqrestore(&iommu->lock, flags);
1576 		return 0;
1577 	}
1578 
1579 	id = domain->id;
1580 	pgd = domain->pgd;
1581 
1582 	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1583 	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1584 		int found = 0;
1585 
1586 		/* find an available domain id for this device in iommu */
1587 		ndomains = cap_ndoms(iommu->cap);
1588 		for_each_set_bit(num, iommu->domain_ids, ndomains) {
1589 			if (iommu->domains[num] == domain) {
1590 				id = num;
1591 				found = 1;
1592 				break;
1593 			}
1594 		}
1595 
1596 		if (found == 0) {
1597 			num = find_first_zero_bit(iommu->domain_ids, ndomains);
1598 			if (num >= ndomains) {
1599 				spin_unlock_irqrestore(&iommu->lock, flags);
1600 				printk(KERN_ERR "IOMMU: no free domain ids\n");
1601 				return -EFAULT;
1602 			}
1603 
1604 			set_bit(num, iommu->domain_ids);
1605 			iommu->domains[num] = domain;
1606 			id = num;
1607 		}
1608 
1609 		/* Skip top levels of page tables for an
1610 		 * iommu whose agaw is smaller than the default.
1611 		 * Unnecessary for PT mode.
1612 		 */
1613 		if (translation != CONTEXT_TT_PASS_THROUGH) {
1614 			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1615 				pgd = phys_to_virt(dma_pte_addr(pgd));
1616 				if (!dma_pte_present(pgd)) {
1617 					spin_unlock_irqrestore(&iommu->lock, flags);
1618 					return -ENOMEM;
1619 				}
1620 			}
1621 		}
1622 	}
1623 
1624 	context_set_domain_id(context, id);
1625 
1626 	if (translation != CONTEXT_TT_PASS_THROUGH) {
1627 		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1628 		translation = info ? CONTEXT_TT_DEV_IOTLB :
1629 				     CONTEXT_TT_MULTI_LEVEL;
1630 	}
1631 	/*
1632 	 * In pass through mode, AW must be programmed to indicate the largest
1633 	 * AGAW value supported by hardware, and the ASR field is ignored by hardware.
1634 	 */
1635 	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1636 		context_set_address_width(context, iommu->msagaw);
1637 	else {
1638 		context_set_address_root(context, virt_to_phys(pgd));
1639 		context_set_address_width(context, iommu->agaw);
1640 	}
1641 
1642 	context_set_translation_type(context, translation);
1643 	context_set_fault_enable(context);
1644 	context_set_present(context);
1645 	domain_flush_cache(domain, context, sizeof(*context));
1646 
1647 	/*
1648 	 * It's a non-present to present mapping. If hardware doesn't cache
1649 	 * non-present entries we only need to flush the write-buffer. If it
1650 	 * _does_ cache non-present entries, then it does so in the special
1651 	 * domain #0, which we have to flush:
1652 	 */
1653 	if (cap_caching_mode(iommu->cap)) {
1654 		iommu->flush.flush_context(iommu, 0,
1655 					   (((u16)bus) << 8) | devfn,
1656 					   DMA_CCMD_MASK_NOBIT,
1657 					   DMA_CCMD_DEVICE_INVL);
1658 		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1659 	} else {
1660 		iommu_flush_write_buffer(iommu);
1661 	}
1662 	iommu_enable_dev_iotlb(info);
1663 	spin_unlock_irqrestore(&iommu->lock, flags);
1664 
1665 	spin_lock_irqsave(&domain->iommu_lock, flags);
1666 	if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1667 		domain->iommu_count++;
1668 		if (domain->iommu_count == 1)
1669 			domain->nid = iommu->node;
1670 		domain_update_iommu_cap(domain);
1671 	}
1672 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1673 	return 0;
1674 }
1675 
1676 static int
1677 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1678 			int translation)
1679 {
1680 	int ret;
1681 	struct pci_dev *tmp, *parent;
1682 
1683 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1684 					 pdev->bus->number, pdev->devfn,
1685 					 translation);
1686 	if (ret)
1687 		return ret;
1688 
1689 	/* dependent device mapping */
1690 	tmp = pci_find_upstream_pcie_bridge(pdev);
1691 	if (!tmp)
1692 		return 0;
1693 	/* Secondary interface's bus number and devfn 0 */
1694 	parent = pdev->bus->self;
1695 	while (parent != tmp) {
1696 		ret = domain_context_mapping_one(domain,
1697 						 pci_domain_nr(parent->bus),
1698 						 parent->bus->number,
1699 						 parent->devfn, translation);
1700 		if (ret)
1701 			return ret;
1702 		parent = parent->bus->self;
1703 	}
1704 	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1705 		return domain_context_mapping_one(domain,
1706 					pci_domain_nr(tmp->subordinate),
1707 					tmp->subordinate->number, 0,
1708 					translation);
1709 	else /* this is a legacy PCI bridge */
1710 		return domain_context_mapping_one(domain,
1711 						  pci_domain_nr(tmp->bus),
1712 						  tmp->bus->number,
1713 						  tmp->devfn,
1714 						  translation);
1715 }
1716 
1717 static int domain_context_mapped(struct pci_dev *pdev)
1718 {
1719 	int ret;
1720 	struct pci_dev *tmp, *parent;
1721 	struct intel_iommu *iommu;
1722 
1723 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1724 				pdev->devfn);
1725 	if (!iommu)
1726 		return -ENODEV;
1727 
1728 	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1729 	if (!ret)
1730 		return ret;
1731 	/* dependent device mapping */
1732 	tmp = pci_find_upstream_pcie_bridge(pdev);
1733 	if (!tmp)
1734 		return ret;
1735 	/* Secondary interface's bus number and devfn 0 */
1736 	parent = pdev->bus->self;
1737 	while (parent != tmp) {
1738 		ret = device_context_mapped(iommu, parent->bus->number,
1739 					    parent->devfn);
1740 		if (!ret)
1741 			return ret;
1742 		parent = parent->bus->self;
1743 	}
1744 	if (pci_is_pcie(tmp))
1745 		return device_context_mapped(iommu, tmp->subordinate->number,
1746 					     0);
1747 	else
1748 		return device_context_mapped(iommu, tmp->bus->number,
1749 					     tmp->devfn);
1750 }
1751 
1752 /* Returns a number of VTD pages, but aligned to MM page size */
1753 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1754 					    size_t size)
1755 {
1756 	host_addr &= ~PAGE_MASK;
1757 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1758 }
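
/*
 * For example (illustrative arithmetic only, assuming 4KiB MM pages and
 * 4KiB VT-d pages, i.e. PAGE_SHIFT == VTD_PAGE_SHIFT == 12):
 *
 *   aligned_nrpages(0x1200, 0x800):
 *     host_addr &= ~PAGE_MASK      ->  0x200 (offset within the MM page)
 *     PAGE_ALIGN(0x200 + 0x800)    ->  0x1000
 *     0x1000 >> VTD_PAGE_SHIFT     ->  1 page
 *
 *   aligned_nrpages(0xff0, 0x20) -> 2 pages, because the 0x20-byte buffer
 *     straddles an MM page boundary (PAGE_ALIGN(0xff0 + 0x20) == 0x2000).
 */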
1759 
1760 /* Return largest possible superpage level for a given mapping */
1761 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1762 					  unsigned long iov_pfn,
1763 					  unsigned long phy_pfn,
1764 					  unsigned long pages)
1765 {
1766 	int support, level = 1;
1767 	unsigned long pfnmerge;
1768 
1769 	support = domain->iommu_superpage;
1770 
1771 	/* To use a large page, the virtual *and* physical addresses
1772 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1773 	   of them will mean we have to use smaller pages. So just
1774 	   merge them and check both at once. */
1775 	pfnmerge = iov_pfn | phy_pfn;
1776 
1777 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1778 		pages >>= VTD_STRIDE_SHIFT;
1779 		if (!pages)
1780 			break;
1781 		pfnmerge >>= VTD_STRIDE_SHIFT;
1782 		level++;
1783 		support--;
1784 	}
1785 	return level;
1786 }
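
/*
 * A worked example of the alignment check above (a sketch with assumed
 * values; VTD_STRIDE_SHIFT is 9, and suppose iommu_superpage == 2, i.e.
 * 2MiB and 1GiB pages are supported):
 *
 *   iov_pfn = 0x40200, phy_pfn = 0x80400, pages = 0x400:
 *     pfnmerge = 0xc0600 has its low 9 bits clear, so one step is taken:
 *     pages >>= 9 gives 2 (non-zero), pfnmerge becomes 0x603, level = 2.
 *     0x603 has low bits set, so the loop stops: use 2MiB pages (level 2).
 *
 *   iov_pfn = 0x40201 (same phy_pfn/pages):
 *     pfnmerge = 0xc0601 is not 2MiB-aligned, the loop never runs, and the
 *     mapping falls back to 4KiB pages (level 1).
 */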
1787 
1788 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1789 			    struct scatterlist *sg, unsigned long phys_pfn,
1790 			    unsigned long nr_pages, int prot)
1791 {
1792 	struct dma_pte *first_pte = NULL, *pte = NULL;
1793 	phys_addr_t uninitialized_var(pteval);
1794 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1795 	unsigned long sg_res;
1796 	unsigned int largepage_lvl = 0;
1797 	unsigned long lvl_pages = 0;
1798 
1799 	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1800 
1801 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1802 		return -EINVAL;
1803 
1804 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1805 
1806 	if (sg)
1807 		sg_res = 0;
1808 	else {
1809 		sg_res = nr_pages + 1;
1810 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1811 	}
1812 
1813 	while (nr_pages > 0) {
1814 		uint64_t tmp;
1815 
1816 		if (!sg_res) {
1817 			sg_res = aligned_nrpages(sg->offset, sg->length);
1818 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1819 			sg->dma_length = sg->length;
1820 			pteval = page_to_phys(sg_page(sg)) | prot;
1821 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
1822 		}
1823 
1824 		if (!pte) {
1825 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1826 
1827 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1828 			if (!pte)
1829 				return -ENOMEM;
1830 			/* It is a large page */
1831 			if (largepage_lvl > 1) {
1832 				pteval |= DMA_PTE_LARGE_PAGE;
1833 				/* Ensure that old small page tables are removed to make room
1834 				   for superpage, if they exist. */
1835 				dma_pte_clear_range(domain, iov_pfn,
1836 						    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1837 				dma_pte_free_pagetable(domain, iov_pfn,
1838 						       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1839 			} else {
1840 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1841 			}
1842 
1843 		}
1844 		/* We don't need a lock here; nobody else
1845 		 * touches the iova range
1846 		 */
1847 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1848 		if (tmp) {
1849 			static int dumps = 5;
1850 			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1851 			       iov_pfn, tmp, (unsigned long long)pteval);
1852 			if (dumps) {
1853 				dumps--;
1854 				debug_dma_dump_mappings(NULL);
1855 			}
1856 			WARN_ON(1);
1857 		}
1858 
1859 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
1860 
1861 		BUG_ON(nr_pages < lvl_pages);
1862 		BUG_ON(sg_res < lvl_pages);
1863 
1864 		nr_pages -= lvl_pages;
1865 		iov_pfn += lvl_pages;
1866 		phys_pfn += lvl_pages;
1867 		pteval += lvl_pages * VTD_PAGE_SIZE;
1868 		sg_res -= lvl_pages;
1869 
1870 		/* If the next PTE would be the first in a new page, then we
1871 		   need to flush the cache on the entries we've just written.
1872 		   And then we'll need to recalculate 'pte', so clear it and
1873 		   let it get set again in the if (!pte) block above.
1874 
1875 		   If we're done (!nr_pages) we need to flush the cache too.
1876 
1877 		   Also if we've been setting superpages, we may need to
1878 		   recalculate 'pte' and switch back to smaller pages for the
1879 		   end of the mapping, if the trailing size is not enough to
1880 		   use another superpage (i.e. sg_res < lvl_pages). */
1881 		pte++;
1882 		if (!nr_pages || first_pte_in_page(pte) ||
1883 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
1884 			domain_flush_cache(domain, first_pte,
1885 					   (void *)pte - (void *)first_pte);
1886 			pte = NULL;
1887 		}
1888 
1889 		if (!sg_res && nr_pages)
1890 			sg = sg_next(sg);
1891 	}
1892 	return 0;
1893 }
1894 
1895 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1896 				    struct scatterlist *sg, unsigned long nr_pages,
1897 				    int prot)
1898 {
1899 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1900 }
1901 
1902 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1903 				     unsigned long phys_pfn, unsigned long nr_pages,
1904 				     int prot)
1905 {
1906 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1907 }
1908 
1909 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1910 {
1911 	if (!iommu)
1912 		return;
1913 
1914 	clear_context_table(iommu, bus, devfn);
1915 	iommu->flush.flush_context(iommu, 0, 0, 0,
1916 					   DMA_CCMD_GLOBAL_INVL);
1917 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1918 }
1919 
1920 static void domain_remove_dev_info(struct dmar_domain *domain)
1921 {
1922 	struct device_domain_info *info;
1923 	unsigned long flags;
1924 	struct intel_iommu *iommu;
1925 
1926 	spin_lock_irqsave(&device_domain_lock, flags);
1927 	while (!list_empty(&domain->devices)) {
1928 		info = list_entry(domain->devices.next,
1929 			struct device_domain_info, link);
1930 		list_del(&info->link);
1931 		list_del(&info->global);
1932 		if (info->dev)
1933 			info->dev->dev.archdata.iommu = NULL;
1934 		spin_unlock_irqrestore(&device_domain_lock, flags);
1935 
1936 		iommu_disable_dev_iotlb(info);
1937 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1938 		iommu_detach_dev(iommu, info->bus, info->devfn);
1939 		free_devinfo_mem(info);
1940 
1941 		spin_lock_irqsave(&device_domain_lock, flags);
1942 	}
1943 	spin_unlock_irqrestore(&device_domain_lock, flags);
1944 }
1945 
1946 /*
1947  * find_domain
1948  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1949  */
1950 static struct dmar_domain *
1951 find_domain(struct pci_dev *pdev)
1952 {
1953 	struct device_domain_info *info;
1954 
1955 	/* No lock here, assumes no domain exit in normal case */
1956 	info = pdev->dev.archdata.iommu;
1957 	if (info)
1958 		return info->domain;
1959 	return NULL;
1960 }
1961 
1962 /* domain is initialized */
1963 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1964 {
1965 	struct dmar_domain *domain, *found = NULL;
1966 	struct intel_iommu *iommu;
1967 	struct dmar_drhd_unit *drhd;
1968 	struct device_domain_info *info, *tmp;
1969 	struct pci_dev *dev_tmp;
1970 	unsigned long flags;
1971 	int bus = 0, devfn = 0;
1972 	int segment;
1973 	int ret;
1974 
1975 	domain = find_domain(pdev);
1976 	if (domain)
1977 		return domain;
1978 
1979 	segment = pci_domain_nr(pdev->bus);
1980 
1981 	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1982 	if (dev_tmp) {
1983 		if (pci_is_pcie(dev_tmp)) {
1984 			bus = dev_tmp->subordinate->number;
1985 			devfn = 0;
1986 		} else {
1987 			bus = dev_tmp->bus->number;
1988 			devfn = dev_tmp->devfn;
1989 		}
1990 		spin_lock_irqsave(&device_domain_lock, flags);
1991 		list_for_each_entry(info, &device_domain_list, global) {
1992 			if (info->segment == segment &&
1993 			    info->bus == bus && info->devfn == devfn) {
1994 				found = info->domain;
1995 				break;
1996 			}
1997 		}
1998 		spin_unlock_irqrestore(&device_domain_lock, flags);
1999 		/* pcie-pci bridge already has a domain, use it */
2000 		if (found) {
2001 			domain = found;
2002 			goto found_domain;
2003 		}
2004 	}
2005 
2006 	domain = alloc_domain();
2007 	if (!domain)
2008 		goto error;
2009 
2010 	/* Allocate new domain for the device */
2011 	drhd = dmar_find_matched_drhd_unit(pdev);
2012 	if (!drhd) {
2013 		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2014 			pci_name(pdev));
2015 		return NULL;
2016 	}
2017 	iommu = drhd->iommu;
2018 
2019 	ret = iommu_attach_domain(domain, iommu);
2020 	if (ret) {
2021 		free_domain_mem(domain);
2022 		goto error;
2023 	}
2024 
2025 	if (domain_init(domain, gaw)) {
2026 		domain_exit(domain);
2027 		goto error;
2028 	}
2029 
2030 	/* register pcie-to-pci device */
2031 	if (dev_tmp) {
2032 		info = alloc_devinfo_mem();
2033 		if (!info) {
2034 			domain_exit(domain);
2035 			goto error;
2036 		}
2037 		info->segment = segment;
2038 		info->bus = bus;
2039 		info->devfn = devfn;
2040 		info->dev = NULL;
2041 		info->domain = domain;
2042 		/* This domain is shared by devices under p2p bridge */
2043 		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2044 
2045 		/* pcie-to-pci bridge already has a domain, use it */
2046 		found = NULL;
2047 		spin_lock_irqsave(&device_domain_lock, flags);
2048 		list_for_each_entry(tmp, &device_domain_list, global) {
2049 			if (tmp->segment == segment &&
2050 			    tmp->bus == bus && tmp->devfn == devfn) {
2051 				found = tmp->domain;
2052 				break;
2053 			}
2054 		}
2055 		if (found) {
2056 			spin_unlock_irqrestore(&device_domain_lock, flags);
2057 			free_devinfo_mem(info);
2058 			domain_exit(domain);
2059 			domain = found;
2060 		} else {
2061 			list_add(&info->link, &domain->devices);
2062 			list_add(&info->global, &device_domain_list);
2063 			spin_unlock_irqrestore(&device_domain_lock, flags);
2064 		}
2065 	}
2066 
2067 found_domain:
2068 	info = alloc_devinfo_mem();
2069 	if (!info)
2070 		goto error;
2071 	info->segment = segment;
2072 	info->bus = pdev->bus->number;
2073 	info->devfn = pdev->devfn;
2074 	info->dev = pdev;
2075 	info->domain = domain;
2076 	spin_lock_irqsave(&device_domain_lock, flags);
2077 	/* somebody else was faster and already set up a domain */
2078 	found = find_domain(pdev);
2079 	if (found != NULL) {
2080 		spin_unlock_irqrestore(&device_domain_lock, flags);
2081 		if (found != domain) {
2082 			domain_exit(domain);
2083 			domain = found;
2084 		}
2085 		free_devinfo_mem(info);
2086 		return domain;
2087 	}
2088 	list_add(&info->link, &domain->devices);
2089 	list_add(&info->global, &device_domain_list);
2090 	pdev->dev.archdata.iommu = info;
2091 	spin_unlock_irqrestore(&device_domain_lock, flags);
2092 	return domain;
2093 error:
2094 	/* recheck it here, maybe others set it */
2095 	return find_domain(pdev);
2096 }
2097 
2098 static int iommu_identity_mapping;
2099 #define IDENTMAP_ALL		1
2100 #define IDENTMAP_GFX		2
2101 #define IDENTMAP_AZALIA		4
2102 
2103 static int iommu_domain_identity_map(struct dmar_domain *domain,
2104 				     unsigned long long start,
2105 				     unsigned long long end)
2106 {
2107 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2108 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2109 
2110 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2111 			  dma_to_mm_pfn(last_vpfn))) {
2112 		printk(KERN_ERR "IOMMU: reserve iova failed\n");
2113 		return -ENOMEM;
2114 	}
2115 
2116 	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2117 		 start, end, domain->id);
2118 	/*
2119 	 * RMRR range might have overlap with physical memory range,
2120 	 * clear it first
2121 	 */
2122 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2123 
2124 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2125 				  last_vpfn - first_vpfn + 1,
2126 				  DMA_PTE_READ|DMA_PTE_WRITE);
2127 }
2128 
2129 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2130 				      unsigned long long start,
2131 				      unsigned long long end)
2132 {
2133 	struct dmar_domain *domain;
2134 	int ret;
2135 
2136 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2137 	if (!domain)
2138 		return -ENOMEM;
2139 
2140 	/* For _hardware_ passthrough, don't bother. But for software
2141 	   passthrough, we do it anyway -- it may indicate a memory
2142 	   range which is reserved in E820, so which didn't get set
2143 	   range which is reserved in E820 and so didn't get set
2144 	if (domain == si_domain && hw_pass_through) {
2145 		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2146 		       pci_name(pdev), start, end);
2147 		return 0;
2148 	}
2149 
2150 	printk(KERN_INFO
2151 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2152 	       pci_name(pdev), start, end);
2153 
2154 	if (end < start) {
2155 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2156 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2157 			dmi_get_system_info(DMI_BIOS_VENDOR),
2158 			dmi_get_system_info(DMI_BIOS_VERSION),
2159 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2160 		ret = -EIO;
2161 		goto error;
2162 	}
2163 
2164 	if (end >> agaw_to_width(domain->agaw)) {
2165 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2166 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2167 		     agaw_to_width(domain->agaw),
2168 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2169 		     dmi_get_system_info(DMI_BIOS_VERSION),
2170 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2171 		ret = -EIO;
2172 		goto error;
2173 	}
2174 
2175 	ret = iommu_domain_identity_map(domain, start, end);
2176 	if (ret)
2177 		goto error;
2178 
2179 	/* context entry init */
2180 	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2181 	if (ret)
2182 		goto error;
2183 
2184 	return 0;
2185 
2186  error:
2187 	domain_exit(domain);
2188 	return ret;
2189 }
2190 
2191 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2192 	struct pci_dev *pdev)
2193 {
2194 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2195 		return 0;
2196 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
2197 		rmrr->end_address);
2198 }
2199 
2200 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2201 static inline void iommu_prepare_isa(void)
2202 {
2203 	struct pci_dev *pdev;
2204 	int ret;
2205 
2206 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2207 	if (!pdev)
2208 		return;
2209 
2210 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2211 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2212 
2213 	if (ret)
2214 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2215 		       "floppy might not work\n");
2216 
2217 }
2218 #else
2219 static inline void iommu_prepare_isa(void)
2220 {
2221 	return;
2222 }
2223 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2224 
2225 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2226 
2227 static int __init si_domain_init(int hw)
2228 {
2229 	struct dmar_drhd_unit *drhd;
2230 	struct intel_iommu *iommu;
2231 	int nid, ret = 0;
2232 
2233 	si_domain = alloc_domain();
2234 	if (!si_domain)
2235 		return -EFAULT;
2236 
2237 	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2238 
2239 	for_each_active_iommu(iommu, drhd) {
2240 		ret = iommu_attach_domain(si_domain, iommu);
2241 		if (ret) {
2242 			domain_exit(si_domain);
2243 			return -EFAULT;
2244 		}
2245 	}
2246 
2247 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2248 		domain_exit(si_domain);
2249 		return -EFAULT;
2250 	}
2251 
2252 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2253 
2254 	if (hw)
2255 		return 0;
2256 
2257 	for_each_online_node(nid) {
2258 		unsigned long start_pfn, end_pfn;
2259 		int i;
2260 
2261 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2262 			ret = iommu_domain_identity_map(si_domain,
2263 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2264 			if (ret)
2265 				return ret;
2266 		}
2267 	}
2268 
2269 	return 0;
2270 }
2271 
2272 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2273 					  struct pci_dev *pdev);
2274 static int identity_mapping(struct pci_dev *pdev)
2275 {
2276 	struct device_domain_info *info;
2277 
2278 	if (likely(!iommu_identity_mapping))
2279 		return 0;
2280 
2281 	info = pdev->dev.archdata.iommu;
2282 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2283 		return (info->domain == si_domain);
2284 
2285 	return 0;
2286 }
2287 
2288 static int domain_add_dev_info(struct dmar_domain *domain,
2289 			       struct pci_dev *pdev,
2290 			       int translation)
2291 {
2292 	struct device_domain_info *info;
2293 	unsigned long flags;
2294 	int ret;
2295 
2296 	info = alloc_devinfo_mem();
2297 	if (!info)
2298 		return -ENOMEM;
2299 
2300 	info->segment = pci_domain_nr(pdev->bus);
2301 	info->bus = pdev->bus->number;
2302 	info->devfn = pdev->devfn;
2303 	info->dev = pdev;
2304 	info->domain = domain;
2305 
2306 	spin_lock_irqsave(&device_domain_lock, flags);
2307 	list_add(&info->link, &domain->devices);
2308 	list_add(&info->global, &device_domain_list);
2309 	pdev->dev.archdata.iommu = info;
2310 	spin_unlock_irqrestore(&device_domain_lock, flags);
2311 
2312 	ret = domain_context_mapping(domain, pdev, translation);
2313 	if (ret) {
2314 		spin_lock_irqsave(&device_domain_lock, flags);
2315 		list_del(&info->link);
2316 		list_del(&info->global);
2317 		pdev->dev.archdata.iommu = NULL;
2318 		spin_unlock_irqrestore(&device_domain_lock, flags);
2319 		free_devinfo_mem(info);
2320 		return ret;
2321 	}
2322 
2323 	return 0;
2324 }
2325 
2326 static bool device_has_rmrr(struct pci_dev *dev)
2327 {
2328 	struct dmar_rmrr_unit *rmrr;
2329 	int i;
2330 
2331 	for_each_rmrr_units(rmrr) {
2332 		for (i = 0; i < rmrr->devices_cnt; i++) {
2333 			/*
2334 			 * Return TRUE if this RMRR contains the device that
2335 			 * is passed in.
2336 			 */
2337 			if (rmrr->devices[i] == dev)
2338 				return true;
2339 		}
2340 	}
2341 	return false;
2342 }
2343 
2344 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2345 {
2346 
2347 	/*
2348 	 * We want to prevent any device associated with an RMRR from
2349 	 * getting placed into the SI Domain. This is done because
2350 	 * problems exist when devices are moved in and out of domains
2351 	 * and their respective RMRR info is lost. We exempt USB devices
2352 	 * from this process due to their usage of RMRRs that are known
2353 	 * to not be needed after BIOS hand-off to OS.
2354 	 */
2355 	if (device_has_rmrr(pdev) &&
2356 	    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2357 		return 0;
2358 
2359 	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2360 		return 1;
2361 
2362 	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2363 		return 1;
2364 
2365 	if (!(iommu_identity_mapping & IDENTMAP_ALL))
2366 		return 0;
2367 
2368 	/*
2369 	 * We want to start off with all devices in the 1:1 domain, and
2370 	 * take them out later if we find they can't access all of memory.
2371 	 *
2372 	 * However, we can't do this for PCI devices behind bridges,
2373 	 * because all PCI devices behind the same bridge will end up
2374 	 * with the same source-id on their transactions.
2375 	 *
2376 	 * Practically speaking, we can't change things around for these
2377 	 * devices at run-time, because we can't be sure there'll be no
2378 	 * DMA transactions in flight for any of their siblings.
2379 	 *
2380 	 * So PCI devices (unless they're on the root bus) as well as
2381 	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2382 	 * the 1:1 domain, just in _case_ one of their siblings turns out
2383 	 * not to be able to map all of memory.
2384 	 */
2385 	if (!pci_is_pcie(pdev)) {
2386 		if (!pci_is_root_bus(pdev->bus))
2387 			return 0;
2388 		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2389 			return 0;
2390 	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2391 		return 0;
2392 
2393 	/*
2394 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2395 	 * Assume that they will -- if they turn out not to be, then we can
2396 	 * take them out of the 1:1 domain later.
2397 	 */
2398 	if (!startup) {
2399 		/*
2400 		 * If the device's dma_mask is less than the system's memory
2401 		 * size then this is not a candidate for identity mapping.
2402 		 */
2403 		u64 dma_mask = pdev->dma_mask;
2404 
2405 		if (pdev->dev.coherent_dma_mask &&
2406 		    pdev->dev.coherent_dma_mask < dma_mask)
2407 			dma_mask = pdev->dev.coherent_dma_mask;
2408 
2409 		return dma_mask >= dma_get_required_mask(&pdev->dev);
2410 	}
2411 
2412 	return 1;
2413 }
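
/*
 * Illustrative example of the dma_mask check above (hypothetical device,
 * not taken from real hardware): a PCI device advertising a 32-bit
 * dma_mask on a machine with 8GiB of RAM sees dma_get_required_mask()
 * of roughly DMA_BIT_MASK(33) (ignoring memory holes), so the comparison
 * fails and the device is kept out of (or later removed from) the 1:1
 * domain -- it could not reach all of memory through an identity map.
 * A device with a 64-bit dma_mask on the same machine stays identity
 * mapped.
 */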
2414 
2415 static int __init iommu_prepare_static_identity_mapping(int hw)
2416 {
2417 	struct pci_dev *pdev = NULL;
2418 	int ret;
2419 
2420 	ret = si_domain_init(hw);
2421 	if (ret)
2422 		return -EFAULT;
2423 
2424 	for_each_pci_dev(pdev) {
2425 		if (iommu_should_identity_map(pdev, 1)) {
2426 			ret = domain_add_dev_info(si_domain, pdev,
2427 					     hw ? CONTEXT_TT_PASS_THROUGH :
2428 						  CONTEXT_TT_MULTI_LEVEL);
2429 			if (ret) {
2430 				/* device not associated with an iommu */
2431 				if (ret == -ENODEV)
2432 					continue;
2433 				return ret;
2434 			}
2435 			pr_info("IOMMU: %s identity mapping for device %s\n",
2436 				hw ? "hardware" : "software", pci_name(pdev));
2437 		}
2438 	}
2439 
2440 	return 0;
2441 }
2442 
2443 static int __init init_dmars(void)
2444 {
2445 	struct dmar_drhd_unit *drhd;
2446 	struct dmar_rmrr_unit *rmrr;
2447 	struct pci_dev *pdev;
2448 	struct intel_iommu *iommu;
2449 	int i, ret;
2450 
2451 	/*
2452 	 * for each drhd
2453 	 *    allocate root
2454 	 *    initialize and program root entry to not present
2455 	 * endfor
2456 	 */
2457 	for_each_drhd_unit(drhd) {
2458 		/*
2459 		 * lock not needed as this is only incremented in the single
2460 		 * threaded kernel __init code path; all other accesses are
2461 		 * read only
2462 		 */
2463 		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2464 			g_num_of_iommus++;
2465 			continue;
2466 		}
2467 		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2468 			  IOMMU_UNITS_SUPPORTED);
2469 	}
2470 
2471 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2472 			GFP_KERNEL);
2473 	if (!g_iommus) {
2474 		printk(KERN_ERR "Allocating global iommu array failed\n");
2475 		ret = -ENOMEM;
2476 		goto error;
2477 	}
2478 
2479 	deferred_flush = kzalloc(g_num_of_iommus *
2480 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2481 	if (!deferred_flush) {
2482 		ret = -ENOMEM;
2483 		goto error;
2484 	}
2485 
2486 	for_each_drhd_unit(drhd) {
2487 		if (drhd->ignored)
2488 			continue;
2489 
2490 		iommu = drhd->iommu;
2491 		g_iommus[iommu->seq_id] = iommu;
2492 
2493 		ret = iommu_init_domains(iommu);
2494 		if (ret)
2495 			goto error;
2496 
2497 		/*
2498 		 * TBD:
2499 		 * we could share the same root & context tables
2500 		 * among all IOMMUs. Need to split it later.
2501 		 */
2502 		ret = iommu_alloc_root_entry(iommu);
2503 		if (ret) {
2504 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2505 			goto error;
2506 		}
2507 		if (!ecap_pass_through(iommu->ecap))
2508 			hw_pass_through = 0;
2509 	}
2510 
2511 	/*
2512 	 * Start from a sane iommu hardware state.
2513 	 */
2514 	for_each_drhd_unit(drhd) {
2515 		if (drhd->ignored)
2516 			continue;
2517 
2518 		iommu = drhd->iommu;
2519 
2520 		/*
2521 		 * If the queued invalidation is already initialized by us
2522 		 * (for example, while enabling interrupt-remapping) then
2523 		 * we already have things rolling from a sane state.
2524 		 */
2525 		if (iommu->qi)
2526 			continue;
2527 
2528 		/*
2529 		 * Clear any previous faults.
2530 		 */
2531 		dmar_fault(-1, iommu);
2532 		/*
2533 		 * Disable queued invalidation if supported and already enabled
2534 		 * before OS handover.
2535 		 */
2536 		dmar_disable_qi(iommu);
2537 	}
2538 
2539 	for_each_drhd_unit(drhd) {
2540 		if (drhd->ignored)
2541 			continue;
2542 
2543 		iommu = drhd->iommu;
2544 
2545 		if (dmar_enable_qi(iommu)) {
2546 			/*
2547 			 * Queued Invalidate not enabled, use Register Based
2548 			 * Invalidate
2549 			 */
2550 			iommu->flush.flush_context = __iommu_flush_context;
2551 			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2552 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2553 			       "invalidation\n",
2554 				iommu->seq_id,
2555 			       (unsigned long long)drhd->reg_base_addr);
2556 		} else {
2557 			iommu->flush.flush_context = qi_flush_context;
2558 			iommu->flush.flush_iotlb = qi_flush_iotlb;
2559 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2560 			       "invalidation\n",
2561 				iommu->seq_id,
2562 			       (unsigned long long)drhd->reg_base_addr);
2563 		}
2564 	}
2565 
2566 	if (iommu_pass_through)
2567 		iommu_identity_mapping |= IDENTMAP_ALL;
2568 
2569 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2570 	iommu_identity_mapping |= IDENTMAP_GFX;
2571 #endif
2572 
2573 	check_tylersburg_isoch();
2574 
2575 	/*
2576 	 * If pass through is not set or not enabled, set up context entries
2577 	 * for identity mappings for rmrr, gfx, and isa, and possibly fall back
2578 	 * to static identity mapping if iommu_identity_mapping is set.
2579 	 */
2580 	if (iommu_identity_mapping) {
2581 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2582 		if (ret) {
2583 			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2584 			goto error;
2585 		}
2586 	}
2587 	/*
2588 	 * For each rmrr
2589 	 *   for each dev attached to rmrr
2590 	 *   do
2591 	 *     locate drhd for dev, alloc domain for dev
2592 	 *     allocate free domain
2593 	 *     allocate page table entries for rmrr
2594 	 *     if context not allocated for bus
2595 	 *           allocate and init context
2596 	 *           set present in root table for this bus
2597 	 *     init context with domain, translation etc
2598 	 *    endfor
2599 	 * endfor
2600 	 */
2601 	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2602 	for_each_rmrr_units(rmrr) {
2603 		for (i = 0; i < rmrr->devices_cnt; i++) {
2604 			pdev = rmrr->devices[i];
2605 			/*
2606 			 * some BIOSes list non-existent devices in the DMAR
2607 			 * table.
2608 			 */
2609 			if (!pdev)
2610 				continue;
2611 			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2612 			if (ret)
2613 				printk(KERN_ERR
2614 				       "IOMMU: mapping reserved region failed\n");
2615 		}
2616 	}
2617 
2618 	iommu_prepare_isa();
2619 
2620 	/*
2621 	 * for each drhd
2622 	 *   enable fault log
2623 	 *   global invalidate context cache
2624 	 *   global invalidate iotlb
2625 	 *   enable translation
2626 	 */
2627 	for_each_drhd_unit(drhd) {
2628 		if (drhd->ignored) {
2629 			/*
2630 			 * we always have to disable PMRs or DMA may fail on
2631 			 * this device
2632 			 */
2633 			if (force_on)
2634 				iommu_disable_protect_mem_regions(drhd->iommu);
2635 			continue;
2636 		}
2637 		iommu = drhd->iommu;
2638 
2639 		iommu_flush_write_buffer(iommu);
2640 
2641 		ret = dmar_set_interrupt(iommu);
2642 		if (ret)
2643 			goto error;
2644 
2645 		iommu_set_root_entry(iommu);
2646 
2647 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2648 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2649 
2650 		ret = iommu_enable_translation(iommu);
2651 		if (ret)
2652 			goto error;
2653 
2654 		iommu_disable_protect_mem_regions(iommu);
2655 	}
2656 
2657 	return 0;
2658 error:
2659 	for_each_drhd_unit(drhd) {
2660 		if (drhd->ignored)
2661 			continue;
2662 		iommu = drhd->iommu;
2663 		free_iommu(iommu);
2664 	}
2665 	kfree(g_iommus);
2666 	return ret;
2667 }
2668 
2669 /* This takes a number of _MM_ pages, not VTD pages */
2670 static struct iova *intel_alloc_iova(struct device *dev,
2671 				     struct dmar_domain *domain,
2672 				     unsigned long nrpages, uint64_t dma_mask)
2673 {
2674 	struct pci_dev *pdev = to_pci_dev(dev);
2675 	struct iova *iova = NULL;
2676 
2677 	/* Restrict dma_mask to the width that the iommu can handle */
2678 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2679 
2680 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2681 		/*
2682 		 * First try to allocate an io virtual address in
2683 		 * DMA_BIT_MASK(32) and if that fails then try allocating
2684 		 * from higher range
2685 		 */
2686 		iova = alloc_iova(&domain->iovad, nrpages,
2687 				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2688 		if (iova)
2689 			return iova;
2690 	}
2691 	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2692 	if (unlikely(!iova)) {
2693 		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2694 		       nrpages, pci_name(pdev));
2695 		return NULL;
2696 	}
2697 
2698 	return iova;
2699 }
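
/*
 * Rough usage sketch: a streaming DMA mapping of a single 4KiB buffer on
 * a device with a 64-bit DMA mask reaches this allocator (via
 * __intel_map_single) as approximately
 *
 *	iova = intel_alloc_iova(hwdev, domain, 1, DMA_BIT_MASK(64));
 *
 * and, unless dmar_forcedac was set on the command line
 * (intel_iommu=forcedac), IOVAs below 4GiB are tried first, since 64-bit
 * DMA addressing has historically been unreliable on some devices.
 */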
2700 
2701 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2702 {
2703 	struct dmar_domain *domain;
2704 	int ret;
2705 
2706 	domain = get_domain_for_dev(pdev,
2707 			DEFAULT_DOMAIN_ADDRESS_WIDTH);
2708 	if (!domain) {
2709 		printk(KERN_ERR
2710 			"Allocating domain for %s failed", pci_name(pdev));
2711 		return NULL;
2712 	}
2713 
2714 	/* make sure context mapping is ok */
2715 	if (unlikely(!domain_context_mapped(pdev))) {
2716 		ret = domain_context_mapping(domain, pdev,
2717 					     CONTEXT_TT_MULTI_LEVEL);
2718 		if (ret) {
2719 			printk(KERN_ERR
2720 				"Domain context map for %s failed",
2721 				pci_name(pdev));
2722 			return NULL;
2723 		}
2724 	}
2725 
2726 	return domain;
2727 }
2728 
2729 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2730 {
2731 	struct device_domain_info *info;
2732 
2733 	/* No lock here, assumes no domain exit in normal case */
2734 	info = dev->dev.archdata.iommu;
2735 	if (likely(info))
2736 		return info->domain;
2737 
2738 	return __get_valid_domain_for_dev(dev);
2739 }
2740 
2741 static int iommu_dummy(struct pci_dev *pdev)
2742 {
2743 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2744 }
2745 
2746 /* Check if the pdev needs to go through the non-identity map and unmap process. */
2747 static int iommu_no_mapping(struct device *dev)
2748 {
2749 	struct pci_dev *pdev;
2750 	int found;
2751 
2752 	if (unlikely(dev->bus != &pci_bus_type))
2753 		return 1;
2754 
2755 	pdev = to_pci_dev(dev);
2756 	if (iommu_dummy(pdev))
2757 		return 1;
2758 
2759 	if (!iommu_identity_mapping)
2760 		return 0;
2761 
2762 	found = identity_mapping(pdev);
2763 	if (found) {
2764 		if (iommu_should_identity_map(pdev, 0))
2765 			return 1;
2766 		else {
2767 			/*
2768 			 * A 32 bit DMA device is removed from si_domain and falls
2769 			 * back to non-identity mapping.
2770 			 */
2771 			domain_remove_one_dev_info(si_domain, pdev);
2772 			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2773 			       pci_name(pdev));
2774 			return 0;
2775 		}
2776 	} else {
2777 		/*
2778 		 * In case a 64 bit DMA device is detached from a VM, the device
2779 		 * is put into si_domain for identity mapping.
2780 		 */
2781 		if (iommu_should_identity_map(pdev, 0)) {
2782 			int ret;
2783 			ret = domain_add_dev_info(si_domain, pdev,
2784 						  hw_pass_through ?
2785 						  CONTEXT_TT_PASS_THROUGH :
2786 						  CONTEXT_TT_MULTI_LEVEL);
2787 			if (!ret) {
2788 				printk(KERN_INFO "64bit %s uses identity mapping\n",
2789 				       pci_name(pdev));
2790 				return 1;
2791 			}
2792 		}
2793 	}
2794 
2795 	return 0;
2796 }
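
/*
 * Summary of the decision above (descriptive only):
 *
 *   returns 1 -> bypass IOMMU translation for this device: non-PCI
 *     devices, devices quirked with DUMMY_DEVICE_DOMAIN_INFO, and devices
 *     that are (or have just been placed) in the static identity domain.
 *
 *   returns 0 -> use the normal map/unmap path; a device that no longer
 *     satisfies iommu_should_identity_map() (e.g. a 32-bit dma_mask on a
 *     large-memory machine) is first removed from si_domain.
 */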
2797 
2798 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2799 				     size_t size, int dir, u64 dma_mask)
2800 {
2801 	struct pci_dev *pdev = to_pci_dev(hwdev);
2802 	struct dmar_domain *domain;
2803 	phys_addr_t start_paddr;
2804 	struct iova *iova;
2805 	int prot = 0;
2806 	int ret;
2807 	struct intel_iommu *iommu;
2808 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2809 
2810 	BUG_ON(dir == DMA_NONE);
2811 
2812 	if (iommu_no_mapping(hwdev))
2813 		return paddr;
2814 
2815 	domain = get_valid_domain_for_dev(pdev);
2816 	if (!domain)
2817 		return 0;
2818 
2819 	iommu = domain_get_iommu(domain);
2820 	size = aligned_nrpages(paddr, size);
2821 
2822 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2823 	if (!iova)
2824 		goto error;
2825 
2826 	/*
2827 	 * Check if DMAR supports zero-length reads on write only
2828 	 * mappings.
2829 	 */
2830 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2831 			!cap_zlr(iommu->cap))
2832 		prot |= DMA_PTE_READ;
2833 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2834 		prot |= DMA_PTE_WRITE;
2835 	/*
2836 	 * paddr to (paddr + size) might span a partial page, so we should map
2837 	 * the whole page.  Note: if two parts of one page are separately mapped,
2838 	 * we might have two guest addresses mapping to the same host paddr, but this
2839 	 * is not a big problem
2840 	 */
2841 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2842 				 mm_to_dma_pfn(paddr_pfn), size, prot);
2843 	if (ret)
2844 		goto error;
2845 
2846 	/* it's a non-present to present mapping. Only flush if caching mode */
2847 	if (cap_caching_mode(iommu->cap))
2848 		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2849 	else
2850 		iommu_flush_write_buffer(iommu);
2851 
2852 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2853 	start_paddr += paddr & ~PAGE_MASK;
2854 	return start_paddr;
2855 
2856 error:
2857 	if (iova)
2858 		__free_iova(&domain->iovad, iova);
2859 	printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2860 		pci_name(pdev), size, (unsigned long long)paddr, dir);
2861 	return 0;
2862 }
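
/*
 * Worked example with assumed values: mapping paddr 0x12345678, size
 * 0x100, with 4KiB pages.  aligned_nrpages() rounds this up to one page,
 * the whole page at 0x12345000 is mapped at the allocated IOVA page, and
 * the returned handle is
 *
 *	(iova->pfn_lo << PAGE_SHIFT) + (0x12345678 & ~PAGE_MASK)
 *
 * i.e. the page offset 0x678 is preserved, so the device sees the same
 * within-page layout as the CPU.
 */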
2863 
2864 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2865 				 unsigned long offset, size_t size,
2866 				 enum dma_data_direction dir,
2867 				 struct dma_attrs *attrs)
2868 {
2869 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
2870 				  dir, to_pci_dev(dev)->dma_mask);
2871 }
2872 
2873 static void flush_unmaps(void)
2874 {
2875 	int i, j;
2876 
2877 	timer_on = 0;
2878 
2879 	/* just flush them all */
2880 	for (i = 0; i < g_num_of_iommus; i++) {
2881 		struct intel_iommu *iommu = g_iommus[i];
2882 		if (!iommu)
2883 			continue;
2884 
2885 		if (!deferred_flush[i].next)
2886 			continue;
2887 
2888 		/* In caching mode, global flushes make emulation expensive */
2889 		if (!cap_caching_mode(iommu->cap))
2890 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2891 					 DMA_TLB_GLOBAL_FLUSH);
2892 		for (j = 0; j < deferred_flush[i].next; j++) {
2893 			unsigned long mask;
2894 			struct iova *iova = deferred_flush[i].iova[j];
2895 			struct dmar_domain *domain = deferred_flush[i].domain[j];
2896 
2897 			/* On real hardware multiple invalidations are expensive */
2898 			if (cap_caching_mode(iommu->cap))
2899 				iommu_flush_iotlb_psi(iommu, domain->id,
2900 				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2901 			else {
2902 				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2903 				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2904 						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2905 			}
2906 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2907 		}
2908 		deferred_flush[i].next = 0;
2909 	}
2910 
2911 	list_size = 0;
2912 }
2913 
2914 static void flush_unmaps_timeout(unsigned long data)
2915 {
2916 	unsigned long flags;
2917 
2918 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2919 	flush_unmaps();
2920 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2921 }
2922 
2923 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2924 {
2925 	unsigned long flags;
2926 	int next, iommu_id;
2927 	struct intel_iommu *iommu;
2928 
2929 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2930 	if (list_size == HIGH_WATER_MARK)
2931 		flush_unmaps();
2932 
2933 	iommu = domain_get_iommu(dom);
2934 	iommu_id = iommu->seq_id;
2935 
2936 	next = deferred_flush[iommu_id].next;
2937 	deferred_flush[iommu_id].domain[next] = dom;
2938 	deferred_flush[iommu_id].iova[next] = iova;
2939 	deferred_flush[iommu_id].next++;
2940 
2941 	if (!timer_on) {
2942 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2943 		timer_on = 1;
2944 	}
2945 	list_size++;
2946 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2947 }
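
/*
 * Design note (descriptive): unmaps are batched per IOMMU and the IOTLB
 * is only flushed when the 10ms timer fires or when HIGH_WATER_MARK
 * entries are pending, trading a short window of stale IOTLB entries for
 * far fewer invalidation commands.  Booting with intel_iommu=strict makes
 * intel_unmap_page()/intel_unmap_sg() below flush immediately instead.
 */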
2948 
2949 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2950 			     size_t size, enum dma_data_direction dir,
2951 			     struct dma_attrs *attrs)
2952 {
2953 	struct pci_dev *pdev = to_pci_dev(dev);
2954 	struct dmar_domain *domain;
2955 	unsigned long start_pfn, last_pfn;
2956 	struct iova *iova;
2957 	struct intel_iommu *iommu;
2958 
2959 	if (iommu_no_mapping(dev))
2960 		return;
2961 
2962 	domain = find_domain(pdev);
2963 	BUG_ON(!domain);
2964 
2965 	iommu = domain_get_iommu(domain);
2966 
2967 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2968 	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2969 		      (unsigned long long)dev_addr))
2970 		return;
2971 
2972 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2973 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2974 
2975 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2976 		 pci_name(pdev), start_pfn, last_pfn);
2977 
2978 	/*  clear the whole page */
2979 	dma_pte_clear_range(domain, start_pfn, last_pfn);
2980 
2981 	/* free page tables */
2982 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2983 
2984 	if (intel_iommu_strict) {
2985 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2986 				      last_pfn - start_pfn + 1, 0);
2987 		/* free iova */
2988 		__free_iova(&domain->iovad, iova);
2989 	} else {
2990 		add_unmap(domain, iova);
2991 		/*
2992 		 * queue up the release of the unmap to save the 1/6th of the
2993 		 * cpu used up by the iotlb flush operation...
2994 		 */
2995 	}
2996 }
2997 
2998 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2999 				  dma_addr_t *dma_handle, gfp_t flags,
3000 				  struct dma_attrs *attrs)
3001 {
3002 	void *vaddr;
3003 	int order;
3004 
3005 	size = PAGE_ALIGN(size);
3006 	order = get_order(size);
3007 
3008 	if (!iommu_no_mapping(hwdev))
3009 		flags &= ~(GFP_DMA | GFP_DMA32);
3010 	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3011 		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3012 			flags |= GFP_DMA;
3013 		else
3014 			flags |= GFP_DMA32;
3015 	}
3016 
3017 	vaddr = (void *)__get_free_pages(flags, order);
3018 	if (!vaddr)
3019 		return NULL;
3020 	memset(vaddr, 0, size);
3021 
3022 	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3023 					 DMA_BIDIRECTIONAL,
3024 					 hwdev->coherent_dma_mask);
3025 	if (*dma_handle)
3026 		return vaddr;
3027 	free_pages((unsigned long)vaddr, order);
3028 	return NULL;
3029 }
3030 
3031 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3032 				dma_addr_t dma_handle, struct dma_attrs *attrs)
3033 {
3034 	int order;
3035 
3036 	size = PAGE_ALIGN(size);
3037 	order = get_order(size);
3038 
3039 	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3040 	free_pages((unsigned long)vaddr, order);
3041 }
3042 
3043 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3044 			   int nelems, enum dma_data_direction dir,
3045 			   struct dma_attrs *attrs)
3046 {
3047 	struct pci_dev *pdev = to_pci_dev(hwdev);
3048 	struct dmar_domain *domain;
3049 	unsigned long start_pfn, last_pfn;
3050 	struct iova *iova;
3051 	struct intel_iommu *iommu;
3052 
3053 	if (iommu_no_mapping(hwdev))
3054 		return;
3055 
3056 	domain = find_domain(pdev);
3057 	BUG_ON(!domain);
3058 
3059 	iommu = domain_get_iommu(domain);
3060 
3061 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3062 	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3063 		      (unsigned long long)sglist[0].dma_address))
3064 		return;
3065 
3066 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3067 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3068 
3069 	/*  clear the whole page */
3070 	dma_pte_clear_range(domain, start_pfn, last_pfn);
3071 
3072 	/* free page tables */
3073 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3074 
3075 	if (intel_iommu_strict) {
3076 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3077 				      last_pfn - start_pfn + 1, 0);
3078 		/* free iova */
3079 		__free_iova(&domain->iovad, iova);
3080 	} else {
3081 		add_unmap(domain, iova);
3082 		/*
3083 		 * queue up the release of the unmap to save the 1/6th of the
3084 		 * cpu used up by the iotlb flush operation...
3085 		 */
3086 	}
3087 }
3088 
3089 static int intel_nontranslate_map_sg(struct device *hddev,
3090 	struct scatterlist *sglist, int nelems, int dir)
3091 {
3092 	int i;
3093 	struct scatterlist *sg;
3094 
3095 	for_each_sg(sglist, sg, nelems, i) {
3096 		BUG_ON(!sg_page(sg));
3097 		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3098 		sg->dma_length = sg->length;
3099 	}
3100 	return nelems;
3101 }
3102 
3103 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3104 			enum dma_data_direction dir, struct dma_attrs *attrs)
3105 {
3106 	int i;
3107 	struct pci_dev *pdev = to_pci_dev(hwdev);
3108 	struct dmar_domain *domain;
3109 	size_t size = 0;
3110 	int prot = 0;
3111 	struct iova *iova = NULL;
3112 	int ret;
3113 	struct scatterlist *sg;
3114 	unsigned long start_vpfn;
3115 	struct intel_iommu *iommu;
3116 
3117 	BUG_ON(dir == DMA_NONE);
3118 	if (iommu_no_mapping(hwdev))
3119 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3120 
3121 	domain = get_valid_domain_for_dev(pdev);
3122 	if (!domain)
3123 		return 0;
3124 
3125 	iommu = domain_get_iommu(domain);
3126 
3127 	for_each_sg(sglist, sg, nelems, i)
3128 		size += aligned_nrpages(sg->offset, sg->length);
3129 
3130 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3131 				pdev->dma_mask);
3132 	if (!iova) {
3133 		sglist->dma_length = 0;
3134 		return 0;
3135 	}
3136 
3137 	/*
3138 	 * Check if DMAR supports zero-length reads on write only
3139 	 * mappings.
3140 	 */
3141 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3142 			!cap_zlr(iommu->cap))
3143 		prot |= DMA_PTE_READ;
3144 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3145 		prot |= DMA_PTE_WRITE;
3146 
3147 	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3148 
3149 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3150 	if (unlikely(ret)) {
3151 		/*  clear the page */
3152 		dma_pte_clear_range(domain, start_vpfn,
3153 				    start_vpfn + size - 1);
3154 		/* free page tables */
3155 		dma_pte_free_pagetable(domain, start_vpfn,
3156 				       start_vpfn + size - 1);
3157 		/* free iova */
3158 		__free_iova(&domain->iovad, iova);
3159 		return 0;
3160 	}
3161 
3162 	/* it's a non-present to present mapping. Only flush if caching mode */
3163 	if (cap_caching_mode(iommu->cap))
3164 		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3165 	else
3166 		iommu_flush_write_buffer(iommu);
3167 
3168 	return nelems;
3169 }
3170 
3171 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3172 {
3173 	return !dma_addr;
3174 }
3175 
3176 struct dma_map_ops intel_dma_ops = {
3177 	.alloc = intel_alloc_coherent,
3178 	.free = intel_free_coherent,
3179 	.map_sg = intel_map_sg,
3180 	.unmap_sg = intel_unmap_sg,
3181 	.map_page = intel_map_page,
3182 	.unmap_page = intel_unmap_page,
3183 	.mapping_error = intel_mapping_error,
3184 };
3185 
3186 static inline int iommu_domain_cache_init(void)
3187 {
3188 	int ret = 0;
3189 
3190 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3191 					 sizeof(struct dmar_domain),
3192 					 0,
3193 					 SLAB_HWCACHE_ALIGN,
3195 					 NULL);
3196 	if (!iommu_domain_cache) {
3197 		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3198 		ret = -ENOMEM;
3199 	}
3200 
3201 	return ret;
3202 }
3203 
3204 static inline int iommu_devinfo_cache_init(void)
3205 {
3206 	int ret = 0;
3207 
3208 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3209 					 sizeof(struct device_domain_info),
3210 					 0,
3211 					 SLAB_HWCACHE_ALIGN,
3212 					 NULL);
3213 	if (!iommu_devinfo_cache) {
3214 		printk(KERN_ERR "Couldn't create devinfo cache\n");
3215 		ret = -ENOMEM;
3216 	}
3217 
3218 	return ret;
3219 }
3220 
3221 static inline int iommu_iova_cache_init(void)
3222 {
3223 	int ret = 0;
3224 
3225 	iommu_iova_cache = kmem_cache_create("iommu_iova",
3226 					 sizeof(struct iova),
3227 					 0,
3228 					 SLAB_HWCACHE_ALIGN,
3229 					 NULL);
3230 	if (!iommu_iova_cache) {
3231 		printk(KERN_ERR "Couldn't create iova cache\n");
3232 		ret = -ENOMEM;
3233 	}
3234 
3235 	return ret;
3236 }
3237 
3238 static int __init iommu_init_mempool(void)
3239 {
3240 	int ret;
3241 	ret = iommu_iova_cache_init();
3242 	if (ret)
3243 		return ret;
3244 
3245 	ret = iommu_domain_cache_init();
3246 	if (ret)
3247 		goto domain_error;
3248 
3249 	ret = iommu_devinfo_cache_init();
3250 	if (!ret)
3251 		return ret;
3252 
3253 	kmem_cache_destroy(iommu_domain_cache);
3254 domain_error:
3255 	kmem_cache_destroy(iommu_iova_cache);
3256 
3257 	return -ENOMEM;
3258 }
3259 
3260 static void __init iommu_exit_mempool(void)
3261 {
3262 	kmem_cache_destroy(iommu_devinfo_cache);
3263 	kmem_cache_destroy(iommu_domain_cache);
3264 	kmem_cache_destroy(iommu_iova_cache);
3265 
3266 }
3267 
3268 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3269 {
3270 	struct dmar_drhd_unit *drhd;
3271 	u32 vtbar;
3272 	int rc;
3273 
3274 	/* We know that this device on this chipset has its own IOMMU.
3275 	 * If we find it under a different IOMMU, then the BIOS is lying
3276 	 * to us. Hope that the IOMMU for this device is actually
3277 	 * disabled, and it needs no translation...
3278 	 */
3279 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3280 	if (rc) {
3281 		/* "can't" happen */
3282 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3283 		return;
3284 	}
3285 	vtbar &= 0xffff0000;
3286 
3287 	/* we know that this iommu should be at offset 0xa000 from vtbar */
3288 	drhd = dmar_find_matched_drhd_unit(pdev);
3289 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3290 			    TAINT_FIRMWARE_WORKAROUND,
3291 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3292 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3293 }
3294 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3295 
3296 static void __init init_no_remapping_devices(void)
3297 {
3298 	struct dmar_drhd_unit *drhd;
3299 
3300 	for_each_drhd_unit(drhd) {
3301 		if (!drhd->include_all) {
3302 			int i;
3303 			for (i = 0; i < drhd->devices_cnt; i++)
3304 				if (drhd->devices[i] != NULL)
3305 					break;
3306 			/* ignore DMAR unit if no pci devices exist */
3307 			if (i == drhd->devices_cnt)
3308 				drhd->ignored = 1;
3309 		}
3310 	}
3311 
3312 	for_each_drhd_unit(drhd) {
3313 		int i;
3314 		if (drhd->ignored || drhd->include_all)
3315 			continue;
3316 
3317 		for (i = 0; i < drhd->devices_cnt; i++)
3318 			if (drhd->devices[i] &&
3319 			    !IS_GFX_DEVICE(drhd->devices[i]))
3320 				break;
3321 
3322 		if (i < drhd->devices_cnt)
3323 			continue;
3324 
3325 		/* This IOMMU has *only* gfx devices. Either bypass it or
3326 		   set the gfx_mapped flag, as appropriate */
3327 		if (dmar_map_gfx) {
3328 			intel_iommu_gfx_mapped = 1;
3329 		} else {
3330 			drhd->ignored = 1;
3331 			for (i = 0; i < drhd->devices_cnt; i++) {
3332 				if (!drhd->devices[i])
3333 					continue;
3334 				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3335 			}
3336 		}
3337 	}
3338 }
3339 
3340 #ifdef CONFIG_SUSPEND
3341 static int init_iommu_hw(void)
3342 {
3343 	struct dmar_drhd_unit *drhd;
3344 	struct intel_iommu *iommu = NULL;
3345 
3346 	for_each_active_iommu(iommu, drhd)
3347 		if (iommu->qi)
3348 			dmar_reenable_qi(iommu);
3349 
3350 	for_each_iommu(iommu, drhd) {
3351 		if (drhd->ignored) {
3352 			/*
3353 			 * we always have to disable PMRs or DMA may fail on
3354 			 * this device
3355 			 */
3356 			if (force_on)
3357 				iommu_disable_protect_mem_regions(iommu);
3358 			continue;
3359 		}
3360 
3361 		iommu_flush_write_buffer(iommu);
3362 
3363 		iommu_set_root_entry(iommu);
3364 
3365 		iommu->flush.flush_context(iommu, 0, 0, 0,
3366 					   DMA_CCMD_GLOBAL_INVL);
3367 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3368 					 DMA_TLB_GLOBAL_FLUSH);
3369 		if (iommu_enable_translation(iommu))
3370 			return 1;
3371 		iommu_disable_protect_mem_regions(iommu);
3372 	}
3373 
3374 	return 0;
3375 }
3376 
3377 static void iommu_flush_all(void)
3378 {
3379 	struct dmar_drhd_unit *drhd;
3380 	struct intel_iommu *iommu;
3381 
3382 	for_each_active_iommu(iommu, drhd) {
3383 		iommu->flush.flush_context(iommu, 0, 0, 0,
3384 					   DMA_CCMD_GLOBAL_INVL);
3385 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3386 					 DMA_TLB_GLOBAL_FLUSH);
3387 	}
3388 }
3389 
3390 static int iommu_suspend(void)
3391 {
3392 	struct dmar_drhd_unit *drhd;
3393 	struct intel_iommu *iommu = NULL;
3394 	unsigned long flag;
3395 
3396 	for_each_active_iommu(iommu, drhd) {
3397 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3398 						 GFP_ATOMIC);
3399 		if (!iommu->iommu_state)
3400 			goto nomem;
3401 	}
3402 
3403 	iommu_flush_all();
3404 
3405 	for_each_active_iommu(iommu, drhd) {
3406 		iommu_disable_translation(iommu);
3407 
3408 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3409 
3410 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3411 			readl(iommu->reg + DMAR_FECTL_REG);
3412 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3413 			readl(iommu->reg + DMAR_FEDATA_REG);
3414 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3415 			readl(iommu->reg + DMAR_FEADDR_REG);
3416 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3417 			readl(iommu->reg + DMAR_FEUADDR_REG);
3418 
3419 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3420 	}
3421 	return 0;
3422 
3423 nomem:
3424 	for_each_active_iommu(iommu, drhd)
3425 		kfree(iommu->iommu_state);
3426 
3427 	return -ENOMEM;
3428 }
3429 
3430 static void iommu_resume(void)
3431 {
3432 	struct dmar_drhd_unit *drhd;
3433 	struct intel_iommu *iommu = NULL;
3434 	unsigned long flag;
3435 
3436 	if (init_iommu_hw()) {
3437 		if (force_on)
3438 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3439 		else
3440 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3441 		return;
3442 	}
3443 
3444 	for_each_active_iommu(iommu, drhd) {
3445 
3446 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3447 
3448 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3449 			iommu->reg + DMAR_FECTL_REG);
3450 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3451 			iommu->reg + DMAR_FEDATA_REG);
3452 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3453 			iommu->reg + DMAR_FEADDR_REG);
3454 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3455 			iommu->reg + DMAR_FEUADDR_REG);
3456 
3457 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3458 	}
3459 
3460 	for_each_active_iommu(iommu, drhd)
3461 		kfree(iommu->iommu_state);
3462 }
3463 
3464 static struct syscore_ops iommu_syscore_ops = {
3465 	.resume		= iommu_resume,
3466 	.suspend	= iommu_suspend,
3467 };
3468 
3469 static void __init init_iommu_pm_ops(void)
3470 {
3471 	register_syscore_ops(&iommu_syscore_ops);
3472 }
3473 
3474 #else
3475 static inline void init_iommu_pm_ops(void) {}
3476 #endif	/* CONFIG_SUSPEND */
3477 
3478 LIST_HEAD(dmar_rmrr_units);
3479 
3480 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3481 {
3482 	list_add(&rmrr->list, &dmar_rmrr_units);
3483 }
3484 
3485 
3486 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3487 {
3488 	struct acpi_dmar_reserved_memory *rmrr;
3489 	struct dmar_rmrr_unit *rmrru;
3490 
3491 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3492 	if (!rmrru)
3493 		return -ENOMEM;
3494 
3495 	rmrru->hdr = header;
3496 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3497 	rmrru->base_address = rmrr->base_address;
3498 	rmrru->end_address = rmrr->end_address;
3499 
3500 	dmar_register_rmrr_unit(rmrru);
3501 	return 0;
3502 }
3503 
3504 static int __init
3505 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3506 {
3507 	struct acpi_dmar_reserved_memory *rmrr;
3508 	int ret;
3509 
3510 	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3511 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3512 		((void *)rmrr) + rmrr->header.length,
3513 		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3514 
3515 	if (ret || (rmrru->devices_cnt == 0)) {
3516 		list_del(&rmrru->list);
3517 		kfree(rmrru);
3518 	}
3519 	return ret;
3520 }
3521 
3522 static LIST_HEAD(dmar_atsr_units);
3523 
3524 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3525 {
3526 	struct acpi_dmar_atsr *atsr;
3527 	struct dmar_atsr_unit *atsru;
3528 
3529 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3530 	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3531 	if (!atsru)
3532 		return -ENOMEM;
3533 
3534 	atsru->hdr = hdr;
3535 	atsru->include_all = atsr->flags & 0x1;
3536 
3537 	list_add(&atsru->list, &dmar_atsr_units);
3538 
3539 	return 0;
3540 }
3541 
3542 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3543 {
3544 	int rc;
3545 	struct acpi_dmar_atsr *atsr;
3546 
3547 	if (atsru->include_all)
3548 		return 0;
3549 
3550 	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3551 	rc = dmar_parse_dev_scope((void *)(atsr + 1),
3552 				(void *)atsr + atsr->header.length,
3553 				&atsru->devices_cnt, &atsru->devices,
3554 				atsr->segment);
3555 	if (rc || !atsru->devices_cnt) {
3556 		list_del(&atsru->list);
3557 		kfree(atsru);
3558 	}
3559 
3560 	return rc;
3561 }
3562 
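/*
 * Return 1 if @dev sits below a root port covered by an ATSR (or by an
 * INCLUDE_ALL ATSR for its segment), i.e. if ATS may be enabled for it;
 * return 0 otherwise.  SR-IOV virtual functions are matched through their
 * physical function.
 */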
3563 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3564 {
3565 	int i;
3566 	struct pci_bus *bus;
3567 	struct acpi_dmar_atsr *atsr;
3568 	struct dmar_atsr_unit *atsru;
3569 
3570 	dev = pci_physfn(dev);
3571 
3572 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3573 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3574 		if (atsr->segment == pci_domain_nr(dev->bus))
3575 			goto found;
3576 	}
3577 
3578 	return 0;
3579 
3580 found:
3581 	for (bus = dev->bus; bus; bus = bus->parent) {
3582 		struct pci_dev *bridge = bus->self;
3583 
3584 		if (!bridge || !pci_is_pcie(bridge) ||
3585 		    bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3586 			return 0;
3587 
3588 		if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3589 			for (i = 0; i < atsru->devices_cnt; i++)
3590 				if (atsru->devices[i] == bridge)
3591 					return 1;
3592 			break;
3593 		}
3594 	}
3595 
3596 	if (atsru->include_all)
3597 		return 1;
3598 
3599 	return 0;
3600 }
3601 
3602 int __init dmar_parse_rmrr_atsr_dev(void)
3603 {
3604 	struct dmar_rmrr_unit *rmrr, *rmrr_n;
3605 	struct dmar_atsr_unit *atsr, *atsr_n;
3606 	int ret = 0;
3607 
3608 	list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3609 		ret = rmrr_parse_dev(rmrr);
3610 		if (ret)
3611 			return ret;
3612 	}
3613 
3614 	list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3615 		ret = atsr_parse_dev(atsr);
3616 		if (ret)
3617 			return ret;
3618 	}
3619 
3620 	return ret;
3621 }
3622 
3623 /*
3624  * Here we only respond to a device being unbound from its driver.
3625  *
3626  * A newly added device is not attached to its DMAR domain here yet; that
3627  * happens when the device is first mapped to an iova.
3628  */
3629 static int device_notifier(struct notifier_block *nb,
3630 				  unsigned long action, void *data)
3631 {
3632 	struct device *dev = data;
3633 	struct pci_dev *pdev = to_pci_dev(dev);
3634 	struct dmar_domain *domain;
3635 
3636 	if (iommu_no_mapping(dev))
3637 		return 0;
3638 
3639 	domain = find_domain(pdev);
3640 	if (!domain)
3641 		return 0;
3642 
3643 	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3644 		domain_remove_one_dev_info(domain, pdev);
3645 
3646 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3647 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3648 		    list_empty(&domain->devices))
3649 			domain_exit(domain);
3650 	}
3651 
3652 	return 0;
3653 }
3654 
3655 static struct notifier_block device_nb = {
3656 	.notifier_call = device_notifier,
3657 };
3658 
3659 int __init intel_iommu_init(void)
3660 {
3661 	int ret = 0;
3662 
3663 	/* VT-d is required for a TXT/tboot launch, so enforce that */
3664 	force_on = tboot_force_iommu();
3665 
3666 	if (dmar_table_init()) {
3667 		if (force_on)
3668 			panic("tboot: Failed to initialize DMAR table\n");
3669 		return -ENODEV;
3670 	}
3671 
3672 	if (dmar_dev_scope_init() < 0) {
3673 		if (force_on)
3674 			panic("tboot: Failed to initialize DMAR device scope\n");
3675 		return -ENODEV;
3676 	}
3677 
3678 	if (no_iommu || dmar_disabled)
3679 		return -ENODEV;
3680 
3681 	if (iommu_init_mempool()) {
3682 		if (force_on)
3683 			panic("tboot: Failed to initialize iommu memory\n");
3684 		return -ENODEV;
3685 	}
3686 
3687 	if (list_empty(&dmar_rmrr_units))
3688 		printk(KERN_INFO "DMAR: No RMRR found\n");
3689 
3690 	if (list_empty(&dmar_atsr_units))
3691 		printk(KERN_INFO "DMAR: No ATSR found\n");
3692 
3693 	if (dmar_init_reserved_ranges()) {
3694 		if (force_on)
3695 			panic("tboot: Failed to reserve iommu ranges\n");
3696 		return -ENODEV;
3697 	}
3698 
3699 	init_no_remapping_devices();
3700 
3701 	ret = init_dmars();
3702 	if (ret) {
3703 		if (force_on)
3704 			panic("tboot: Failed to initialize DMARs\n");
3705 		printk(KERN_ERR "IOMMU: dmar init failed\n");
3706 		put_iova_domain(&reserved_iova_list);
3707 		iommu_exit_mempool();
3708 		return ret;
3709 	}
3710 	printk(KERN_INFO
3711 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3712 
3713 	init_timer(&unmap_timer);
3714 #ifdef CONFIG_SWIOTLB
3715 	swiotlb = 0;
3716 #endif
3717 	dma_ops = &intel_dma_ops;
3718 
3719 	init_iommu_pm_ops();
3720 
3721 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3722 
3723 	bus_register_notifier(&pci_bus_type, &device_nb);
3724 
3725 	intel_iommu_enabled = 1;
3726 
3727 	return 0;
3728 }
3729 
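/*
 * When a device sits behind a PCIe-to-PCI(-X) bridge, its DMA arrives with
 * the bridge's requester ID, so context entries were also installed for
 * the bridges on the path (and for the bridge's secondary bus at devfn 0).
 * Tear those dependent context entries down when the device is detached.
 */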
3730 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3731 					   struct pci_dev *pdev)
3732 {
3733 	struct pci_dev *tmp, *parent;
3734 
3735 	if (!iommu || !pdev)
3736 		return;
3737 
3738 	/* dependent device detach */
3739 	tmp = pci_find_upstream_pcie_bridge(pdev);
3740 	/* Secondary interface's bus number and devfn 0 */
3741 	if (tmp) {
3742 		parent = pdev->bus->self;
3743 		while (parent != tmp) {
3744 			iommu_detach_dev(iommu, parent->bus->number,
3745 					 parent->devfn);
3746 			parent = parent->bus->self;
3747 		}
3748 		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3749 			iommu_detach_dev(iommu,
3750 				tmp->subordinate->number, 0);
3751 		else /* this is a legacy PCI bridge */
3752 			iommu_detach_dev(iommu, tmp->bus->number,
3753 					 tmp->devfn);
3754 	}
3755 }
3756 
3757 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3758 					  struct pci_dev *pdev)
3759 {
3760 	struct device_domain_info *info;
3761 	struct intel_iommu *iommu;
3762 	unsigned long flags;
3763 	int found = 0;
3764 	struct list_head *entry, *tmp;
3765 
3766 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3767 				pdev->devfn);
3768 	if (!iommu)
3769 		return;
3770 
3771 	spin_lock_irqsave(&device_domain_lock, flags);
3772 	list_for_each_safe(entry, tmp, &domain->devices) {
3773 		info = list_entry(entry, struct device_domain_info, link);
3774 		if (info->segment == pci_domain_nr(pdev->bus) &&
3775 		    info->bus == pdev->bus->number &&
3776 		    info->devfn == pdev->devfn) {
3777 			list_del(&info->link);
3778 			list_del(&info->global);
3779 			if (info->dev)
3780 				info->dev->dev.archdata.iommu = NULL;
3781 			spin_unlock_irqrestore(&device_domain_lock, flags);
3782 
3783 			iommu_disable_dev_iotlb(info);
3784 			iommu_detach_dev(iommu, info->bus, info->devfn);
3785 			iommu_detach_dependent_devices(iommu, pdev);
3786 			free_devinfo_mem(info);
3787 
3788 			spin_lock_irqsave(&device_domain_lock, flags);
3789 
3790 			if (found)
3791 				break;
3792 			else
3793 				continue;
3794 		}
3795 
3796 		/* If there are no other devices under the same iommu
3797 		 * owned by this domain, clear this iommu in iommu_bmp
3798 		 * and update the iommu count and coherency.
3799 		 */
3800 		if (iommu == device_to_iommu(info->segment, info->bus,
3801 					    info->devfn))
3802 			found = 1;
3803 	}
3804 
3805 	spin_unlock_irqrestore(&device_domain_lock, flags);
3806 
3807 	if (found == 0) {
3808 		unsigned long tmp_flags;
3809 		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3810 		clear_bit(iommu->seq_id, domain->iommu_bmp);
3811 		domain->iommu_count--;
3812 		domain_update_iommu_cap(domain);
3813 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3814 
3815 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3816 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3817 			spin_lock_irqsave(&iommu->lock, tmp_flags);
3818 			clear_bit(domain->id, iommu->domain_ids);
3819 			iommu->domains[domain->id] = NULL;
3820 			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3821 		}
3822 	}
3823 }
3824 
3825 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3826 {
3827 	struct device_domain_info *info;
3828 	struct intel_iommu *iommu;
3829 	unsigned long flags1, flags2;
3830 
3831 	spin_lock_irqsave(&device_domain_lock, flags1);
3832 	while (!list_empty(&domain->devices)) {
3833 		info = list_entry(domain->devices.next,
3834 			struct device_domain_info, link);
3835 		list_del(&info->link);
3836 		list_del(&info->global);
3837 		if (info->dev)
3838 			info->dev->dev.archdata.iommu = NULL;
3839 
3840 		spin_unlock_irqrestore(&device_domain_lock, flags1);
3841 
3842 		iommu_disable_dev_iotlb(info);
3843 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3844 		iommu_detach_dev(iommu, info->bus, info->devfn);
3845 		iommu_detach_dependent_devices(iommu, info->dev);
3846 
3847 		/* clear this iommu in iommu_bmp, update iommu count
3848 		 * and capabilities
3849 		 */
3850 		spin_lock_irqsave(&domain->iommu_lock, flags2);
3851 		if (test_and_clear_bit(iommu->seq_id,
3852 				       domain->iommu_bmp)) {
3853 			domain->iommu_count--;
3854 			domain_update_iommu_cap(domain);
3855 		}
3856 		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3857 
3858 		free_devinfo_mem(info);
3859 		spin_lock_irqsave(&device_domain_lock, flags1);
3860 	}
3861 	spin_unlock_irqrestore(&device_domain_lock, flags1);
3862 }
3863 
3864 /* domain id for virtual machine; it won't be set in the context entries */
3865 static unsigned long vm_domid;
3866 
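/*
 * Domains created through the generic IOMMU API (e.g. for KVM device
 * assignment) are "virtual machine" domains: they carry a software-only
 * id from vm_domid and may span several DMAR units, each of which uses
 * its own hardware domain id once a device behind it is attached.
 */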
3867 static struct dmar_domain *iommu_alloc_vm_domain(void)
3868 {
3869 	struct dmar_domain *domain;
3870 
3871 	domain = alloc_domain_mem();
3872 	if (!domain)
3873 		return NULL;
3874 
3875 	domain->id = vm_domid++;
3876 	domain->nid = -1;
3877 	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3878 	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3879 
3880 	return domain;
3881 }
3882 
3883 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3884 {
3885 	int adjust_width;
3886 
3887 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3888 	spin_lock_init(&domain->iommu_lock);
3889 
3890 	domain_reserve_special_ranges(domain);
3891 
3892 	/* calculate AGAW */
3893 	domain->gaw = guest_width;
3894 	adjust_width = guestwidth_to_adjustwidth(guest_width);
3895 	domain->agaw = width_to_agaw(adjust_width);
3896 
3897 	INIT_LIST_HEAD(&domain->devices);
3898 
3899 	domain->iommu_count = 0;
3900 	domain->iommu_coherency = 0;
3901 	domain->iommu_snooping = 0;
3902 	domain->iommu_superpage = 0;
3903 	domain->max_addr = 0;
3904 	domain->nid = -1;
3905 
3906 	/* always allocate the top pgd */
3907 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3908 	if (!domain->pgd)
3909 		return -ENOMEM;
3910 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3911 	return 0;
3912 }
3913 
3914 static void iommu_free_vm_domain(struct dmar_domain *domain)
3915 {
3916 	unsigned long flags;
3917 	struct dmar_drhd_unit *drhd;
3918 	struct intel_iommu *iommu;
3919 	unsigned long i;
3920 	unsigned long ndomains;
3921 
3922 	for_each_drhd_unit(drhd) {
3923 		if (drhd->ignored)
3924 			continue;
3925 		iommu = drhd->iommu;
3926 
3927 		ndomains = cap_ndoms(iommu->cap);
3928 		for_each_set_bit(i, iommu->domain_ids, ndomains) {
3929 			if (iommu->domains[i] == domain) {
3930 				spin_lock_irqsave(&iommu->lock, flags);
3931 				clear_bit(i, iommu->domain_ids);
3932 				iommu->domains[i] = NULL;
3933 				spin_unlock_irqrestore(&iommu->lock, flags);
3934 				break;
3935 			}
3936 		}
3937 	}
3938 }
3939 
3940 static void vm_domain_exit(struct dmar_domain *domain)
3941 {
3942 	/* Domain 0 is reserved, so don't process it */
3943 	if (!domain)
3944 		return;
3945 
3946 	vm_domain_remove_all_dev_info(domain);
3947 	/* destroy iovas */
3948 	put_iova_domain(&domain->iovad);
3949 
3950 	/* clear ptes */
3951 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3952 
3953 	/* free page tables */
3954 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3955 
3956 	iommu_free_vm_domain(domain);
3957 	free_domain_mem(domain);
3958 }
3959 
3960 static int intel_iommu_domain_init(struct iommu_domain *domain)
3961 {
3962 	struct dmar_domain *dmar_domain;
3963 
3964 	dmar_domain = iommu_alloc_vm_domain();
3965 	if (!dmar_domain) {
3966 		printk(KERN_ERR
3967 			"intel_iommu_domain_init: dmar_domain == NULL\n");
3968 		return -ENOMEM;
3969 	}
3970 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3971 		printk(KERN_ERR
3972 			"intel_iommu_domain_init() failed\n");
3973 		vm_domain_exit(dmar_domain);
3974 		return -ENOMEM;
3975 	}
3976 	domain_update_iommu_cap(dmar_domain);
3977 	domain->priv = dmar_domain;
3978 
3979 	return 0;
3980 }
3981 
3982 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3983 {
3984 	struct dmar_domain *dmar_domain = domain->priv;
3985 
3986 	domain->priv = NULL;
3987 	vm_domain_exit(dmar_domain);
3988 }
3989 
3990 static int intel_iommu_attach_device(struct iommu_domain *domain,
3991 				     struct device *dev)
3992 {
3993 	struct dmar_domain *dmar_domain = domain->priv;
3994 	struct pci_dev *pdev = to_pci_dev(dev);
3995 	struct intel_iommu *iommu;
3996 	int addr_width;
3997 
3998 	/* normally pdev is not mapped */
3999 	if (unlikely(domain_context_mapped(pdev))) {
4000 		struct dmar_domain *old_domain;
4001 
4002 		old_domain = find_domain(pdev);
4003 		if (old_domain) {
4004 			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4005 			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4006 				domain_remove_one_dev_info(old_domain, pdev);
4007 			else
4008 				domain_remove_dev_info(old_domain);
4009 		}
4010 	}
4011 
4012 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4013 				pdev->devfn);
4014 	if (!iommu)
4015 		return -ENODEV;
4016 
4017 	/* check if this iommu agaw is sufficient for max mapped address */
4018 	addr_width = agaw_to_width(iommu->agaw);
4019 	if (addr_width > cap_mgaw(iommu->cap))
4020 		addr_width = cap_mgaw(iommu->cap);
4021 
4022 	if (dmar_domain->max_addr > (1LL << addr_width)) {
4023 		printk(KERN_ERR "%s: iommu width (%d) is not "
4024 		       "sufficient for the mapped address (%llx)\n",
4025 		       __func__, addr_width, dmar_domain->max_addr);
4026 		return -EFAULT;
4027 	}
4028 	dmar_domain->gaw = addr_width;
4029 
4030 	/*
4031 	 * Knock out extra levels of page tables if necessary
4032 	 */
4033 	while (iommu->agaw < dmar_domain->agaw) {
4034 		struct dma_pte *pte;
4035 
4036 		pte = dmar_domain->pgd;
4037 		if (dma_pte_present(pte)) {
4038 			dmar_domain->pgd = (struct dma_pte *)
4039 				phys_to_virt(dma_pte_addr(pte));
4040 			free_pgtable_page(pte);
4041 		}
4042 		dmar_domain->agaw--;
4043 	}
4044 
4045 	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4046 }
4047 
4048 static void intel_iommu_detach_device(struct iommu_domain *domain,
4049 				      struct device *dev)
4050 {
4051 	struct dmar_domain *dmar_domain = domain->priv;
4052 	struct pci_dev *pdev = to_pci_dev(dev);
4053 
4054 	domain_remove_one_dev_info(dmar_domain, pdev);
4055 }
4056 
4057 static int intel_iommu_map(struct iommu_domain *domain,
4058 			   unsigned long iova, phys_addr_t hpa,
4059 			   size_t size, int iommu_prot)
4060 {
4061 	struct dmar_domain *dmar_domain = domain->priv;
4062 	u64 max_addr;
4063 	int prot = 0;
4064 	int ret;
4065 
4066 	if (iommu_prot & IOMMU_READ)
4067 		prot |= DMA_PTE_READ;
4068 	if (iommu_prot & IOMMU_WRITE)
4069 		prot |= DMA_PTE_WRITE;
4070 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4071 		prot |= DMA_PTE_SNP;
4072 
4073 	max_addr = iova + size;
4074 	if (dmar_domain->max_addr < max_addr) {
4075 		u64 end;
4076 
4077 		/* check if minimum agaw is sufficient for mapped address */
4078 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4079 		if (end < max_addr) {
4080 			printk(KERN_ERR "%s: iommu width (%d) is not "
4081 			       "sufficient for the mapped address (%llx)\n",
4082 			       __func__, dmar_domain->gaw, max_addr);
4083 			return -EFAULT;
4084 		}
4085 		dmar_domain->max_addr = max_addr;
4086 	}
4087 	/* Round up size to next multiple of PAGE_SIZE, if it and
4088 	   the low bits of hpa would take us onto the next page */
4089 	size = aligned_nrpages(hpa, size);
4090 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4091 				 hpa >> VTD_PAGE_SHIFT, size, prot);
4092 	return ret;
4093 }
4094 
4095 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4096 			     unsigned long iova, size_t size)
4097 {
4098 	struct dmar_domain *dmar_domain = domain->priv;
4099 	int order;
4100 
4101 	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4102 			    (iova + size - 1) >> VTD_PAGE_SHIFT);
4103 
4104 	if (dmar_domain->max_addr == iova + size)
4105 		dmar_domain->max_addr = iova;
4106 
4107 	return PAGE_SIZE << order;
4108 }
4109 
4110 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4111 					    unsigned long iova)
4112 {
4113 	struct dmar_domain *dmar_domain = domain->priv;
4114 	struct dma_pte *pte;
4115 	u64 phys = 0;
4116 
4117 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4118 	if (pte)
4119 		phys = dma_pte_addr(pte);
4120 
4121 	return phys;
4122 }
4123 
4124 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4125 				      unsigned long cap)
4126 {
4127 	struct dmar_domain *dmar_domain = domain->priv;
4128 
4129 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
4130 		return dmar_domain->iommu_snooping;
4131 	if (cap == IOMMU_CAP_INTR_REMAP)
4132 		return intr_remapping_enabled;
4133 
4134 	return 0;
4135 }
4136 
4137 /*
4138  * Group numbers are arbitrary.  Devices with the same group number
4139  * indicate that the iommu cannot differentiate between them.  To avoid
4140  * tracking used groups we just use the seg|bus|devfn of the lowest
4141  * level at which we're able to differentiate devices.
4142  */
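/*
 * Example (x86 is little-endian): a PCIe endpoint at 0000:03:00.0 packs to
 * segment 0x0000, bus 0x03, devfn 0x00, i.e. groupid 0x00000300.  A
 * conventional-PCI device behind a PCIe-to-PCI bridge instead reuses the
 * bridge's secondary bus number with devfn 0, so every device behind that
 * bridge lands in the same group.
 */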
4143 static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
4144 {
4145 	struct pci_dev *pdev = to_pci_dev(dev);
4146 	struct pci_dev *bridge;
4147 	union {
4148 		struct {
4149 			u8 devfn;
4150 			u8 bus;
4151 			u16 segment;
4152 		} pci;
4153 		u32 group;
4154 	} id;
4155 
4156 	if (iommu_no_mapping(dev))
4157 		return -ENODEV;
4158 
4159 	id.pci.segment = pci_domain_nr(pdev->bus);
4160 	id.pci.bus = pdev->bus->number;
4161 	id.pci.devfn = pdev->devfn;
4162 
4163 	if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
4164 		return -ENODEV;
4165 
4166 	bridge = pci_find_upstream_pcie_bridge(pdev);
4167 	if (bridge) {
4168 		if (pci_is_pcie(bridge)) {
4169 			id.pci.bus = bridge->subordinate->number;
4170 			id.pci.devfn = 0;
4171 		} else {
4172 			id.pci.bus = bridge->bus->number;
4173 			id.pci.devfn = bridge->devfn;
4174 		}
4175 	}
4176 
4177 	if (!pdev->is_virtfn && iommu_group_mf)
4178 		id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
4179 
4180 	*groupid = id.group;
4181 
4182 	return 0;
4183 }
4184 
4185 static struct iommu_ops intel_iommu_ops = {
4186 	.domain_init	= intel_iommu_domain_init,
4187 	.domain_destroy = intel_iommu_domain_destroy,
4188 	.attach_dev	= intel_iommu_attach_device,
4189 	.detach_dev	= intel_iommu_detach_device,
4190 	.map		= intel_iommu_map,
4191 	.unmap		= intel_iommu_unmap,
4192 	.iova_to_phys	= intel_iommu_iova_to_phys,
4193 	.domain_has_cap = intel_iommu_domain_has_cap,
4194 	.device_group	= intel_iommu_device_group,
4195 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
4196 };
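/*
 * These ops are registered for the PCI bus by intel_iommu_init() via
 * bus_set_iommu(), so users of the generic IOMMU API (e.g. KVM device
 * assignment) are backed by the VT-d implementation above.
 */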
4197 
4198 static void __devinit quirk_iommu_g4x_gfx(struct pci_dev *dev)
4199 {
4200 	/* G4x/GM45 integrated gfx dmar support is totally busted. */
4201 	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4202 	dmar_map_gfx = 0;
4203 }
4204 
4205 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4206 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4207 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4208 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4209 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4210 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4211 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4212 
4213 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4214 {
4215 	/*
4216 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4217 	 * but needs it. Same seems to hold for the desktop versions.
4218 	 */
4219 	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4220 	rwbf_quirk = 1;
4221 }
4222 
4223 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4224 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4225 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4226 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4227 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4228 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4229 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4230 
4231 #define GGC 0x52
4232 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4233 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4234 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4235 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4236 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4237 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4238 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4239 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4240 
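/*
 * On Ironlake ("Calpella") integrated graphics, bit 11 of the GGC register
 * (GGC_MEMORY_VT_ENABLED) tells us whether the BIOS allocated stolen
 * memory for the VT-d shadow GTT.  Without it the IGD cannot be translated
 * safely, so graphics mapping is disabled; with it, batched IOTLB flushing
 * is still turned off, as the function below notes.
 */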
4241 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4242 {
4243 	unsigned short ggc;
4244 
4245 	if (pci_read_config_word(dev, GGC, &ggc))
4246 		return;
4247 
4248 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4249 		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4250 		dmar_map_gfx = 0;
4251 	} else if (dmar_map_gfx) {
4252 		/* we have to ensure the gfx device is idle before we flush */
4253 		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4254 		intel_iommu_strict = 1;
4255 	}
4256 }
4257 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4258 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4259 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4260 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4261 
4262 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4263    ISOCH DMAR unit for the Azalia sound device, but not give it any
4264    TLB entries, which causes it to deadlock. Check for that.  We do
4265    this in a function called from init_dmars(), instead of in a PCI
4266    quirk, because we don't want to print the obnoxious "BIOS broken"
4267    message if VT-d is actually disabled.
4268 */
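/*
 * Decoding of the config-space dword read at offset 0x188 below (called
 * vtisochctrl here): bit 0 set means Azalia DMA is routed to the non-isoch
 * DMAR unit, which is fine; otherwise the field masked with 0x1c is treated
 * as the number of TLB entries allotted to the isoch unit - 16 is the
 * recommended value, zero means the BIOS allotted none and Azalia must be
 * identity-mapped to keep working.
 */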
4269 static void __init check_tylersburg_isoch(void)
4270 {
4271 	struct pci_dev *pdev;
4272 	uint32_t vtisochctrl;
4273 
4274 	/* If there's no Azalia in the system anyway, forget it. */
4275 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4276 	if (!pdev)
4277 		return;
4278 	pci_dev_put(pdev);
4279 
4280 	/* System Management Registers. Might be hidden, in which case
4281 	   we can't do the sanity check. But that's OK, because the
4282 	   known-broken BIOSes _don't_ actually hide it, so far. */
4283 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4284 	if (!pdev)
4285 		return;
4286 
4287 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4288 		pci_dev_put(pdev);
4289 		return;
4290 	}
4291 
4292 	pci_dev_put(pdev);
4293 
4294 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4295 	if (vtisochctrl & 1)
4296 		return;
4297 
4298 	/* Drop all bits other than the number of TLB entries */
4299 	vtisochctrl &= 0x1c;
4300 
4301 	/* If we have the recommended number of TLB entries (16), fine. */
4302 	if (vtisochctrl == 0x10)
4303 		return;
4304 
4305 	/* Zero TLB entries? You get to ride the short bus to school. */
4306 	if (!vtisochctrl) {
4307 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4308 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4309 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4310 		     dmi_get_system_info(DMI_BIOS_VERSION),
4311 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4312 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4313 		return;
4314 	}
4315 
4316 	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4317 	       vtisochctrl);
4318 }
4319