/* $Id: pci_iommu.c,v 1.16 2001/10/09 02:24:33 davem Exp $
 * pci_iommu.c: UltraSparc PCI controller IOM/STC support.
 *
 * Copyright (C) 1999 David S. Miller (davem@redhat.com)
 * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>

#include <asm/pbm.h>

#include "iommu_common.h"

#define PCI_STC_CTXMATCH_ADDR(STC, CTX)	\
	((STC)->strbuf_ctxmatch_base + ((CTX) << 3))

/* Accessing IOMMU and Streaming Buffer registers.
 * REG parameter is a physical address.  All registers
 * are 64-bits in size.
 */
#define pci_iommu_read(__reg) \
({	u64 __ret; \
	__asm__ __volatile__("ldxa [%1] %2, %0" \
			     : "=r" (__ret) \
			     : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
			     : "memory"); \
	__ret; \
})
#define pci_iommu_write(__reg, __val) \
	__asm__ __volatile__("stxa %0, [%1] %2" \
			     : /* no outputs */ \
			     : "r" (__val), "r" (__reg), \
			       "i" (ASI_PHYS_BYPASS_EC_E))
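/* Both accessors take physical register addresses and access them directly
 * via ASI_PHYS_BYPASS_EC_E.  For example, elsewhere in this file posted PIO
 * writes are drained by reading a register back through them:
 *
 *	(void) pci_iommu_read(iommu->write_complete_reg);
 */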

/* Must be invoked under the IOMMU lock. */
static void __iommu_flushall(struct pci_iommu *iommu)
{
	unsigned long tag;
	int entry;

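	/* The IOMMU TLB has 16 entries whose tags live in a diagnostic
	 * register block; on PSYCHO/SABRE that block sits 0xa580 - 0x0210
	 * bytes above the IOMMU flush register, which is the offset used
	 * below.  Writing zero to every tag invalidates the whole TLB.
	 */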
	tag = iommu->iommu_flush + (0xa580UL - 0x0210UL);
	for (entry = 0; entry < 16; entry++) {
		pci_iommu_write(tag, 0);
		tag += 8;
	}

	/* Ensure completion of previous PIO writes. */
	(void) pci_iommu_read(iommu->write_complete_reg);

	/* Now update everyone's flush point. */
	for (entry = 0; entry < PBM_NCLUSTERS; entry++) {
		iommu->alloc_info[entry].flush =
			iommu->alloc_info[entry].next;
	}
}

#define IOPTE_CONSISTENT(CTX) \
	(IOPTE_VALID | IOPTE_CACHE | \
	 (((CTX) << 47) & IOPTE_CONTEXT))

#define IOPTE_STREAMING(CTX) \
	(IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)

/* Existing mappings are never marked invalid; instead they
 * are pointed at a dummy page.
 */
#define IOPTE_IS_DUMMY(iommu, iopte)	\
	((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa)

static inline void iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte)
{
	unsigned long val = iopte_val(*iopte);

	val &= ~IOPTE_PAGE;
	val |= iommu->dummy_page_pa;

	iopte_val(*iopte) = val;
}

void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize)
{
	int i;

	tsbsize /= sizeof(iopte_t);

	for (i = 0; i < tsbsize; i++)
		iopte_make_dummy(iommu, &iommu->page_table[i]);
}

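/* The streaming map is carved into PBM_NCLUSTERS equal clusters of
 * 1 << (page_table_sz_bits - PBM_LOGCLUSTERS) entries.  A request for
 * NPAGES pages is rounded up to the next power of two, 2^cnum, and is
 * satisfied from cluster cnum at a 2^cnum entry granularity.  Cluster
 * zero also donates its tail to consistent mappings (see
 * lowest_consistent_map).
 */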
static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages)
{
	iopte_t *iopte, *limit, *first;
	unsigned long cnum, ent, flush_point;

	cnum = 0;
	while ((1UL << cnum) < npages)
		cnum++;
	iopte  = (iommu->page_table +
		  (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));

	if (cnum == 0)
		limit = (iommu->page_table +
			 iommu->lowest_consistent_map);
	else
		limit = (iopte +
			 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));

	iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
	flush_point = iommu->alloc_info[cnum].flush;

	first = iopte;
	for (;;) {
		if (IOPTE_IS_DUMMY(iommu, iopte)) {
			if ((iopte + (1 << cnum)) >= limit)
				ent = 0;
			else
				ent = ent + 1;
			iommu->alloc_info[cnum].next = ent;
			if (ent == flush_point)
				__iommu_flushall(iommu);
			break;
		}
		iopte += (1 << cnum);
		ent++;
		if (iopte >= limit) {
			iopte = (iommu->page_table +
				 (cnum <<
				  (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
			ent = 0;
		}
		if (ent == flush_point)
			__iommu_flushall(iommu);
		if (iopte == first)
			goto bad;
	}

	/* I've got your streaming cluster right here buddy boy... */
	return iopte;

bad:
	printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n",
	       npages);
	return NULL;
}

static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base,
				   unsigned long npages, unsigned long ctx)
{
	unsigned long cnum, ent;

	cnum = 0;
	while ((1UL << cnum) < npages)
		cnum++;

	ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits))
		>> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits);

	/* If the global flush might not have caught this entry,
	 * adjust the flush point such that we will flush before
	 * ever trying to reuse it.
	 */
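	/* For example, with next == 4 and flush == 7, freeing the cluster
	 * slot ent == 5 pulls the flush point back to 5, so the allocator
	 * calls __iommu_flushall() before it hands entry 5 out again.
	 */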
#define between(X,Y,Z)	(((Z) - (Y)) >= ((X) - (Y)))
	if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
		iommu->alloc_info[cnum].flush = ent;
#undef between
}

/* We allocate consistent mappings from the end of cluster zero. */
static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages)
{
	iopte_t *iopte;

	iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS));
	while (iopte > iommu->page_table) {
		iopte--;
		if (IOPTE_IS_DUMMY(iommu, iopte)) {
			unsigned long tmp = npages;

			while (--tmp) {
				iopte--;
				if (!IOPTE_IS_DUMMY(iommu, iopte))
					break;
			}
			if (tmp == 0) {
				u32 entry = (iopte - iommu->page_table);

				if (entry < iommu->lowest_consistent_map)
					iommu->lowest_consistent_map = entry;
				return iopte;
			}
		}
	}
	return NULL;
}

/* Allocate and map kernel buffer of size SIZE using consistent mode
 * DMA for PCI device PDEV.  Return non-NULL cpu-side address if
 * successful and set *DMA_ADDRP to the PCI side dma address.
 */
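/* A typical driver sequence looks like the following sketch (the
 * driver-side variable names are illustrative only):
 *
 *	void *cpu_buf;
 *	dma_addr_t dma_handle;
 *
 *	cpu_buf = pci_alloc_consistent(pdev, size, &dma_handle);
 *	if (cpu_buf == NULL)
 *		return -ENOMEM;
 *	...
 *	pci_free_consistent(pdev, size, cpu_buf, dma_handle);
 */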
void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	iopte_t *iopte;
	unsigned long flags, order, first_page, ctx;
	void *ret;
	int npages;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (order >= 10)
		return NULL;

	first_page = __get_free_pages(GFP_ATOMIC, order);
	if (first_page == 0UL)
		return NULL;
	memset((char *)first_page, 0, PAGE_SIZE << order);

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT);
	if (iopte == NULL) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_pages(first_page, order);
		return NULL;
	}

	*dma_addrp = (iommu->page_table_map_base +
		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	npages = size >> IO_PAGE_SHIFT;
	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = iommu->iommu_cur_ctx++;
	first_page = __pa(first_page);
	while (npages--) {
		iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) |
				     IOPTE_WRITE |
				     (first_page & IOPTE_PAGE));
		iopte++;
		first_page += IO_PAGE_SIZE;
	}

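	/* Flush any stale translations for these bus addresses out of the
	 * IOMMU TLB before the new consistent IOPTEs can be used.
	 */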
	{
		int i;
		u32 daddr = *dma_addrp;

		npages = size >> IO_PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			pci_iommu_write(iommu->iommu_flush, daddr);
			daddr += IO_PAGE_SIZE;
		}
	}

	spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;
}

/* Free and unmap a consistent DMA translation. */
void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	iopte_t *iopte;
	unsigned long flags, order, npages, i, ctx;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	iopte = iommu->page_table +
		((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	if ((iopte - iommu->page_table) ==
	    iommu->lowest_consistent_map) {
		iopte_t *walk = iopte + npages;
		iopte_t *limit;

		limit = (iommu->page_table +
			 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
		while (walk < limit) {
			if (!IOPTE_IS_DUMMY(iommu, walk))
				break;
			walk++;
		}
		iommu->lowest_consistent_map =
			(walk - iommu->page_table);
	}

	/* Data for consistent mappings cannot enter the streaming
	 * buffers, so we only need to update the TSB.  We flush
	 * the IOMMU here as well to prevent conflicts with the
	 * streaming mapping deferred tlb flush scheme.
	 */

	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;

	for (i = 0; i < npages; i++, iopte++)
		iopte_make_dummy(iommu, iopte);

	if (iommu->iommu_ctxflush) {
		pci_iommu_write(iommu->iommu_ctxflush, ctx);
	} else {
		for (i = 0; i < npages; i++) {
			u32 daddr = dvma + (i << IO_PAGE_SHIFT);

			pci_iommu_write(iommu->iommu_flush, daddr);
		}
	}

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

/* Map a single buffer at PTR of SZ bytes for PCI DMA
 * in streaming mode.
 */
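/* Sketch of a driver's view of the streaming API (error handling and
 * driver-specific names omitted):
 *
 *	dma_addr_t dma = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);
 *	... device DMA runs against "dma" ...
 *	pci_unmap_single(pdev, dma, len, PCI_DMA_TODEVICE);
 */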
dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	iopte_t *base;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr, ctx;
	u32 bus_addr, ret;
	unsigned long iopte_protection;

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	if (direction == PCI_DMA_NONE)
		BUG();

	oaddr = (unsigned long)ptr;
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);

	base = alloc_streaming_cluster(iommu, npages);
	if (base == NULL)
		goto bad;
	bus_addr = (iommu->page_table_map_base +
		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = iommu->iommu_cur_ctx++;
	if (strbuf->strbuf_enabled)
		iopte_protection = IOPTE_STREAMING(ctx);
	else
		iopte_protection = IOPTE_CONSISTENT(ctx);
	if (direction != PCI_DMA_TODEVICE)
		iopte_protection |= IOPTE_WRITE;

	for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
		iopte_val(*base) = iopte_protection | base_paddr;

	spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;

bad:
	spin_unlock_irqrestore(&iommu->lock, flags);
	BUG();
	return 0;
}

/* Unmap a single streaming mode DMA translation. */
void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	iopte_t *base;
	unsigned long flags, npages, i, ctx;

	if (direction == PCI_DMA_NONE)
		BUG();

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	base = iommu->page_table +
		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
#ifdef DEBUG_PCI_IOMMU
	if (IOPTE_IS_DUMMY(iommu, base))
		printk("pci_unmap_single called on non-mapped region %08lx,%08lx from %016lx\n",
		       (unsigned long) bus_addr, (unsigned long) sz,
		       (unsigned long) __builtin_return_address(0));
#endif
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	/* Record the context, if any. */
	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

	/* Step 1: Kick data out of streaming buffers if necessary. */
	if (strbuf->strbuf_enabled) {
		u32 vaddr = bus_addr;

		PCI_STC_FLUSHFLAG_INIT(strbuf);
		if (strbuf->strbuf_ctxflush &&
		    iommu->iommu_ctxflush) {
			unsigned long matchreg, flushreg;

			flushreg = strbuf->strbuf_ctxflush;
			matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
			do {
				pci_iommu_write(flushreg, ctx);
			} while(((long)pci_iommu_read(matchreg)) < 0L);
		} else {
			for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
				pci_iommu_write(strbuf->strbuf_pflush, vaddr);
		}

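		/* Writing the physical address of the flush flag to the
		 * fsync register asks the streaming cache to store a
		 * non-zero word there once the flushes above have drained;
		 * the write_complete_reg read forces the PIO out, and we
		 * then spin until the flag becomes non-zero.
		 */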
		pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
		(void) pci_iommu_read(iommu->write_complete_reg);
		while (!PCI_STC_FLUSHFLAG_SET(strbuf))
			rmb();
	}

	/* Step 2: Clear out first TSB entry. */
	iopte_make_dummy(iommu, base);

	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
			       npages, ctx);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

#define SG_ENT_PHYS_ADDRESS(SG)	\
	((SG)->address ? \
	 __pa((SG)->address) : \
	 (__pa(page_address((SG)->page)) + (SG)->offset))

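/* Fill in the IOPTEs for the NUSED coalesced DMA segments that
 * prepare_sg() left in the first NUSED scatterlist entries, walking
 * the original NELEMS entries to pick up their physical addresses.
 * SG_ENT_PHYS_ADDRESS() copes with both address-based and
 * page/offset-based scatterlist entries.
 */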
static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
			   int nused, int nelems, unsigned long iopte_protection)
{
	struct scatterlist *dma_sg = sg;
	struct scatterlist *sg_end = sg + nelems;
	int i;

	for (i = 0; i < nused; i++) {
		unsigned long pteval = ~0UL;
		u32 dma_npages;

		dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
			      dma_sg->dma_length +
			      ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
		do {
			unsigned long offset;
			signed int len;

			/* If we are here, we know we have at least one
			 * more page to map.  So walk forward until we
			 * hit a page crossing, and begin creating new
			 * mappings from that spot.
			 */
			for (;;) {
				unsigned long tmp;

				tmp = SG_ENT_PHYS_ADDRESS(sg);
				len = sg->length;
				if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = tmp & IO_PAGE_MASK;
					offset = tmp & (IO_PAGE_SIZE - 1UL);
					break;
				}
				if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
					pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
					offset = 0UL;
					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
					break;
				}
				sg++;
			}

			pteval = iopte_protection | (pteval & IOPTE_PAGE);
			while (len > 0) {
				*iopte++ = __iopte(pteval);
				pteval += IO_PAGE_SIZE;
				len -= (IO_PAGE_SIZE - offset);
				offset = 0;
				dma_npages--;
			}

			pteval = (pteval & IOPTE_PAGE) + len;
			sg++;

			/* Skip over any tail mappings we've fully mapped,
			 * adjusting pteval along the way.  Stop when we
			 * detect a page crossing event.
			 */
			while (sg < sg_end &&
			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
			       ((pteval ^
				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
				pteval += sg->length;
				sg++;
			}
			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
				pteval = ~0UL;
		} while (dma_npages != 0);
		dma_sg++;
	}
}

/* Map a set of buffers described by SGLIST with NELEMS array
 * elements in streaming mode for PCI DMA.
 * When making changes here, inspect the assembly output; it took some
 * effort to keep this routine from spilling variables to stack slots.
 */
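/* A driver consumes the result through the sg_dma_address()/sg_dma_len()
 * accessors, e.g. (sketch; setup_rx_desc() is a made-up driver helper):
 *
 *	int i, count = pci_map_sg(pdev, sglist, nelems, PCI_DMA_FROMDEVICE);
 *
 *	for (i = 0; i < count; i++)
 *		setup_rx_desc(sg_dma_address(&sglist[i]),
 *			      sg_dma_len(&sglist[i]));
 *	...
 *	pci_unmap_sg(pdev, sglist, nelems, PCI_DMA_FROMDEVICE);
 */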
int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	unsigned long flags, ctx, npages, iopte_protection;
	iopte_t *base;
	u32 dma_base;
	struct scatterlist *sgtmp;
	int used;

	/* Fast path single entry scatterlists. */
	if (nelems == 1) {
		sglist->dma_address =
			pci_map_single(pdev,
				       (sglist->address ?
					sglist->address :
					(page_address(sglist->page) + sglist->offset)),
				       sglist->length, direction);
		sglist->dma_length = sglist->length;
		return 1;
	}

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	if (direction == PCI_DMA_NONE)
		BUG();

	/* Step 1: Prepare scatter list. */

	npages = prepare_sg(sglist, nelems);

	/* Step 2: Allocate a cluster. */

	spin_lock_irqsave(&iommu->lock, flags);

	base = alloc_streaming_cluster(iommu, npages);
	if (base == NULL)
		goto bad;
	dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT);

	/* Step 3: Normalize DMA addresses. */
	used = nelems;

	sgtmp = sglist;
	while (used && sgtmp->dma_length) {
		sgtmp->dma_address += dma_base;
		sgtmp++;
		used--;
	}
	used = nelems - used;

	/* Step 4: Choose a context if necessary. */
	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = iommu->iommu_cur_ctx++;

	/* Step 5: Create the mappings. */
	if (strbuf->strbuf_enabled)
		iopte_protection = IOPTE_STREAMING(ctx);
	else
		iopte_protection = IOPTE_CONSISTENT(ctx);
	if (direction != PCI_DMA_TODEVICE)
		iopte_protection |= IOPTE_WRITE;
	fill_sg(base, sglist, used, nelems, iopte_protection);
#ifdef VERIFY_SG
	verify_sglist(sglist, nelems, base, npages);
#endif

	spin_unlock_irqrestore(&iommu->lock, flags);

	return used;

bad:
	spin_unlock_irqrestore(&iommu->lock, flags);
	BUG();
	return 0;
}

/* Unmap a set of streaming mode DMA translations. */
void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	iopte_t *base;
	unsigned long flags, ctx, i, npages;
	u32 bus_addr;

	if (direction == PCI_DMA_NONE)
		BUG();

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	bus_addr = sglist->dma_address & IO_PAGE_MASK;

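	/* Entries past the segments actually used by pci_map_sg() carry a
	 * zero dma_length, so scan up to the last populated one to learn
	 * how many IOMMU pages the mapping spans.
	 */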
	for (i = 1; i < nelems; i++)
		if (sglist[i].dma_length == 0)
			break;
	i--;
	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT;

	base = iommu->page_table +
		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

#ifdef DEBUG_PCI_IOMMU
	if (IOPTE_IS_DUMMY(iommu, base))
		printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n",
		       (unsigned long) sglist->dma_address, nelems,
		       (unsigned long) __builtin_return_address(0));
#endif

	spin_lock_irqsave(&iommu->lock, flags);

	/* Record the context, if any. */
	ctx = 0;
	if (iommu->iommu_ctxflush)
		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

	/* Step 1: Kick data out of streaming buffers if necessary. */
	if (strbuf->strbuf_enabled) {
		u32 vaddr = (u32) bus_addr;

		PCI_STC_FLUSHFLAG_INIT(strbuf);
		if (strbuf->strbuf_ctxflush &&
		    iommu->iommu_ctxflush) {
			unsigned long matchreg, flushreg;

			flushreg = strbuf->strbuf_ctxflush;
			matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
			do {
				pci_iommu_write(flushreg, ctx);
			} while(((long)pci_iommu_read(matchreg)) < 0L);
		} else {
			for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
				pci_iommu_write(strbuf->strbuf_pflush, vaddr);
		}

		pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
		(void) pci_iommu_read(iommu->write_complete_reg);
		while (!PCI_STC_FLUSHFLAG_SET(strbuf))
			rmb();
	}

	/* Step 2: Clear out first TSB entry. */
	iopte_make_dummy(iommu, base);

	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
			       npages, ctx);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

/* Make physical memory consistent for a single
 * streaming mode DMA translation after a transfer.
 */
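/* Drivers call this (or pci_dma_sync_sg() below) before the CPU looks at
 * a buffer that remains mapped, e.g. after a device-to-memory transfer
 * completes (sketch; process_rx() and cpu_buf are made-up driver names):
 *
 *	pci_dma_sync_single(pdev, dma, len, PCI_DMA_FROMDEVICE);
 *	process_rx(cpu_buf, len);
 */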
void pci_dma_sync_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	unsigned long flags, ctx, npages;

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	if (!strbuf->strbuf_enabled)
		return;

	spin_lock_irqsave(&iommu->lock, flags);

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	/* Step 1: Record the context, if any. */
	ctx = 0;
	if (iommu->iommu_ctxflush &&
	    strbuf->strbuf_ctxflush) {
		iopte_t *iopte;

		iopte = iommu->page_table +
			((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
	}

	/* Step 2: Kick data out of streaming buffers. */
	PCI_STC_FLUSHFLAG_INIT(strbuf);
	if (iommu->iommu_ctxflush &&
	    strbuf->strbuf_ctxflush) {
		unsigned long matchreg, flushreg;

		flushreg = strbuf->strbuf_ctxflush;
		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
		do {
			pci_iommu_write(flushreg, ctx);
		} while(((long)pci_iommu_read(matchreg)) < 0L);
	} else {
		unsigned long i;

		for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
			pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
	}

	/* Step 3: Perform flush synchronization sequence. */
	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
	(void) pci_iommu_read(iommu->write_complete_reg);
	while (!PCI_STC_FLUSHFLAG_SET(strbuf))
		rmb();

	spin_unlock_irqrestore(&iommu->lock, flags);
}

/* Make physical memory consistent for a set of streaming
 * mode DMA translations after a transfer.
 */
void pci_dma_sync_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	struct pcidev_cookie *pcp;
	struct pci_iommu *iommu;
	struct pci_strbuf *strbuf;
	unsigned long flags, ctx;

	pcp = pdev->sysdata;
	iommu = pcp->pbm->iommu;
	strbuf = &pcp->pbm->stc;

	if (!strbuf->strbuf_enabled)
		return;

	spin_lock_irqsave(&iommu->lock, flags);

	/* Step 1: Record the context, if any. */
	ctx = 0;
	if (iommu->iommu_ctxflush &&
	    strbuf->strbuf_ctxflush) {
		iopte_t *iopte;

		iopte = iommu->page_table +
			((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
	}

	/* Step 2: Kick data out of streaming buffers. */
	PCI_STC_FLUSHFLAG_INIT(strbuf);
	if (iommu->iommu_ctxflush &&
	    strbuf->strbuf_ctxflush) {
		unsigned long matchreg, flushreg;

		flushreg = strbuf->strbuf_ctxflush;
		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
		do {
			pci_iommu_write(flushreg, ctx);
		} while (((long)pci_iommu_read(matchreg)) < 0L);
	} else {
		unsigned long i, npages;
		u32 bus_addr;

		bus_addr = sglist[0].dma_address & IO_PAGE_MASK;

		for (i = 1; i < nelems; i++)
			if (!sglist[i].dma_length)
				break;
		i--;
		npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT;
		for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
			pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
	}

	/* Step 3: Perform flush synchronization sequence. */
	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
	(void) pci_iommu_read(iommu->write_complete_reg);
	while (!PCI_STC_FLUSHFLAG_SET(strbuf))
		rmb();

	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
{
	struct pci_dev *ali_isa_bridge;
	u8 val;

	/* ALI sound chips generate 31 bits of DMA; a special register
	 * in the M1533 ISA bridge determines what bit 31 is emitted as.
	 */
	ali_isa_bridge = pci_find_device(PCI_VENDOR_ID_AL,
					 PCI_DEVICE_ID_AL_M1533,
					 NULL);
	if (ali_isa_bridge == NULL)
		return;

	pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
	if (set_bit)
		val |= 0x01;
	else
		val &= ~0x01;
	pci_write_config_byte(ali_isa_bridge, 0x7e, val);
}

int pci_dma_supported(struct pci_dev *pdev, u64 device_mask)
{
	u64 dma_addr_mask;

	if (pdev == NULL) {
		dma_addr_mask = 0xffffffff;
	} else {
		struct pcidev_cookie *pcp = pdev->sysdata;
		struct pci_iommu *iommu = pcp->pbm->iommu;

		dma_addr_mask = iommu->dma_addr_mask;

		if (pdev->vendor == PCI_VENDOR_ID_AL &&
		    pdev->device == PCI_DEVICE_ID_AL_M5451 &&
		    device_mask == 0x7fffffff) {
			ali_sound_dma_hack(pdev,
					   (dma_addr_mask & 0x80000000) != 0);
			return 1;
		}
	}

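	/* Every IOMMU translation handed out above lives in a 32-bit DVMA
	 * window, so a mask wider than 32 bits is refused; callers are
	 * expected to retry with a 32-bit mask and take the comparison
	 * against dma_addr_mask below.
	 */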
	if (device_mask >= (1UL << 32UL))
		return 0;

	return (device_mask & dma_addr_mask) == dma_addr_mask;
}