/*
 * pci_dma.c
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Dynamic DMA mapping support.
 *
 * Manages the TCE space assigned to this partition.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/ppcdebug.h>

#include <asm/iSeries/HvCallXm.h>
#include <asm/iSeries/LparData.h>
#include <asm/pci_dma.h>
#include <asm/pci-bridge.h>
#include <asm/iSeries/iSeries_pci.h>

#include <asm/machdep.h>

#include "pci.h"

/* #define DEBUG_TCE 1   */
/* #define MONITOR_TCE 1 */ /* Turn on to sanity check TCE generation. */


/* Initialize so this guy does not end up in the BSS section.
 * Only used to pass OF initialization data set in prom.c into the main
 * kernel code -- data ultimately copied into tceTables[].
 */
extern struct _of_tce_table of_tce_table[];

extern struct pci_controller* hose_head;
extern struct pci_controller** hose_tail;
extern struct list_head iSeries_Global_Device_List;

struct TceTable   virtBusVethTceTable;	/* Tce table for virtual ethernet */
struct TceTable   virtBusVioTceTable;	/* Tce table for virtual I/O */

struct iSeries_Device_Node iSeries_veth_dev_node = { LogicalSlot: 0xFF, DevTceTable: &virtBusVethTceTable };
struct iSeries_Device_Node iSeries_vio_dev_node  = { LogicalSlot: 0xFF, DevTceTable: &virtBusVioTceTable };

struct pci_dev    iSeries_veth_dev_st = { sysdata: &iSeries_veth_dev_node };
struct pci_dev    iSeries_vio_dev_st  = { sysdata: &iSeries_vio_dev_node  };

struct pci_dev  * iSeries_veth_dev = &iSeries_veth_dev_st;
struct pci_dev  * iSeries_vio_dev  = &iSeries_vio_dev_st;

/* Device TceTable is stored in Device Node */
/* struct TceTable * tceTables[256]; */	/* Tce tables for 256 busses
					 * Bus 255 is the virtual bus
					 * zero indicates no bus defined
					 */
/* allocates a contiguous range of tces (power-of-2 size) */
static inline long alloc_tce_range(struct TceTable *,
				   unsigned order );

/* allocates a contiguous range of tces (power-of-2 size)
 * assumes lock already held
 */
static long alloc_tce_range_nolock(struct TceTable *,
				   unsigned order );

/* frees a contiguous range of tces (power-of-2 size) */
static inline void free_tce_range(struct TceTable *,
				  long tcenum,
				  unsigned order );

/* frees a contiguous range of tces (power-of-2 size)
 * assumes lock already held
 */
void free_tce_range_nolock(struct TceTable *,
			   long tcenum,
			   unsigned order );

/* allocates a range of tces and sets them to the pages  */
dma_addr_t get_tces(struct TceTable *, unsigned order, void *page,
		    unsigned numPages, int direction);

static long test_tce_range( struct TceTable *,
			    long tcenum,
			    unsigned order );

static unsigned fill_scatterlist_sg(struct scatterlist *sg, int nents,
				    dma_addr_t dma_addr,
				    unsigned long numTces );

static unsigned long num_tces_sg( struct scatterlist *sg,
				  int nents );

static dma_addr_t create_tces_sg( struct TceTable *tbl,
				  struct scatterlist *sg,
				  int nents,
				  unsigned numTces,
				  int direction );

static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
				      struct TceTable *tce_table_parms );

static void getTceTableParmsPSeries( struct pci_controller *phb,
				     struct device_node *dn,
				     struct TceTable *tce_table_parms );

static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
				    struct device_node *dn,
				    struct TceTable *newTceTable );

static struct TceTable* findHwTceTable(struct TceTable * newTceTable );

void create_pci_bus_tce_table( unsigned long token );

u8 iSeries_Get_Bus( struct pci_dev * dv )
{
	return 0;
}

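/* Look up the TCE table to use for a given pci_dev.  A NULL dev falls back
 * to the ISA bridge device.  On iSeries the table hangs off the
 * iSeries_Device_Node; on pSeries it is the tce_table pointer that
 * create_tce_tables() copied into each device_node.
 */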
static inline struct TceTable *get_tce_table(struct pci_dev *dev)
{
	if (!dev)
		dev = ppc64_isabridge_dev;
	if (!dev)
		return NULL;
	if (systemcfg->platform == PLATFORM_ISERIES_LPAR) {
		return ISERIES_DEVNODE(dev)->DevTceTable;
	} else {
		return PCI_GET_DN(dev)->tce_table;
	}
}

static unsigned long __inline__ count_leading_zeros64( unsigned long x )
{
	unsigned long lz;
	asm("cntlzd %0,%1" : "=r"(lz) : "r"(x));
	return lz;
}

static void tce_build_iSeries(struct TceTable *tbl, long tcenum,
			       unsigned long uaddr, int direction )
{
	u64 setTceRc;
	union Tce tce;

	PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
	PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
	       tcenum, tbl, tbl->index);

	tce.wholeTce = 0;
	tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;

	/* If for virtual bus */
	if ( tbl->tceType == TCE_VB ) {
		tce.tceBits.valid = 1;
		tce.tceBits.allIo = 1;
		if ( direction != PCI_DMA_TODEVICE )
			tce.tceBits.readWrite = 1;
	} else {
		/* If for PCI bus */
		tce.tceBits.readWrite = 1; // Read allowed
		if ( direction != PCI_DMA_TODEVICE )
			tce.tceBits.pciWrite = 1;
	}

	setTceRc = HvCallXm_setTce((u64)tbl->index,
				   (u64)tcenum,
				   tce.wholeTce );
	if(setTceRc) {
		panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", setTceRc);
	}
}

static void tce_build_pSeries(struct TceTable *tbl, long tcenum,
			       unsigned long uaddr, int direction )
{
	union Tce tce;
	union Tce *tce_addr;

	PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
	PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
	       tcenum, tbl, tbl->index);

	tce.wholeTce = 0;
	tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;

	tce.tceBits.readWrite = 1; // Read allowed
	if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1;

	tce_addr = ((union Tce *)tbl->base) + tcenum;
	*tce_addr = (union Tce)tce.wholeTce;

}

/*
 * Build a TceTable structure.  This contains a multi-level bit map which
 * is used to manage allocation of the tce space.
 */
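/* Layout of the multi-level bit map, as the code below constructs it:
 * level 0 carries one bit per TCE, and each higher level carries one bit
 * per aligned pair of blocks from the level below, i.e. one bit per
 * 2^level contiguous TCEs.  A set bit means the block is free.  The
 * highest level has all of its bits set initially (lower levels only get
 * a stray last bit when their bit count is odd); allocations split blocks
 * downward and frees buddy them back up.
 */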
struct TceTable *build_tce_table(struct TceTable * tbl)
{
	unsigned long bits, bytes, totalBytes;
	unsigned long numBits[NUM_TCE_LEVELS], numBytes[NUM_TCE_LEVELS];
	unsigned i, k, m;
	unsigned char * pos, * p, b;

	PPCDBG(PPCDBG_TCEINIT, "build_tce_table: tbl = 0x%lx\n", tbl);
	spin_lock_init( &(tbl->lock) );

	tbl->mlbm.maxLevel = 0;

	/* Compute number of bits and bytes for each level of the
	 * multi-level bit map
	 */
	totalBytes = 0;
	bits = tbl->size * (PAGE_SIZE / sizeof( union Tce ));

	for ( i=0; i<NUM_TCE_LEVELS; ++i ) {
		bytes = ((bits+63)/64) * 8;
		PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d bits=%ld, bytes=%ld\n", i, bits, bytes );
		numBits[i] = bits;
		numBytes[i] = bytes;
		bits /= 2;
		totalBytes += bytes;
	}
	PPCDBG(PPCDBG_TCEINIT, "build_tce_table: totalBytes=%ld\n", totalBytes );

	pos = (char *)__get_free_pages( GFP_ATOMIC, get_order( totalBytes ));

	if ( pos == NULL ) {
		panic("PCI_DMA: Allocation failed in build_tce_table!\n");
	}

	/* For each level, fill in the pointer to the bit map,
	 * and turn on the last bit in the bit map (if the
	 * number of bits in the map is odd).  The highest
	 * level will get all of its bits turned on.
	 */
	memset( pos, 0, totalBytes );
	for (i=0; i<NUM_TCE_LEVELS; ++i) {
		if ( numBytes[i] ) {
			tbl->mlbm.level[i].map = pos;
			tbl->mlbm.maxLevel = i;

			if ( numBits[i] & 1 ) {
				p = pos + numBytes[i] - 1;
				m = (( numBits[i] % 8) - 1) & 7;
				*p = 0x80 >> m;
				PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d last bit %x\n", i, 0x80>>m );
			}
		}
		else
			tbl->mlbm.level[i].map = 0;
		pos += numBytes[i];
		tbl->mlbm.level[i].numBits = numBits[i];
		tbl->mlbm.level[i].numBytes = numBytes[i];
	}

	/* For the highest level, turn on all the bits */

	i = tbl->mlbm.maxLevel;
	p = tbl->mlbm.level[i].map;
	m = numBits[i];
	PPCDBG(PPCDBG_TCEINIT, "build_tce_table: highest level (%d) has all bits set\n", i);
	for (k=0; k<numBytes[i]; ++k) {
		if ( m >= 8 ) {
			/* handle full bytes */
			*p++ = 0xff;
			m -= 8;
		}
		else if(m>0) {
			/* handle the last partial byte */
			b = 0x80;
			*p = 0;
			while (m) {
				*p |= b;
				b >>= 1;
				--m;
			}
		} else {
			break;
		}
	}

	return tbl;
}

static inline long alloc_tce_range( struct TceTable *tbl, unsigned order )
{
	long retval;
	unsigned long flags;

	/* Lock the tce allocation bitmap */
	spin_lock_irqsave( &(tbl->lock), flags );

	/* Do the actual work */
	retval = alloc_tce_range_nolock( tbl, order );

	/* Unlock the tce allocation bitmap */
	spin_unlock_irqrestore( &(tbl->lock), flags );

	return retval;
}

static long alloc_tce_range_nolock( struct TceTable *tbl, unsigned order )
{
	unsigned long numBits, numBytes;
	unsigned long i, bit, block, mask;
	long tcenum;
	u64 * map;

	/* If the order (power of 2 size) requested is larger than our
	 * biggest, indicate failure
	 */
	if(order >= NUM_TCE_LEVELS) {
		/* This can happen if a block of TCEs is not found.  This code
		 * may be in a recursive loop looking up the bit map for the range.
		 */
		panic("PCI_DMA: alloc_tce_range_nolock: invalid order: %d\n",order);
	}

	numBits =  tbl->mlbm.level[order].numBits;
	numBytes = tbl->mlbm.level[order].numBytes;
	map =      (u64 *)tbl->mlbm.level[order].map;

	/* Initialize return value to -1 (failure) */
	tcenum = -1;

	/* Loop through the bytes of the bitmap */
	for (i=0; i<numBytes/8; ++i) {
		if ( *map ) {
			/* A free block is found, compute the block
			 * number (of this size)
			 */
			bit = count_leading_zeros64( *map );
			block = (i * 64) + bit;    /* Bit count to free entry */

			/* turn off the bit in the map to indicate
			 * that the block is now in use
			 */
			mask = 0x1UL << (63 - bit);
			*map &= ~mask;

			/* compute the index into our tce table for
			 * the first tce in the block
			 */
			PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: allocating block %ld, (byte=%ld, bit=%ld) order %d\n", block, i, bit, order );
			tcenum = block << order;
			return tcenum;
		}
		++map;
	}

#ifdef DEBUG_TCE
	if ( tcenum == -1 ) {
		PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: no available blocks of order = %d\n", order );
		if ( order < tbl->mlbm.maxLevel ) {
			PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: trying next bigger size\n" );
		}
		else {
			panic("PCI_DMA: alloc_tce_range_nolock: maximum size reached...failing\n");
		}
	}
#endif

	/* If no block of the requested size was found, try the next
	 * size bigger.  If one of those is found, return the second
	 * half of the block to freespace and keep the first half
	 */
	if((tcenum == -1) && (order < (NUM_TCE_LEVELS - 1))) {
		tcenum = alloc_tce_range_nolock( tbl, order+1 );
		if ( tcenum != -1 ) {
			free_tce_range_nolock( tbl, tcenum+(1<<order), order );
		}
	}

	/* Return the index of the first tce in the block
	 * (or -1 if we failed)
	 */
	return tcenum;
}

static inline void free_tce_range(struct TceTable *tbl,
				  long tcenum, unsigned order )
{
	unsigned long flags;

	/* Lock the tce allocation bitmap */
	spin_lock_irqsave( &(tbl->lock), flags );

	/* Do the actual work */
	free_tce_range_nolock( tbl, tcenum, order );

	/* Unlock the tce allocation bitmap */
	spin_unlock_irqrestore( &(tbl->lock), flags );

}

void free_tce_range_nolock(struct TceTable *tbl,
			   long tcenum, unsigned order )
{
	unsigned long block;
	unsigned byte, bit, mask, b;
	unsigned char  * map, * bytep;

	if (order >= NUM_TCE_LEVELS) {
		panic("PCI_DMA: free_tce_range: invalid order: 0x%x\n",order);
		return;
	}

	block = tcenum >> order;

#ifdef MONITOR_TCE
	if ( tcenum != (block << order ) ) {
		printk("PCI_DMA: Free_tce_range: tcenum %lx misaligned for order %x\n",tcenum, order);
		return;
	}
	if ( block >= tbl->mlbm.level[order].numBits ) {
		printk("PCI_DMA: Free_tce_range: tcenum %lx is outside the range of this map (order %x, numBits %lx\n",
		       tcenum, order, tbl->mlbm.level[order].numBits );
		return;
	}
	if ( test_tce_range( tbl, tcenum, order ) ) {
		printk("PCI_DMA: Freeing range not allocated: TceTable %p, tcenum %lx, order %x\n",tbl, tcenum, order );
		return;
	}
#endif

	map = tbl->mlbm.level[order].map;
	byte  = block / 8;
	bit   = block % 8;
	mask  = 0x80 >> bit;
	bytep = map + byte;

#ifdef DEBUG_TCE
	PPCDBG(PPCDBG_TCE,"free_tce_range_nolock: freeing block %ld (byte=%d, bit=%d) of order %d\n",
	       block, byte, bit, order);
#endif

#ifdef MONITOR_TCE
	if ( *bytep & mask ) {
		panic("PCI_DMA: Tce already free: TceTable %p, tcenum %lx, order %x\n",tbl,tcenum,order);
	}
#endif

	*bytep |= mask;

	/* If there is a higher level in the bit map than this we may be
	 * able to buddy up this block with its partner.
	 *   If this is the highest level we can't buddy up
	 *   If this level has an odd number of bits and
	 *      we are freeing the last block we can't buddy up
	 * Don't buddy up if it's in the first 1/4 of the level
	 */
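	/* Worked example (order 0, i.e. single-TCE blocks): freeing block 5
	 * sets bit 5 of byte 0.  'bit &= 6' points at its buddy pair (bits 4
	 * and 5); if both are now set, both are cleared and block 2 of order 1
	 * is freed instead, which may in turn buddy up at the next level.
	 */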
	if (( order < tbl->mlbm.maxLevel ) &&
	    ( block > (tbl->mlbm.level[order].numBits/4) ) &&
	    (( block < tbl->mlbm.level[order].numBits-1 ) ||
	      ( 0 == ( tbl->mlbm.level[order].numBits & 1)))) {
		/* See if we can buddy up the block we just freed */
		bit  &= 6;		/* get to the first of the buddy bits */
		mask  = 0xc0 >> bit;	/* build two bit mask */
		b     = *bytep & mask;	/* Get the two bits */
		if ( 0 == (b ^ mask) ) { /* If both bits are on */
			/* both of the buddy blocks are free, so we can combine them */
			*bytep ^= mask;	/* turn off the two bits */
			block = ( byte * 8 ) + bit; /* block of first of buddies */
			tcenum = block << order;
			/* free the buddied block */
			PPCDBG(PPCDBG_TCE,
			       "free_tce_range: buddying blocks %ld & %ld\n",
			       block, block+1);
			free_tce_range_nolock( tbl, tcenum, order+1 );
		}
	}
}

static long test_tce_range( struct TceTable *tbl, long tcenum, unsigned order )
{
	unsigned long block;
	unsigned byte, bit, mask, b;
	long	retval, retLeft, retRight;
	unsigned char  * map;

	map = tbl->mlbm.level[order].map;
	block = tcenum >> order;
	byte = block / 8;		/* Byte within bitmap */
	bit  = block % 8;		/* Bit within byte */
	mask = 0x80 >> bit;
	b    = (*(map+byte) & mask );	/* 0 if block is allocated, else free */
	if ( b )
		retval = 1;		/* 1 == block is free */
	else
		retval = 0;		/* 0 == block is allocated */
	/* Test bits at all levels below this to ensure that all agree */

	if (order) {
		retLeft  = test_tce_range( tbl, tcenum, order-1 );
		retRight = test_tce_range( tbl, tcenum+(1<<(order-1)), order-1 );
		if ( retLeft || retRight ) {
			retval = 2;
		}
	}

	/* Test bits at all levels above this to ensure that all agree */

	return retval;
}

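/* Allocate a power-of-2 range of TCEs and map numPages pages starting at
 * the (page-aligned) kernel address 'page' into it.  Returns the bus
 * address of the first TCE; a failed range allocation panics, so callers
 * only ever see a valid mapping.
 */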
inline dma_addr_t get_tces(struct TceTable *tbl, unsigned order,
			   void *page, unsigned numPages, int direction)
{
	long tcenum;
	unsigned long uaddr;
	unsigned i;
	dma_addr_t retTce = NO_TCE;

	uaddr = (unsigned long)page & PAGE_MASK;

	/* Allocate a range of tces */
	tcenum = alloc_tce_range(tbl, order);
	if (tcenum != -1) {
		/* We got the tces we wanted */
		tcenum += tbl->startOffset;	/* Offset into real TCE table */
		retTce = tcenum << PAGE_SHIFT;	/* Set the return dma address */
		/* Setup a tce for each page */
		for (i=0; i<numPages; ++i) {
			ppc_md.tce_build(tbl, tcenum, uaddr, direction);
			++tcenum;
			uaddr += PAGE_SIZE;
		}
		/* Make sure the update is visible to hardware.
		   sync required to synchronize the update to
		   the TCE table with the MMIO that will send
		   the bus address to the IOA */
		__asm__ __volatile__ ("sync" : : : "memory");
	} else {
		panic("get_tces: TCE allocation failed. 0x%p 0x%x\n",
		      tbl, order);
	}

	return retTce;
}

static void tce_free_one_iSeries( struct TceTable *tbl, long tcenum )
{
	u64 set_tce_rc;
	union Tce tce;
	tce.wholeTce = 0;
	set_tce_rc = HvCallXm_setTce((u64)tbl->index,
				   (u64)tcenum,
				   tce.wholeTce);
	if ( set_tce_rc )
		panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", set_tce_rc);

}

static void tce_free_one_pSeries( struct TceTable *tbl, long tcenum )
{
	union Tce tce;
	union Tce *tce_addr;

	tce.wholeTce = 0;

	tce_addr  = ((union Tce *)tbl->base) + tcenum;
	*tce_addr = (union Tce)tce.wholeTce;

}

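/* Unmap a previously mapped range: validate that the bus address and page
 * count fall inside this table, clear each TCE through ppc_md.tce_free_one,
 * then give the (power-of-2) range back to the allocation bitmap.
 */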
void tce_free(struct TceTable *tbl, dma_addr_t dma_addr,
	      unsigned order, unsigned num_pages)
{
	long tcenum, total_tces, free_tce;
	unsigned i;

	total_tces = (tbl->size * (PAGE_SIZE / sizeof(union Tce)));

	tcenum = dma_addr >> PAGE_SHIFT;
	free_tce = tcenum - tbl->startOffset;

	if ( ( (free_tce + num_pages) > total_tces ) ||
	     ( tcenum < tbl->startOffset ) ) {
		printk("tce_free: invalid tcenum\n");
		printk("\ttcenum    = 0x%lx\n", tcenum);
		printk("\tTCE Table = 0x%lx\n", (u64)tbl);
		printk("\tbus#      = 0x%lx\n", (u64)tbl->busNumber );
		printk("\tsize      = 0x%lx\n", (u64)tbl->size);
		printk("\tstartOff  = 0x%lx\n", (u64)tbl->startOffset );
		printk("\tindex     = 0x%lx\n", (u64)tbl->index);
		return;
	}

	for (i=0; i<num_pages; ++i) {
		ppc_md.tce_free_one(tbl, tcenum);
		++tcenum;
	}

	/* No sync (to make TCE change visible) is required here.
	   The lwsync when acquiring the lock in free_tce_range
	   is sufficient to synchronize with the bitmap.
	*/

	free_tce_range( tbl, free_tce, order );
}

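/* Ask the hypervisor for the TCE table parameters of the virtual bus
 * (bus 255) and split that single window between its two users: the first
 * half for virtual ethernet, the remainder for virtual I/O.
 */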
void __init create_virtual_bus_tce_table(void)
{
	struct TceTable *t;
	struct TceTableManagerCB virtBusTceTableParms;
	u64 absParmsPtr;

	virtBusTceTableParms.busNumber = 255;	/* Bus 255 is the virtual bus */
	virtBusTceTableParms.virtualBusFlag = 0xff; /* Ask for virtual bus */

	absParmsPtr = virt_to_absolute( (u64)&virtBusTceTableParms );
	HvCallXm_getTceTableParms( absParmsPtr );

	virtBusVethTceTable.size = virtBusTceTableParms.size / 2;
	virtBusVethTceTable.busNumber = virtBusTceTableParms.busNumber;
	virtBusVethTceTable.startOffset = virtBusTceTableParms.startOffset;
	virtBusVethTceTable.index = virtBusTceTableParms.index;
	virtBusVethTceTable.tceType = TCE_VB;

	virtBusVioTceTable.size = virtBusTceTableParms.size - virtBusVethTceTable.size;
	virtBusVioTceTable.busNumber = virtBusTceTableParms.busNumber;
	virtBusVioTceTable.startOffset = virtBusTceTableParms.startOffset +
			virtBusVethTceTable.size * (PAGE_SIZE/sizeof(union Tce));
	virtBusVioTceTable.index = virtBusTceTableParms.index;
	virtBusVioTceTable.tceType = TCE_VB;

	t = build_tce_table( &virtBusVethTceTable );
	if ( t ) {
		/* tceTables[255] = t; */
		//VirtBusVethTceTable = t;
		printk( "Virtual Bus VETH TCE table built successfully.\n");
		printk( "  TCE table size = %ld entries\n",
				(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
		printk( "  TCE table token = %d\n",
				(unsigned)t->index );
		printk( "  TCE table start entry = 0x%lx\n",
				(unsigned long)t->startOffset );
	}
	else printk( "Virtual Bus VETH TCE table failed.\n");

	t = build_tce_table( &virtBusVioTceTable );
	if ( t ) {
		//VirtBusVioTceTable = t;
		printk( "Virtual Bus VIO TCE table built successfully.\n");
		printk( "  TCE table size = %ld entries\n",
				(unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
		printk( "  TCE table token = %d\n",
				(unsigned)t->index );
		printk( "  TCE table start entry = 0x%lx\n",
				(unsigned long)t->startOffset );
	}
	else printk( "Virtual Bus VIO TCE table failed.\n");
}

void create_tce_tables_for_buses(struct list_head *bus_list)
{
	struct pci_controller* phb;
	struct device_node *dn, *first_dn;
	int num_slots, num_slots_ilog2;
	int first_phb = 1;

	for (phb=hose_head;phb;phb=phb->next) {
		first_dn = ((struct device_node *)phb->arch_data)->child;
		/* Carve 2GB into the largest dma_window_size possible */
		for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
			num_slots++;
		num_slots_ilog2 = __ilog2(num_slots);
		if ((1<<num_slots_ilog2) != num_slots)
			num_slots_ilog2++;
		phb->dma_window_size = 1 << (22 - num_slots_ilog2);
		/* Reserve 16MB of DMA space on the first PHB.
		 * We should probably be more careful and use firmware props.
		 * In reality this space is remapped, not lost.  But we don't
		 * want to get that smart to handle it -- too much work.
		 */
		phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
		first_phb = 0;
		for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling) {
			create_pci_bus_tce_table((unsigned long)dn);
		}
	}
}

void create_tce_tables_for_busesLP(struct list_head *bus_list)
{
	struct list_head *ln;
	struct pci_bus *bus;
	struct device_node *busdn;
	u32 *dma_window;
	for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
		bus = pci_bus_b(ln);
		busdn = PCI_GET_DN(bus);
		dma_window = (u32 *)get_property(busdn, "ibm,dma-window", 0);
		if (dma_window) {
			/* Busno hasn't been copied yet.
			 * Do it now because getTceTableParmsPSeriesLP needs it.
			 */
			busdn->busno = bus->number;
			create_pci_bus_tce_table((unsigned long)busdn);
		}
		/* look for a window on a bridge even if the PHB had one */
		create_tce_tables_for_busesLP(&bus->children);
	}
}

void create_tce_tables(void) {
	struct pci_dev *dev;
	struct device_node *dn, *mydn;

	if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
		create_tce_tables_for_busesLP(&pci_root_buses);
	}
	else {
		create_tce_tables_for_buses(&pci_root_buses);
	}
	/* Now copy the tce_table ptr from the bus devices down to every
	 * pci device_node.  This means get_tce_table() won't need to search
	 * up the device tree to find it.
	 */
	pci_for_each_dev(dev) {
		mydn = dn = PCI_GET_DN(dev);
		while (dn && dn->tce_table == NULL)
			dn = dn->parent;
		if (dn) {
			mydn->tce_table = dn->tce_table;
		}
	}
}


/*
 * iSeries token = iSeries_Device_Node*
 * pSeries token = device_node*
 *
 */
void create_pci_bus_tce_table( unsigned long token ) {
	struct TceTable * newTceTable;

	PPCDBG(PPCDBG_TCE, "Entering create_pci_bus_tce_table.\n");
	PPCDBG(PPCDBG_TCE, "\ttoken = 0x%lx\n", token);

	newTceTable = (struct TceTable *)kmalloc( sizeof(struct TceTable), GFP_KERNEL );

	/*****************************************************************/
	/* For the iSeries machines, the HvTce Table can be one of three */
	/* flavors,                                                      */
	/* - Single bus TCE table,                                       */
	/* - Tce Table Share between buses,                              */
	/* - Tce Table per logical slot.                                 */
	/*****************************************************************/
	if(systemcfg->platform == PLATFORM_ISERIES_LPAR) {

		struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)token;
		getTceTableParmsiSeries(DevNode,newTceTable);

		/* Look for existing TCE table for this device.          */
		DevNode->DevTceTable = findHwTceTable(newTceTable );
		if( DevNode->DevTceTable == NULL) {
			DevNode->DevTceTable = build_tce_table( newTceTable );
		}
		else {
		    /* We're using a shared table, free this new one.    */
		    kfree(newTceTable);
		}
		printk("Pci Device 0x%p TceTable: %p\n",DevNode,DevNode->DevTceTable);
		return;
	}
	/* pSeries Leg */
	else {
		struct device_node *dn;
		struct pci_controller *phb;

		dn = (struct device_node *)token;
		phb = dn->phb;
		if (systemcfg->platform == PLATFORM_PSERIES)
			getTceTableParmsPSeries(phb, dn, newTceTable);
		else
			getTceTableParmsPSeriesLP(phb, dn, newTceTable);

		dn->tce_table  = build_tce_table( newTceTable );
	}
}

/***********************************************************************/
/* This function compares the known Tce tables to find a TceTable that */
/* has already been built for hardware TCEs.                           */
/* Search the complete (all devices) list for an assigned TCE table.   */
/* If the startOffset, index, and size match, then the TCE table for   */
/* this device has already been built and it should be shared with     */
/* this device.                                                        */
/***********************************************************************/
static struct TceTable* findHwTceTable(struct TceTable * newTceTable )
{
	struct list_head* Device_Node_Ptr    = iSeries_Global_Device_List.next;
	/* Cache the compare values. */
	u64  startOffset = newTceTable->startOffset;
	u64  index       = newTceTable->index;
	u64  size        = newTceTable->size;

	while(Device_Node_Ptr != &iSeries_Global_Device_List) {
		struct iSeries_Device_Node* CmprNode = (struct iSeries_Device_Node*)Device_Node_Ptr;
		if( CmprNode->DevTceTable != NULL &&
		    CmprNode->DevTceTable->tceType == TCE_PCI) {
			if( CmprNode->DevTceTable->startOffset == startOffset &&
			    CmprNode->DevTceTable->index       == index       &&
			    CmprNode->DevTceTable->size        == size        ) {
				printk("PCI TCE table matches 0x%p \n",CmprNode->DevTceTable);
				return CmprNode->DevTceTable;
			}
		}
		/* Get next Device Node in List             */
		Device_Node_Ptr = Device_Node_Ptr->next;
	}
	return NULL;
}

/***********************************************************************/
/* Call Hv with the architected data structure to get TCE table info.  */
/* Put the returned data into the Linux representation of the TCE      */
/* table data.                                                         */
/* The Hardware Tce table comes in three flavors.                      */
/* 1. TCE table shared between Buses.                                  */
/* 2. TCE table per Bus.                                               */
/* 3. TCE Table per IOA.                                               */
/***********************************************************************/
static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
				    struct TceTable* newTceTable )
{
	struct TceTableManagerCB* pciBusTceTableParms = (struct TceTableManagerCB*)kmalloc( sizeof(struct TceTableManagerCB), GFP_KERNEL );
	if(pciBusTceTableParms == NULL) panic("PCI_DMA: TCE Table Allocation failed.");

	memset( (void*)pciBusTceTableParms,0,sizeof(struct TceTableManagerCB) );
	pciBusTceTableParms->busNumber      = ISERIES_BUS(DevNode);
	pciBusTceTableParms->logicalSlot    = DevNode->LogicalSlot;
	pciBusTceTableParms->virtualBusFlag = 0;

	HvCallXm_getTceTableParms( REALADDR(pciBusTceTableParms) );

	/* PciTceTableParms Bus:0x18 Slot:0x04 Start:0x000000 Offset:0x04c000 Size:0x0020 */
	printk("PciTceTableParms Bus:0x%02lx Slot:0x%02x Start:0x%06lx Offset:0x%06lx Size:0x%04lx\n",
	       pciBusTceTableParms->busNumber,
	       pciBusTceTableParms->logicalSlot,
	       pciBusTceTableParms->start,
	       pciBusTceTableParms->startOffset,
	       pciBusTceTableParms->size);

	if(pciBusTceTableParms->size == 0) {
		printk("PCI_DMA: Possible Structure mismatch, 0x%p\n",pciBusTceTableParms);
		panic( "PCI_DMA: pciBusTceTableParms->size is zero, halt here!");
	}

	newTceTable->size        = pciBusTceTableParms->size;
	newTceTable->busNumber   = pciBusTceTableParms->busNumber;
	newTceTable->startOffset = pciBusTceTableParms->startOffset;
	newTceTable->index       = pciBusTceTableParms->index;
	newTceTable->tceType     = TCE_PCI;

	kfree(pciBusTceTableParms);
}

static void getTceTableParmsPSeries(struct pci_controller *phb,
				    struct device_node *dn,
				    struct TceTable *newTceTable ) {
	phandle node;
	unsigned long i;

	node = ((struct device_node *)(phb->arch_data))->node;

	PPCDBG(PPCDBG_TCEINIT, "getTceTableParms: start\n");
	PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table = 0x%lx\n", of_tce_table);
	PPCDBG(PPCDBG_TCEINIT, "\tphb          = 0x%lx\n", phb);
	PPCDBG(PPCDBG_TCEINIT, "\tdn           = 0x%lx\n", dn);
	PPCDBG(PPCDBG_TCEINIT, "\tdn->name     = %s\n", dn->name);
	PPCDBG(PPCDBG_TCEINIT, "\tdn->full_name= %s\n", dn->full_name);
	PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable  = 0x%lx\n", newTceTable);
	PPCDBG(PPCDBG_TCEINIT, "\tdma_window_size = 0x%lx\n", phb->dma_window_size);

	i = 0;
	while(of_tce_table[i].node) {
		PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].node = 0x%lx\n",
		       i, of_tce_table[i].node);
		PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].base = 0x%lx\n",
		       i, of_tce_table[i].base);
		PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].size = 0x%lx\n",
		       i, of_tce_table[i].size >> PAGE_SHIFT);
		PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data->node = 0x%lx\n",
		       node);

		if(of_tce_table[i].node == node) {
			memset((void *)of_tce_table[i].base,
			       0, of_tce_table[i].size);
			newTceTable->busNumber = phb->bus->number;

			/* Units of tce entries.                        */
			newTceTable->startOffset = phb->dma_window_base_cur;

			/* Adjust the current table offset to the next  */
			/* region.  Measured in TCE entries. Force an   */
			/* alignment to the size allotted per IOA. This */
			/* makes it easier to remove the 1st 16MB.      */
			phb->dma_window_base_cur += (phb->dma_window_size>>3);
			phb->dma_window_base_cur &=
				~((phb->dma_window_size>>3)-1);

			/* Set the tce table size - measured in units   */
			/* of pages of tce table.                       */
			newTceTable->size = ((phb->dma_window_base_cur -
					      newTceTable->startOffset) << 3)
					      >> PAGE_SHIFT;

			/* Test if we are going over 2GB of DMA space.  */
			if(phb->dma_window_base_cur > (1 << 19)) {
				panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
			}

			newTceTable->base = of_tce_table[i].base;
			newTceTable->index = 0;

			PPCDBG(PPCDBG_TCEINIT,
			       "\tnewTceTable->base        = 0x%lx\n",
			       newTceTable->base);
			PPCDBG(PPCDBG_TCEINIT,
			       "\tnewTceTable->startOffset = 0x%lx"
			       "(# tce entries)\n",
			       newTceTable->startOffset);
			PPCDBG(PPCDBG_TCEINIT,
			       "\tnewTceTable->size        = 0x%lx"
			       "(# pages of tce table)\n",
			       newTceTable->size);
		}
		i++;
	}
}

/*
 * getTceTableParmsPSeriesLP
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 *
 * ToDo: properly interpret the ibm,dma-window property.  The definition is:
 *	logical-bus-number	(1 word)
 *	phys-address		(#address-cells words)
 *	size			(#cell-size words)
 *
 * Currently we hard code these sizes (more or less).
 */
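/* As the code below reads it (cell counts hard coded), the property words
 * are used as:
 *	dma_window[0]		-> index (the window token)
 *	dma_window[2..3]	-> 64-bit bus offset, >> 12 gives startOffset
 *	dma_window[4..5]	-> 64-bit window size, used to size the table
 */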
static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
				    struct device_node *dn,
				    struct TceTable *newTceTable ) {
	u32 *dma_window = (u32 *)get_property(dn, "ibm,dma-window", 0);
	if (!dma_window) {
		panic("PCI_DMA: getTceTableParmsPSeriesLP: device %s has no ibm,dma-window property!\n", dn->full_name);
	}

	newTceTable->busNumber = dn->busno;
	newTceTable->size = (((((unsigned long)dma_window[4] << 32) | (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
	newTceTable->startOffset = ((((unsigned long)dma_window[2] << 32) | (unsigned long)dma_window[3]) >> 12);
	newTceTable->base = 0;
	newTceTable->index = dma_window[0];
	PPCDBG(PPCDBG_TCEINIT, "getTceTableParmsPSeriesLP for bus 0x%lx:\n", dn->busno);
	PPCDBG(PPCDBG_TCEINIT, "\tDevice = %s\n", dn->full_name);
	PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->index       = 0x%lx\n", newTceTable->index);
	PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->startOffset = 0x%lx\n", newTceTable->startOffset);
	PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->size        = 0x%lx\n", newTceTable->size);
}

/* Allocates a contiguous real buffer and creates TCEs over it.
 * Returns the virtual address of the buffer and sets dma_handle
 * to the dma address (tce) of the first page.
 */
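/* A minimal usage sketch from a hypothetical driver (not part of this file;
 * 'pdev' and 'RING_BYTES' are illustrative names):
 *
 *	dma_addr_t handle;
 *	void *ring = pci_alloc_consistent(pdev, RING_BYTES, &handle);
 *	if (ring) {
 *		... tell the adapter to DMA to/from 'handle' ...
 *		pci_free_consistent(pdev, RING_BYTES, ring, handle);
 *	}
 */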
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	struct TceTable * tbl;
	void *ret = NULL;
	unsigned order, nPages;
	dma_addr_t tce;

	PPCDBG(PPCDBG_TCE, "pci_alloc_consistent:\n");
	PPCDBG(PPCDBG_TCE, "\thwdev      = 0x%16.16lx\n", hwdev);
	PPCDBG(PPCDBG_TCE, "\tsize       = 0x%16.16lx\n", size);
	PPCDBG(PPCDBG_TCE, "\tdma_handle = 0x%16.16lx\n", dma_handle);

	size = PAGE_ALIGN(size);
	order = get_order(size);
	nPages = 1 << order;

	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if (order >= NUM_TCE_LEVELS) {
		printk("PCI_DMA: pci_alloc_consistent size too large: 0x%lx\n",
		       size);
		return (void *)NULL;
	}

	tbl = get_tce_table(hwdev);

	if (tbl) {
		/* Alloc enough pages (and possibly more) */
		ret = (void *)__get_free_pages( GFP_ATOMIC, order );
		if (ret) {
			/* Page allocation succeeded */
			memset(ret, 0, nPages << PAGE_SHIFT);
			/* Set up tces to cover the allocated range */
			tce = get_tces( tbl, order, ret, nPages, PCI_DMA_BIDIRECTIONAL );
			if (tce == NO_TCE) {
				free_pages( (unsigned long)ret, order );
				ret = NULL;
			} else {
				*dma_handle = tce;
			}
		} else {
			printk("pci_alloc_consistent: __get_free_pages failed for order = %d\n", order);
		}
	} else {
		panic("pci_alloc_consistent: unable to find TCE table\n");
	}

	PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle);
	PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: return     = 0x%16.16lx\n", ret);
	return ret;
}

void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	struct TceTable * tbl;
	unsigned order, nPages;

	PPCDBG(PPCDBG_TCE, "pci_free_consistent:\n");
	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, dma_handle, vaddr);

	size = PAGE_ALIGN(size);
	order = get_order(size);
	nPages = 1 << order;

	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if(order >= NUM_TCE_LEVELS) {
		printk("PCI_DMA: pci_free_consistent size too large: 0x%lx \n",size);
		return;
	}

	tbl = get_tce_table(hwdev);

	if ( tbl ) {
		tce_free(tbl, dma_handle, order, nPages);
		free_pages( (unsigned long)vaddr, order );
	}
}

/* Creates TCEs for a user provided buffer.  The user buffer must be
 * contiguous real kernel storage (not vmalloc).  The address of the buffer
 * passed here is the kernel (virtual) address of the buffer.  The buffer
 * need not be page aligned, the dma_addr_t returned will point to the same
 * byte within the page as vaddr.
 */
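/* A typical streaming-mapping sketch from a hypothetical driver (not taken
 * from this tree; 'pdev' and 'skb' are illustrative names):
 *
 *	dma_addr_t bus = pci_map_single(pdev, skb->data, skb->len,
 *					PCI_DMA_TODEVICE);
 *	... hand 'bus' to the adapter and wait for the transmit to finish ...
 *	pci_unmap_single(pdev, bus, skb->len, PCI_DMA_TODEVICE);
 */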
dma_addr_t pci_map_single(struct pci_dev *hwdev, void *vaddr,
			  size_t size, int direction )
{
	struct TceTable * tbl;
	dma_addr_t dma_handle = NO_TCE;
	unsigned long uaddr;
	unsigned order, nPages;

	PPCDBG(PPCDBG_TCE, "pci_map_single:\n");
	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, direction, vaddr);
	if (direction == PCI_DMA_NONE)
		BUG();

	uaddr = (unsigned long)vaddr;
	nPages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK );
	order = get_order( nPages & PAGE_MASK );
	nPages >>= PAGE_SHIFT;

	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if(order >= NUM_TCE_LEVELS) {
		panic("PCI_DMA: pci_map_single size too large: 0x%lx \n", size);
		return dma_handle;
	}

	tbl = get_tce_table(hwdev);

	if (tbl) {
		/* get_tces panics if there are no entries available */
		dma_handle = get_tces( tbl, order, vaddr, nPages, direction );
		dma_handle |= ( uaddr & ~PAGE_MASK );
	} else {
		panic("PCI_DMA: Unable to find TCE table.\n");
	}

	return dma_handle;
}

void pci_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction )
{
	struct TceTable * tbl;
	unsigned order, nPages;

	PPCDBG(PPCDBG_TCE, "pci_unmap_single:\n");
	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, dma_handle = 0x%16.16lx\n", hwdev, size, direction, dma_handle);
	if ( direction == PCI_DMA_NONE )
		BUG();

	nPages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK );
	order = get_order( nPages & PAGE_MASK );
	nPages >>= PAGE_SHIFT;

	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if(order >= NUM_TCE_LEVELS) {
		printk("PCI_DMA: pci_unmap_single size too large: 0x%lx \n",size);
		return;
	}

	tbl = get_tce_table(hwdev);

	if ( tbl )
		tce_free(tbl, dma_handle, order, nPages);

}

/* Figure out how many TCEs are actually going to be required
 * to map this scatterlist.  This code is not optimal.  It
 * takes into account the case where entry n ends in the same
 * page in which entry n+1 starts.  It does not handle the
 * general case of entry n ending in the same page in which
 * entry m starts.
 */
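/* For example (4K pages): a 6K entry starting on a page boundary covers
 * pages 0-1, and a following 6K entry that begins in page 1 covers pages
 * 1-2; the shared page is counted once, so the pair needs 3 TCEs rather
 * than 4.
 */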
static unsigned long num_tces_sg( struct scatterlist *sg, int nents )
{
	unsigned long nTces, numPages, startPage, endPage, prevEndPage;
	unsigned i;
	void *address;

	prevEndPage = 0;
	nTces = 0;

	for (i=0; i<nents; ++i) {
		/* Compute the starting page number and
		 * the ending page number for this entry
		 */
		address = sg->address ? sg->address :
			(page_address(sg->page) + sg->offset);
		startPage = (unsigned long)address >> PAGE_SHIFT;
		endPage = ((unsigned long)address + sg->length - 1) >> PAGE_SHIFT;
		numPages = endPage - startPage + 1;
		/* Simple optimization: if the previous entry ended
		 * in the same page in which this entry starts
		 * then we can reduce the required pages by one.
		 * This matches assumptions in fill_scatterlist_sg and
		 * create_tces_sg
		 */
		if ( startPage == prevEndPage )
			--numPages;
		nTces += numPages;
		prevEndPage = endPage;
		sg++;
	}
	return nTces;
}

/* Fill in the dma data in the scatterlist
 * return the number of dma sg entries created
 */
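/* Entries are coalesced into a single dma_address/dma_length pair when the
 * next buffer starts exactly where the previous one ended, or when the
 * previous buffer ends on a page boundary and the next one starts on a
 * (different) page boundary; anything else starts a new dma segment.  See
 * the inline comments below for the exact test.
 */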
static unsigned fill_scatterlist_sg( struct scatterlist *sg, int nents,
				 dma_addr_t dma_addr , unsigned long numTces)
{
	struct scatterlist *dma_sg;
	u32 cur_start_dma;
	unsigned long cur_len_dma, cur_end_virt, uaddr;
	unsigned num_dma_ents;
	void *address;

	dma_sg = sg;
	num_dma_ents = 1;

	/* Process the first sg entry */
	address = sg->address ? sg->address :
		(page_address(sg->page) + sg->offset);
	cur_start_dma = dma_addr + ((unsigned long)address & (~PAGE_MASK));
	cur_len_dma = sg->length;
	/* cur_end_virt holds the address of the byte immediately after the
	 * end of the current buffer.
	 */
	cur_end_virt = (unsigned long)address + cur_len_dma;
	/* Later code assumes that unused sg->dma_address and sg->dma_length
	 * fields will be zero.  Other archs seem to assume that the user
	 * (device driver) guarantees that...I don't want to depend on that
	 */
	sg->dma_address = sg->dma_length = 0;

	/* Process the rest of the sg entries */
	while (--nents) {
		++sg;
		/* Clear possibly unused fields. Note: sg >= dma_sg so
		 * this can't be clearing a field we've already set
		 */
		sg->dma_address = sg->dma_length = 0;

		/* Check if it is possible to make this next entry
		 * contiguous (in dma space) with the previous entry.
		 */

		/* The entries can be contiguous in dma space if
		 * the previous entry ends immediately before the
		 * start of the current entry (in virtual space)
		 * or if the previous entry ends at a page boundary
		 * and the current entry starts at a page boundary.
		 */
		address = sg->address ? sg->address :
			(page_address(sg->page) + sg->offset);
		uaddr = (unsigned long)address;
		if ( ( uaddr != cur_end_virt ) &&
		     ( ( ( uaddr | cur_end_virt ) & (~PAGE_MASK) ) ||
		       ( ( uaddr & PAGE_MASK ) == ( ( cur_end_virt-1 ) & PAGE_MASK ) ) ) ) {
			/* This entry can not be contiguous in dma space.
			 * save the previous dma entry and start a new one
			 */
			dma_sg->dma_address = cur_start_dma;
			dma_sg->dma_length  = cur_len_dma;

			++dma_sg;
			++num_dma_ents;

			cur_start_dma += cur_len_dma-1;
			/* If the previous entry ends and this entry starts
			 * in the same page then they share a tce.  In that
			 * case don't bump cur_start_dma to the next page
			 * in dma space.  This matches assumptions made in
			 * num_tces_sg and create_tces_sg.
			 */
			if ((uaddr & PAGE_MASK) == ((cur_end_virt-1) & PAGE_MASK))
				cur_start_dma &= PAGE_MASK;
			else
				cur_start_dma = PAGE_ALIGN(cur_start_dma+1);
			cur_start_dma += ( uaddr & (~PAGE_MASK) );
			cur_len_dma = 0;
		}
		/* Accumulate the length of this entry for the next
		 * dma entry
		 */
		cur_len_dma += sg->length;
		cur_end_virt = uaddr + sg->length;
	}
	/* Fill in the last dma entry */
	dma_sg->dma_address = cur_start_dma;
	dma_sg->dma_length  = cur_len_dma;

	if ((((cur_start_dma +cur_len_dma - 1)>> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1) != numTces)
	  {
	    PPCDBG(PPCDBG_TCE, "fill_scatterlist_sg: numTces %ld, used tces %d\n",
		   numTces,
		   (unsigned)(((cur_start_dma + cur_len_dma - 1) >> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1));
	  }


	return num_dma_ents;
}

/* Build the TCE entries for this scatterlist (via ppc_md.tce_build) and
 * return the dma address of the first TCE allocated.
 */
static dma_addr_t create_tces_sg(struct TceTable *tbl, struct scatterlist *sg,
				 int nents, unsigned numTces, int direction)
{
	unsigned order, i, j;
	unsigned long startPage, endPage, prevEndPage, numPages, uaddr;
	long tcenum, starttcenum;
	dma_addr_t dmaAddr;
	void *address;

	dmaAddr = NO_TCE;

	order = get_order( numTces << PAGE_SHIFT );
	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if(order >= NUM_TCE_LEVELS) {
		printk("PCI_DMA: create_tces_sg size too large: 0x%llx \n",(numTces << PAGE_SHIFT));
		panic("numTces is off");
		return NO_TCE;
	}

	/* allocate a block of tces */
	tcenum = alloc_tce_range(tbl, order);
	if (tcenum != -1) {
		tcenum += tbl->startOffset;
		starttcenum = tcenum;
		dmaAddr = tcenum << PAGE_SHIFT;
		prevEndPage = 0;
		for (j=0; j<nents; ++j) {
			address = sg->address ? sg->address :
				(page_address(sg->page) + sg->offset);
			startPage = (unsigned long)address >> PAGE_SHIFT;
			endPage = ((unsigned long)address + sg->length - 1) >> PAGE_SHIFT;
			numPages = endPage - startPage + 1;

			uaddr = (unsigned long)address;

			/* If the previous entry ended in the same page that
			 * the current page starts then they share that
			 * tce and we reduce the number of tces we need
			 * by one.  This matches assumptions made in
			 * num_tces_sg and fill_scatterlist_sg
			 */
			if ( startPage == prevEndPage ) {
				--numPages;
				uaddr += PAGE_SIZE;
			}

			for (i=0; i<numPages; ++i) {
				ppc_md.tce_build(tbl, tcenum, uaddr, direction);
				++tcenum;
				uaddr += PAGE_SIZE;
			}

			prevEndPage = endPage;
			sg++;
		}
		/* Make sure the update is visible to hardware.
		   sync required to synchronize the update to
		   the TCE table with the MMIO that will send
		   the bus address to the IOA */
		__asm__ __volatile__ ("sync" : : : "memory");

		if ((tcenum - starttcenum) != numTces)
			PPCDBG(PPCDBG_TCE, "create_tces_sg: numTces %d, tces used %d\n",
				numTces, (unsigned)(tcenum - starttcenum));

	} else {
		panic("PCI_DMA: TCE allocation failure in create_tces_sg. 0x%p 0x%x\n",
		      tbl, order);
	}

	return dmaAddr;
}

int pci_map_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction )
{
	struct TceTable * tbl;
	unsigned numTces;
	int num_dma = 0;
	dma_addr_t dma_handle;
	void *address;

	/* Fast path for a single entry scatterlist */
	if ( nents == 1 ) {
		address = sg->address ? sg->address :
			(page_address(sg->page) + sg->offset);
		sg->dma_address = pci_map_single( hwdev, address,
						  sg->length, direction );
		sg->dma_length = sg->length;
		return 1;
	}

	if (direction == PCI_DMA_NONE)
		BUG();

	tbl = get_tce_table(hwdev);

	if (tbl) {
		/* Compute the number of tces required */
		numTces = num_tces_sg(sg, nents);
		/* Create the tces and get the dma address */
		dma_handle = create_tces_sg( tbl, sg, nents, numTces, direction );

		if(dma_handle == NO_TCE) return 0;

		/* Fill in the dma scatterlist */
		num_dma = fill_scatterlist_sg( sg, nents, dma_handle, numTces );
	} else {
		panic("pci_map_sg: unable to find TCE table\n");
	}

	return num_dma;
}

void pci_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction )
{
	struct TceTable * tbl;
	unsigned order, numTces, i;
	dma_addr_t dma_end_page, dma_start_page;

	PPCDBG(PPCDBG_TCE, "pci_unmap_sg:\n");
	PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nelms = 0x%16.16lx\n", hwdev, sg, direction, nelms);

	if ( direction == PCI_DMA_NONE || nelms == 0 )
		BUG();

	dma_start_page = sg->dma_address & PAGE_MASK;
	dma_end_page   = 0;
	for ( i=nelms; i>0; --i ) {
		unsigned k = i - 1;
		if ( sg[k].dma_length ) {
			dma_end_page = ( sg[k].dma_address +
					 sg[k].dma_length - 1 ) & PAGE_MASK;
			break;
		}
	}

	numTces = ((dma_end_page - dma_start_page ) >> PAGE_SHIFT) + 1;
	order = get_order( numTces << PAGE_SHIFT );

	/* Client asked for way too much space.  This is checked later anyway */
	/* It is easier to debug here for the drivers than in the tce tables.*/
	if(order >= NUM_TCE_LEVELS) {
		printk("PCI_DMA: dma_start_page:0x%lx  dma_end_page:0x%lx\n",dma_start_page,dma_end_page);
		printk("PCI_DMA: pci_unmap_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT));
		return;
	}

	tbl = get_tce_table(hwdev);

	if ( tbl )
		tce_free( tbl, dma_start_page, order, numTces );

}

/*
 * phb_tce_table_init
 *
 * Function: Display TCE config registers.  Could be easily changed
 *           to initialize the hardware to use TCEs.
 */
unsigned long phb_tce_table_init(struct pci_controller *phb) {
	unsigned int r, cfg_rw, i;
	unsigned long r64;
	phandle node;

	PPCDBG(PPCDBG_TCE, "phb_tce_table_init: start.\n");

	node = ((struct device_node *)(phb->arch_data))->node;

	PPCDBG(PPCDBG_TCEINIT, "\tphb            = 0x%lx\n", phb);
	PPCDBG(PPCDBG_TCEINIT, "\tphb->type      = 0x%lx\n", phb->type);
	PPCDBG(PPCDBG_TCEINIT, "\tphb->phb_regs  = 0x%lx\n", phb->phb_regs);
	PPCDBG(PPCDBG_TCEINIT, "\tphb->chip_regs = 0x%lx\n", phb->chip_regs);
	PPCDBG(PPCDBG_TCEINIT, "\tphb: node      = 0x%lx\n", node);
	PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data = 0x%lx\n", phb->arch_data);

	i = 0;
	while(of_tce_table[i].node) {
		if(of_tce_table[i].node == node) {
			if(phb->type == phb_type_python) {
				r = *(((unsigned int *)phb->phb_regs) + (0xf10>>2));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR(low)    = 0x%x\n", r);
				r = *(((unsigned int *)phb->phb_regs) + (0xf00>>2));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR(high)   = 0x%x\n", r);
				r = *(((unsigned int *)phb->phb_regs) + (0xfd0>>2));
				PPCDBG(PPCDBG_TCEINIT, "\tPHB cfg(rw) = 0x%x\n", r);
				break;
			} else if(phb->type == phb_type_speedwagon) {
				r64 = *(((unsigned long *)phb->chip_regs) +
					(0x800>>3));
				PPCDBG(PPCDBG_TCEINIT, "\tNCFG    = 0x%lx\n", r64);
				r64 = *(((unsigned long *)phb->chip_regs) +
					(0x580>>3));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR0    = 0x%lx\n", r64);
				r64 = *(((unsigned long *)phb->chip_regs) +
					(0x588>>3));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR1    = 0x%lx\n", r64);
				r64 = *(((unsigned long *)phb->chip_regs) +
					(0x590>>3));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR2    = 0x%lx\n", r64);
				r64 = *(((unsigned long *)phb->chip_regs) +
					(0x598>>3));
				PPCDBG(PPCDBG_TCEINIT, "\tTAR3    = 0x%lx\n", r64);
				cfg_rw = *(((unsigned int *)phb->chip_regs) +
					   ((0x160 +
					     (((phb->local_number)+8)<<12))>>2));
				PPCDBG(PPCDBG_TCEINIT, "\tcfg_rw = 0x%x\n", cfg_rw);
			}
		}
		i++;
	}

	PPCDBG(PPCDBG_TCEINIT, "phb_tce_table_init: done\n");

	return(0);
}

/* These are called very early. */
void tce_init_pSeries(void)
{
	ppc_md.tce_build = tce_build_pSeries;
	ppc_md.tce_free_one = tce_free_one_pSeries;
}

void tce_init_iSeries(void)
{
	ppc_md.tce_build = tce_build_iSeries;
	ppc_md.tce_free_one = tce_free_one_iSeries;
}