1 /*
2  *
3  *
4  * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of version 2 of the GNU General Public License
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  *
14  * Further, this software is distributed without any warranty that it is
15  * free of the rightful claim of any third person regarding infringement
16  * or the like.  Any license provided herein, whether implied or
17  * otherwise, applies only to this software file.  Patent licenses, if
18  * any, provided herein do not apply to combinations of this program with
19  * other software, or any other product whatsoever.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
24  *
25  * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
26  * Mountain View, CA  94043, or:
27  *
28  * http://www.sgi.com
29  *
30  * For further information regarding this notice, see:
31  *
32  * http://oss.sgi.com/projects/GenInfo/NoticeExplan
33  */
34 
35 #include <linux/config.h>
36 #include <asm/sn/nodepda.h>
37 #include <asm/sn/addrs.h>
38 #include <asm/sn/arch.h>
39 #include <asm/sn/sn_cpuid.h>
40 #include <asm/sn/pda.h>
41 #include <asm/sn/sn2/shubio.h>
42 #include <asm/nodedata.h>
43 
44 #include <linux/bootmem.h>
45 #include <linux/string.h>
46 #include <linux/sched.h>
47 
48 #include <asm/sn/bte.h>
49 
50 
/*
 * Base local-MMR register offset for each BTE interface on a node
 * (one entry per interface; indexed by BTE number).
 */
static int bte_offsets[] = { IIO_IBLS0, IIO_IBLS1 };
55 
56 
57 /************************************************************************
58  * Block Transfer Engine copy related functions.
59  *
60  ***********************************************************************/
61 
62 
63 /*
64  * bte_copy(src, dest, len, mode, notification)
65  *
66  * Use the block transfer engine to move kernel memory from src to dest
67  * using the assigned mode.
68  *
 *   Parameters:
70  *   src - physical address of the transfer source.
71  *   dest - physical address of the transfer destination.
72  *   len - number of bytes to transfer from source to dest.
73  *   mode - hardware defined.  See reference information
74  *          for IBCT0/1 in the SHUB Programmers Reference
75  *   notification - kernel virtual address of the notification cache
76  *                  line.  If NULL, the default is used and
77  *                  the bte_copy is synchronous.
78  *
79  * NOTE:  This function requires src, dest, and len to
80  * be cacheline aligned.
81  */
bte_result_t
bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
	int bte_to_use;		/* index of the interface we managed to lock */
	u64 transfer_size;	/* length in cache lines, as the HW counts it */
	struct bteinfo_s *bte;
	bte_result_t bte_status;
	unsigned long irq_flags;


	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
		    src, dest, len, mode, notification));

	/* Zero-length transfer is trivially complete. */
	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* The BTE moves whole cache lines: src, dest and len must all be
	 * cache-line aligned, and len must fit the HW length field. */
	ASSERT(!((len & L1_CACHE_MASK) ||
		 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)));
	ASSERT(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT));

	do {
		/* Interrupts stay disabled while we hold a BTE lock. */
		local_irq_save(irq_flags);

		bte_to_use = 0;
		/* Attempt to lock one of the BTE interfaces. */
		while ((bte_to_use < BTES_PER_NODE) &&
		       BTE_LOCK_IF_AVAIL(bte_to_use)) {
			bte_to_use++;
		}

		if (bte_to_use < BTES_PER_NODE) {
			/* Got one - exit the loop with irqs still off. */
			break;
		}

		local_irq_restore(irq_flags);

		/* Caller did not ask us to wait for an interface. */
		if (!(mode & BTE_WACQUIRE)) {
			return BTEFAIL_NOTAVAIL;
		}

		/* Wait until a bte is available. */
		udelay(10);
	} while (1);

	bte = pda.cpu_bte_if[bte_to_use];
	BTE_PRINTKV(("Got a lock on bte %d\n", bte_to_use));


	if (notification == NULL) {
		/* User does not want to be notified; spin on the
		 * interface's own notify word instead. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}

	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

	/* Initialize the notification to a known value: the BTE writes
	 * here on completion, so -1 means "still in flight". */
	*bte->most_rcnt_na = -1L;

	/* Set the status reg busy bit and transfer length */
	BTE_PRINTKV(("IBLS - HUB_S(0x%p, 0x%lx)\n",
		     BTEREG_LNSTAT_ADDR, IBLS_BUSY | transfer_size));
	HUB_S(BTEREG_LNSTAT_ADDR, (IBLS_BUSY | transfer_size));

	/* Set the source and destination registers (physical addresses) */
	BTE_PRINTKV(("IBSA - HUB_S(0x%p, 0x%lx)\n", BTEREG_SRC_ADDR,
		     (TO_PHYS(src))));
	HUB_S(BTEREG_SRC_ADDR, (TO_PHYS(src)));
	BTE_PRINTKV(("IBDA - HUB_S(0x%p, 0x%lx)\n", BTEREG_DEST_ADDR,
		     (TO_PHYS(dest))));
	HUB_S(BTEREG_DEST_ADDR, (TO_PHYS(dest)));

	/* Set the notification register */
	BTE_PRINTKV(("IBNA - HUB_S(0x%p, 0x%lx)\n", BTEREG_NOTIF_ADDR,
		     (TO_PHYS(ia64_tpa(bte->most_rcnt_na)))));
	HUB_S(BTEREG_NOTIF_ADDR, (TO_PHYS(ia64_tpa(bte->most_rcnt_na))));


	/* Initiate the transfer: writing the control register starts
	 * the engine, so this store must come last. */
	BTE_PRINTK(("IBCT - HUB_S(0x%p, 0x%lx)\n", BTEREG_CTRL_ADDR,
		     BTE_VALID_MODE(mode)));
	HUB_S(BTEREG_CTRL_ADDR, BTE_VALID_MODE(mode));

	/* Hardware is running; release the interface. */
	spin_unlock_irqrestore(&bte->spinlock, irq_flags);


	if (notification != NULL) {
		/* Asynchronous: caller polls its own notification line. */
		return BTE_SUCCESS;
	}

	/* Synchronous: busy-wait until the BTE rewrites the
	 * notification cache line. */
	while (*bte->most_rcnt_na == -1UL) {
	}


	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
				HUB_L(BTEREG_LNSTAT_ADDR), *bte->most_rcnt_na));

	if (*bte->most_rcnt_na & IBLS_ERROR) {
		/* The notification word doubles as the error code. */
		bte_status = *bte->most_rcnt_na & ~IBLS_ERROR;
		*bte->most_rcnt_na = 0L;
	} else {
		bte_status = BTE_SUCCESS;
	}
	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
				HUB_L(BTEREG_LNSTAT_ADDR), *bte->most_rcnt_na));

	return bte_status;
}
193 
194 
195 /*
196  * bte_unaligned_copy(src, dest, len, mode)
197  *
198  * use the block transfer engine to move kernel
199  * memory from src to dest using the assigned mode.
200  *
 *   Parameters:
202  *   src - physical address of the transfer source.
203  *   dest - physical address of the transfer destination.
204  *   len - number of bytes to transfer from source to dest.
205  *   mode - hardware defined.  See reference information
206  *          for IBCT0/1 in the SGI documentation.
207  *
 * NOTE: If the source, dest, and len are all cache line aligned,
 * then it would be _FAR_ preferable to use bte_copy instead.
210  */
bte_result_t
bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
{
	int destFirstCacheOffset;	/* dest offset within its cache line */
	u64 headBteSource;		/* cache-aligned source of head BTE */
	u64 headBteLen;			/* bytes moved by the head BTE */
	u64 headBcopySrcOffset;		/* src offset within its cache line */
	u64 headBcopyDest;		/* destination of the head bcopy */
	u64 headBcopyLen;		/* bytes moved by the head bcopy */
	u64 footBteSource;		/* cache-aligned source of foot BTE */
	u64 footBteLen;			/* bytes moved by the foot BTE */
	u64 footBcopyDest;		/* destination of the foot bcopy */
	u64 footBcopyLen;		/* bytes moved by the foot bcopy */
	bte_result_t rv;
	char *bteBlock;

	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* temporary buffer used during unaligned transfers */
	bteBlock = pda.cpu_bte_if[0]->scratch_buf;

	headBcopySrcOffset = src & L1_CACHE_MASK;
	destFirstCacheOffset = dest & L1_CACHE_MASK;

	/*
	 * At this point, the transfer is broken into
	 * (up to) three sections.  The first section is
	 * from the start address to the first physical
	 * cache line, the second is from the first physical
	 * cache line to the last complete cache line,
	 * and the third is from the last cache line to the
	 * end of the buffer.  The first and third sections
	 * are handled by bte copying into a temporary buffer
	 * and then bcopy'ing the necessary section into the
	 * final location.  The middle section is handled with
	 * a standard bte copy.
	 *
	 * One nasty exception to the above rule is when the
	 * source and destination are not symmetrically
	 * mis-aligned.  If the source offset from the first
	 * cache line is different from the destination offset,
	 * we make the first section be the entire transfer
	 * and then bcopy the entire block into place.
	 */
	if (headBcopySrcOffset == destFirstCacheOffset) {

		/*
		 * Both the source and destination are the same
		 * distance from a cache line boundary so we can
		 * use the bte to transfer the bulk of the
		 * data.
		 */
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopyDest = dest;
		if (headBcopySrcOffset) {
			/* Head bcopy covers from src up to the next cache
			 * line boundary (or all of len, if shorter). */
			headBcopyLen =
			    (len >
			     (L1_CACHE_BYTES -
			      headBcopySrcOffset) ? L1_CACHE_BYTES
			     - headBcopySrcOffset : len);
			headBteLen = L1_CACHE_BYTES;
		} else {
			headBcopyLen = 0;
			headBteLen = 0;
		}

		if (len > headBcopyLen) {
			/* Foot section: the sub-cache-line remainder. */
			footBcopyLen =
			    (len - headBcopyLen) & L1_CACHE_MASK;
			footBteLen = L1_CACHE_BYTES;

			footBteSource = src + len - footBcopyLen;
			footBcopyDest = dest + len - footBcopyLen;

			if (footBcopyDest ==
			    (headBcopyDest + headBcopyLen)) {
				/*
				 * We have two contiguous bcopy
				 * blocks.  Merge them.
				 */
				headBcopyLen += footBcopyLen;
				headBteLen += footBteLen;
			} else if (footBcopyLen > 0) {
				/* BTE the trailing cache line into the
				 * scratch buffer, then copy the tail. */
				rv = bte_copy(footBteSource,
					      ia64_tpa(bteBlock),
					      footBteLen, mode, NULL);
				if (rv != BTE_SUCCESS) {
					return rv;
				}


				memcpy(__va(footBcopyDest),
				       (char *) bteBlock, footBcopyLen);
			}
		} else {
			footBcopyLen = 0;
			footBteLen = 0;
		}

		if (len > (headBcopyLen + footBcopyLen)) {
			/* now transfer the middle. */
			rv = bte_copy((src + headBcopyLen),
				      (dest +
				       headBcopyLen),
				      (len - headBcopyLen -
				       footBcopyLen), mode, NULL);
			if (rv != BTE_SUCCESS) {
				return rv;
			}

		}
	} else {


		/*
		 * The transfer is not symmetric, we will
		 * allocate a buffer large enough for all the
		 * data, bte_copy into that buffer and then
		 * bcopy to the destination.
		 */

		/* Add the leader from source */
		headBteLen = len + (src & L1_CACHE_MASK);
		/* Add the trailing bytes from footer. */
		headBteLen +=
		    L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK);
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopySrcOffset = src & L1_CACHE_MASK;
		headBcopyDest = dest;
		headBcopyLen = len;
	}

	if (headBcopyLen > 0) {
		/* BTE into the scratch buffer, then bcopy the caller's
		 * bytes (skipping the alignment padding) into place. */
		rv = bte_copy(headBteSource,
			      ia64_tpa(bteBlock), headBteLen, mode, NULL);
		if (rv != BTE_SUCCESS) {
			return rv;
		}

		memcpy(__va(headBcopyDest), ((char *) bteBlock +
					     headBcopySrcOffset),
		       headBcopyLen);
	}
	return BTE_SUCCESS;
}
358 
359 
360 /************************************************************************
361  * Block Transfer Engine initialization functions.
362  *
363  ***********************************************************************/
364 
365 
366 /*
367  * bte_init_node(nodepda, cnode)
368  *
369  * Initialize the nodepda structure with BTE base addresses and
370  * spinlocks.
371  */
372 void
bte_init_node(nodepda_t * mynodepda,cnodeid_t cnode)373 bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
374 {
375 	int i;
376 
377 
378 	/*
379 	 * Indicate that all the block transfer engines on this node
380 	 * are available.
381 	 */
382 
383 	/*
384 	 * Allocate one bte_recover_t structure per node.  It holds
385 	 * the recovery lock for node.  All the bte interface structures
386 	 * will point at this one bte_recover structure to get the lock.
387 	 */
388 	spin_lock_init(&mynodepda->bte_recovery_lock);
389 	init_timer(&mynodepda->bte_recovery_timer);
390 	mynodepda->bte_recovery_timer.function = bte_error_handler;
391 	mynodepda->bte_recovery_timer.data = (unsigned long) mynodepda;
392 
393 	for (i = 0; i < BTES_PER_NODE; i++) {
394 		/* >>> Don't know why the 0x1800000L is here.  Robin */
395 		mynodepda->bte_if[i].bte_base_addr =
396 		    (char *) LOCAL_MMR_ADDR(bte_offsets[i] | 0x1800000L);
397 
398 		/*
399 		 * Initialize the notification and spinlock
400 		 * so the first transfer can occur.
401 		 */
402 		mynodepda->bte_if[i].most_rcnt_na =
403 		    &(mynodepda->bte_if[i].notify);
404 		mynodepda->bte_if[i].notify = 0L;
405 		spin_lock_init(&mynodepda->bte_if[i].spinlock);
406 
407 		mynodepda->bte_if[i].scratch_buf =
408 		    alloc_bootmem_node(NODE_DATA(cnode), BTE_MAX_XFER);
409 		mynodepda->bte_if[i].bte_cnode = cnode;
410 		mynodepda->bte_if[i].bte_error_count = 0;
411 		mynodepda->bte_if[i].bte_num = i;
412 		mynodepda->bte_if[i].cleanup_active = 0;
413 		mynodepda->bte_if[i].bh_error = 0;
414 	}
415 
416 }
417 
418 /*
419  * bte_init_cpu()
420  *
421  * Initialize the cpupda structure with pointers to the
422  * nodepda bte blocks.
423  *
424  */
425 void
bte_init_cpu(void)426 bte_init_cpu(void)
427 {
428 	/* Called by setup.c as each cpu is being added to the nodepda */
429 	if (local_node_data->active_cpu_count & 0x1) {
430 		pda.cpu_bte_if[0] = &(nodepda->bte_if[0]);
431 		pda.cpu_bte_if[1] = &(nodepda->bte_if[1]);
432 	} else {
433 		pda.cpu_bte_if[0] = &(nodepda->bte_if[1]);
434 		pda.cpu_bte_if[1] = &(nodepda->bte_if[0]);
435 	}
436 }
437