1 /* $Id: shubio.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
2  *
3  * This file is subject to the terms and conditions of the GNU General Public
4  * License.  See the file "COPYING" in the main directory of this archive
5  * for more details.
6  *
7  * Copyright (C) 1992 - 1997, 2000,2002-2003 Silicon Graphics, Inc. All rights reserved.
8  */
9 
10 
11 #include <linux/types.h>
12 #include <linux/slab.h>
13 #include <asm/smp.h>
14 #include <asm/sn/sgi.h>
15 #include <asm/sn/io.h>
16 #include <asm/sn/iograph.h>
17 #include <asm/sn/invent.h>
18 #include <asm/sn/hcl.h>
19 #include <asm/sn/labelcl.h>
20 #include <asm/sn/sn_private.h>
21 #include <asm/sn/klconfig.h>
22 #include <asm/sn/sn_cpuid.h>
23 #include <asm/sn/pci/pciio.h>
24 #include <asm/sn/pci/pcibr.h>
25 #include <asm/sn/xtalk/xtalk.h>
26 #include <asm/sn/pci/pcibr_private.h>
27 #include <asm/sn/intr.h>
28 #include <asm/sn/ioerror_handling.h>
29 #include <asm/sn/ioerror.h>
30 #include <asm/sn/sn2/shubio.h>
31 
32 
33 error_state_t error_state_get(vertex_hdl_t v);
34 error_return_code_t error_state_set(vertex_hdl_t v,error_state_t new_state);
35 
36 
37 /*
38  * Get the xtalk provider function pointer for the
39  * specified hub.
40  */
41 
42 /*ARGSUSED*/
43 int
hub_xp_error_handler(vertex_hdl_t hub_v,nasid_t nasid,int error_code,ioerror_mode_t mode,ioerror_t * ioerror)44 hub_xp_error_handler(
45 	vertex_hdl_t 	hub_v,
46 	nasid_t		nasid,
47 	int		error_code,
48 	ioerror_mode_t	mode,
49 	ioerror_t	*ioerror)
50 {
51 	/*REFERENCED*/
52 	hubreg_t	iio_imem;
53 	vertex_hdl_t	xswitch;
54 	error_state_t	e_state;
55 	cnodeid_t	cnode;
56 
57 	/*
58 	 * Before walking down to the next level, check if
59 	 * the I/O link is up. If it's been disabled by the
60 	 * hub ii for some reason, we can't even touch the
61 	 * widget registers.
62 	 */
63 	iio_imem = REMOTE_HUB_L(nasid, IIO_IMEM);
64 
65 	if (!(iio_imem & (IIO_IMEM_B0ESD|IIO_IMEM_W0ESD))){
66 		/*
67 		 * IIO_IMEM_B0ESD getting set, indicates II shutdown
68 		 * on HUB0 parts.. Hopefully that's not true for
69 		 * Hub1 parts..
70 		 *
71 		 *
72 		 * If either one of them is shut down, can't
73 		 * go any further.
74 		 */
75 		return IOERROR_XTALKLEVEL;
76 	}
77 
78 	/* Get the error state of the hub */
79 	e_state = error_state_get(hub_v);
80 
81 	cnode = NASID_TO_COMPACT_NODEID(nasid);
82 
83 	xswitch = NODEPDA(cnode)->basew_xc;
84 
85 	/* Set the error state of the crosstalk device to that of
86 	 * hub.
87 	 */
88 	if (error_state_set(xswitch , e_state) ==
89 	    ERROR_RETURN_CODE_CANNOT_SET_STATE)
90 		return(IOERROR_UNHANDLED);
91 
92 	/* Clean the error state of the hub if we are in the action handling
93 	 * phase.
94 	 */
95 	if (e_state == ERROR_STATE_ACTION)
96 		(void)error_state_set(hub_v, ERROR_STATE_NONE);
97 	/* hand the error off to the switch or the directly
98 	 * connected crosstalk device.
99 	 */
100 	return  xtalk_error_handler(xswitch,
101 				    error_code, mode, ioerror);
102 
103 }
104 
105 /*
106  * Check if the widget in error has been enabled for PIO accesses
107  */
108 int
is_widget_pio_enabled(ioerror_t * ioerror)109 is_widget_pio_enabled(ioerror_t *ioerror)
110 {
111 	cnodeid_t	src_node;
112 	nasid_t		src_nasid;
113 	hubreg_t	ii_iowa;
114 	xwidgetnum_t	widget;
115 	iopaddr_t	p;
116 
117 	/* Get the node where the PIO error occurred */
118 	IOERROR_GETVALUE(p,ioerror, srcnode);
119 	src_node = p;
120 	if (src_node == CNODEID_NONE)
121 		return(0);
122 
123 	/* Get the nasid for the cnode */
124 	src_nasid = COMPACT_TO_NASID_NODEID(src_node);
125 	if (src_nasid == INVALID_NASID)
126 		return(0);
127 
128 	/* Read the Outbound widget access register for this hub */
129 	ii_iowa = REMOTE_HUB_L(src_nasid, IIO_IOWA);
130 	IOERROR_GETVALUE(p,ioerror, widgetnum);
131 	widget = p;
132 
133 	/* Check if the PIOs to the widget with PIO error have been
134 	 * enabled.
135 	 */
136 	if (ii_iowa & IIO_IOWA_WIDGET(widget))
137 		return(1);
138 
139 	return(0);
140 }
141 
142 /*
143  * Hub IO error handling.
144  *
145  *	Gets invoked for different types of errors found at the hub.
146  *	Typically this includes situations from bus error or due to
147  *	an error interrupt (mostly generated at the hub).
148  */
149 int
hub_ioerror_handler(vertex_hdl_t hub_v,int error_code,int mode,struct io_error_s * ioerror)150 hub_ioerror_handler(
151 	vertex_hdl_t 	hub_v,
152 	int		error_code,
153 	int		mode,
154 	struct io_error_s	*ioerror)
155 {
156 	hubinfo_t 	hinfo; 		/* Hub info pointer */
157 	nasid_t		nasid;
158 	int		retval = 0;
159 	/*REFERENCED*/
160 	iopaddr_t 	p;
161 	caddr_t 	cp;
162 
163 	IOERROR_DUMP("hub_ioerror_handler", error_code, mode, ioerror);
164 
165 	hubinfo_get(hub_v, &hinfo);
166 
167 	if (!hinfo){
168 		/* Print an error message and return */
169 		goto end;
170 	}
171 	nasid = hinfo->h_nasid;
172 
173 	switch(error_code) {
174 
175 	case PIO_READ_ERROR:
176 		/*
177 		 * Cpu got a bus error while accessing IO space.
178 		 * hubaddr field in ioerror structure should have
179 		 * the IO address that caused access error.
180 		 */
181 
182 		/*
183 		 * Identify if  the physical address in hub_error_data
184 		 * corresponds to small/large window, and accordingly,
185 		 * get the xtalk address.
186 		 */
187 
188 		/*
189 		 * Evaluate the widget number and the widget address that
190 		 * caused the error. Use 'vaddr' if it's there.
191 		 * This is typically true either during probing
192 		 * or a kernel driver getting into trouble.
193 		 * Otherwise, use paddr to figure out widget details
194 		 * This is typically true for user mode bus errors while
195 		 * accessing I/O space.
196 		 */
197 		 IOERROR_GETVALUE(cp,ioerror,vaddr);
198 		 if (cp){
199 		    /*
200 		     * If neither in small window nor in large window range,
201 		     * outright reject it.
202 		     */
203 		    IOERROR_GETVALUE(cp,ioerror,vaddr);
204 		    if (NODE_SWIN_ADDR(nasid, (paddr_t)cp)){
205 			iopaddr_t	hubaddr;
206 			xwidgetnum_t	widgetnum;
207 			iopaddr_t	xtalkaddr;
208 
209 			IOERROR_GETVALUE(p,ioerror,hubaddr);
210 			hubaddr = p;
211 			widgetnum = SWIN_WIDGETNUM(hubaddr);
212 			xtalkaddr = SWIN_WIDGETADDR(hubaddr);
213 			/*
214 			 * differentiate local register vs IO space access
215 			 */
216 			IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
217 			IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
218 
219 
220 		    } else if (NODE_BWIN_ADDR(nasid, (paddr_t)cp)){
221 			/*
222 			 * Address corresponds to large window space.
223 			 * Convert it to xtalk address.
224 			 */
225 			int		bigwin;
226 			hub_piomap_t    bw_piomap;
227 			xtalk_piomap_t	xt_pmap = NULL;
228 			iopaddr_t	hubaddr;
229 			xwidgetnum_t	widgetnum;
230 			iopaddr_t	xtalkaddr;
231 
232 			IOERROR_GETVALUE(p,ioerror,hubaddr);
233 			hubaddr = p;
234 
235 			/*
236 			 * Have to loop to find the correct xtalk_piomap
237 			 * because the're not allocated on a one-to-one
238 			 * basis to the window number.
239 			 */
240 			for (bigwin=0; bigwin < HUB_NUM_BIG_WINDOW; bigwin++) {
241 				bw_piomap = hubinfo_bwin_piomap_get(hinfo,
242 								    bigwin);
243 
244 				if (bw_piomap->hpio_bigwin_num ==
245 				    (BWIN_WINDOWNUM(hubaddr) - 1)) {
246 					xt_pmap = hub_piomap_xt_piomap(bw_piomap);
247 					break;
248 				}
249 			}
250 
251 			ASSERT(xt_pmap);
252 
253 			widgetnum = xtalk_pio_target_get(xt_pmap);
254 			xtalkaddr = xtalk_pio_xtalk_addr_get(xt_pmap) + BWIN_WIDGETADDR(hubaddr);
255 
256 			IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
257 			IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
258 
259 			/*
260 			 * Make sure that widgetnum doesnot map to hub
261 			 * register widget number, as we never use
262 			 * big window to access hub registers.
263 			 */
264 			ASSERT(widgetnum != HUB_REGISTER_WIDGET);
265 		    }
266 		} else if (IOERROR_FIELDVALID(ioerror,hubaddr)) {
267 			iopaddr_t	hubaddr;
268 			xwidgetnum_t	widgetnum;
269 			iopaddr_t	xtalkaddr;
270 
271 			IOERROR_GETVALUE(p,ioerror,hubaddr);
272 			hubaddr = p;
273 			if (BWIN_WINDOWNUM(hubaddr)){
274 				int 	window = BWIN_WINDOWNUM(hubaddr) - 1;
275 				hubreg_t itte;
276 				itte = (hubreg_t)HUB_L(IIO_ITTE_GET(nasid, window));
277 				widgetnum =  (itte >> IIO_ITTE_WIDGET_SHIFT) &
278 						IIO_ITTE_WIDGET_MASK;
279 				xtalkaddr = (((itte >> IIO_ITTE_OFFSET_SHIFT) &
280 					IIO_ITTE_OFFSET_MASK) <<
281 					     BWIN_SIZE_BITS) +
282 					BWIN_WIDGETADDR(hubaddr);
283 			} else {
284 				widgetnum = SWIN_WIDGETNUM(hubaddr);
285 				xtalkaddr = SWIN_WIDGETADDR(hubaddr);
286 			}
287 			IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
288 			IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
289 		} else {
290 			IOERROR_DUMP("hub_ioerror_handler", error_code,
291 						mode, ioerror);
292 			IOERR_PRINTF(printk(
293 				"hub_ioerror_handler: Invalid address passed"));
294 
295 			return IOERROR_INVALIDADDR;
296 		}
297 
298 
299 		IOERROR_GETVALUE(p,ioerror,widgetnum);
300 		if ((p) == HUB_REGISTER_WIDGET) {
301 			/*
302 			 * Error in accessing Hub local register
303 			 * This should happen mostly in SABLE mode..
304 			 */
305 			retval = 0;
306 		} else {
307 			/* Make sure that the outbound widget access for this
308 			 * widget is enabled.
309 			 */
310 			if (!is_widget_pio_enabled(ioerror)) {
311 				if (error_state_get(hub_v) ==
312 				    ERROR_STATE_ACTION)
313 					snia_ioerror_dump("No outbound widget access - ",
314 						     error_code, mode, ioerror);
315 				return(IOERROR_HANDLED);
316 			}
317 
318 
319 			retval = hub_xp_error_handler(
320 				hub_v, nasid, error_code, mode, ioerror);
321 
322 		}
323 
324 		IOERR_PRINTF(printk(
325 			"hub_ioerror_handler:PIO_READ_ERROR return: %d",
326 				retval));
327 
328 		break;
329 
330 	case PIO_WRITE_ERROR:
331 		/*
332 		 * This hub received an interrupt indicating a widget
333 		 * attached to this hub got a timeout.
334 		 * widgetnum field should be filled to indicate the
335 		 * widget that caused error.
336 		 *
337 		 * NOTE: This hub may have nothing to do with this error.
338 		 * We are here since the widget attached to the xbow
339 		 * gets its PIOs through this hub.
340 		 *
341 		 * There is nothing that can be done at this level.
342 		 * Just invoke the xtalk error handling mechanism.
343 		 */
344 		IOERROR_GETVALUE(p,ioerror,widgetnum);
345 		if ((p) == HUB_REGISTER_WIDGET) {
346 		} else {
347 			/* Make sure that the outbound widget access for this
348 			 * widget is enabled.
349 			 */
350 
351 			if (!is_widget_pio_enabled(ioerror)) {
352 				if (error_state_get(hub_v) ==
353 				    ERROR_STATE_ACTION)
354 					snia_ioerror_dump("No outbound widget access - ",
355 						     error_code, mode, ioerror);
356 				return(IOERROR_HANDLED);
357 			}
358 
359 			retval = hub_xp_error_handler(
360 				hub_v, nasid, error_code, mode, ioerror);
361 		}
362 		break;
363 
364 	case DMA_READ_ERROR:
365 		/*
366 		 * DMA Read error always ends up generating an interrupt
367 		 * at the widget level, and never at the hub level. So,
368 		 * we don't expect to come here any time
369 		 */
370 		ASSERT(0);
371 		retval = IOERROR_UNHANDLED;
372 		break;
373 
374 	case DMA_WRITE_ERROR:
375 		/*
376 		 * DMA Write error is generated when a write by an I/O
377 		 * device could not be completed. Problem is, device is
378 		 * totally unaware of this problem, and would continue
379 		 * writing to system memory. So, hub has a way to send
380 		 * an error interrupt on the first error, and bitbucket
381 		 * all further write transactions.
382 		 * Coming here indicates that hub detected one such error,
383 		 * and we need to handle it.
384 		 *
385 		 * Hub interrupt handler would have extracted physaddr,
386 		 * widgetnum, and widgetdevice from the CRB
387 		 *
388 		 * There is nothing special to do here, since gathering
389 		 * data from crb's is done elsewhere. Just pass the
390 		 * error to xtalk layer.
391 		 */
392 		retval = hub_xp_error_handler(hub_v, nasid, error_code, mode,
393 					      ioerror);
394 		break;
395 
396 	default:
397 		ASSERT(0);
398 		return IOERROR_BADERRORCODE;
399 
400 	}
401 
402 	/*
403 	 * If error was not handled, we may need to take certain action
404 	 * based on the error code.
405 	 * For e.g. in case of PIO_READ_ERROR, we may need to release the
406 	 * PIO Read entry table (they are sticky after errors).
407 	 * Similarly other cases.
408 	 *
409 	 * Further Action TBD
410 	 */
411 end:
412 	if (retval == IOERROR_HWGRAPH_LOOKUP) {
413 		/*
414 		 * If we get errors very early, we can't traverse
415 		 * the path using hardware graph.
416 		 * To handle this situation, we need a functions
417 		 * which don't depend on the hardware graph vertex to
418 		 * handle errors. This break the modularity of the
419 		 * existing code. Instead we print out the reason for
420 		 * not handling error, and return. On return, all the
421 		 * info collected would be dumped. This should provide
422 		 * sufficient info to analyse the error.
423 		 */
424 		printk("Unable to handle IO error: hardware graph not setup\n");
425 	}
426 
427 	return retval;
428 }
429 
/* Label under which a vertex's error state is kept in the hwgraph */
#define INFO_LBL_ERROR_STATE    "error_state"

/*
 * Accessors for the error state label of vertex 'v'. Each one expands to
 * the corresponding hwgraph_info_*_LBL() call and yields its result.
 * Every macro parameter is parenthesized so that expression arguments
 * keep their meaning inside the casts and the conditional.
 */

/* Fetch the labelled info into the lvalue 's' */
#define v_error_state_get(v,s)                                          \
(hwgraph_info_get_LBL((v),INFO_LBL_ERROR_STATE,(arbitrary_info_t *)&(s)))

/* Store 's': replace the existing label when 'replace' is non-zero, add one otherwise */
#define v_error_state_set(v,s,replace)                                  \
((replace) ?                                                            \
hwgraph_info_replace_LBL((v),INFO_LBL_ERROR_STATE,(arbitrary_info_t)(s),0) : \
hwgraph_info_add_LBL((v),INFO_LBL_ERROR_STATE,(arbitrary_info_t)(s)))

/* Remove the label from the vertex */
#define v_error_state_clear(v)                                          \
(hwgraph_info_remove_LBL((v),INFO_LBL_ERROR_STATE,0))
443 
444 /*
445  * error_state_get
446  *              Get the state of the vertex.
447  *              Returns ERROR_STATE_INVALID on failure
448  *                      current state otherwise
449  */
450 error_state_t
error_state_get(vertex_hdl_t v)451 error_state_get(vertex_hdl_t v)
452 {
453         error_state_t   s;
454 
455         /* Check if we have a valid hwgraph vertex */
456         if ( v == (vertex_hdl_t)0 )
457                 return(ERROR_STATE_NONE);
458 
459         /* Get the labelled info hanging off the vertex which corresponds
460          * to the state.
461          */
462         if (v_error_state_get(v, s) != GRAPH_SUCCESS) {
463                 return(ERROR_STATE_NONE);
464         }
465         return(s);
466 }
467 
468 
469 /*
470  * error_state_set
471  *              Set the state of the vertex
472  *              Returns ERROR_RETURN_CODE_CANNOT_SET_STATE on failure
473  *                      ERROR_RETURN_CODE_SUCCESS otherwise
474  */
475 error_return_code_t
error_state_set(vertex_hdl_t v,error_state_t new_state)476 error_state_set(vertex_hdl_t v,error_state_t new_state)
477 {
478         error_state_t   old_state;
479         int       replace = 1;
480 
481         /* Check if we have a valid hwgraph vertex */
482         if ( v == (vertex_hdl_t)0 )
483                 return(ERROR_RETURN_CODE_GENERAL_FAILURE);
484 
485 
486         /* This means that the error state needs to be cleaned */
487         if (new_state == ERROR_STATE_NONE) {
488                 /* Make sure that we have an error state */
489                 if (v_error_state_get(v,old_state) == GRAPH_SUCCESS)
490                         v_error_state_clear(v);
491                 return(ERROR_RETURN_CODE_SUCCESS);
492         }
493 
494         /* Check if the state information has been set at least once
495          * for this vertex.
496          */
497         if (v_error_state_get(v,old_state) != GRAPH_SUCCESS)
498                 replace = 0;
499 
500         if (v_error_state_set(v,new_state,replace) != GRAPH_SUCCESS) {
501                 return(ERROR_RETURN_CODE_CANNOT_SET_STATE);
502         }
503         return(ERROR_RETURN_CODE_SUCCESS);
504 }
505