1 /* $Id: shuberror.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
2  *
3  * This file is subject to the terms and conditions of the GNU General Public
4  * License.  See the file "COPYING" in the main directory of this archive
5  * for more details.
6  *
7  * Copyright (C) 1992 - 1997, 2000,2002-2003 Silicon Graphics, Inc. All rights reserved.
8  */
9 
10 
11 #include <linux/types.h>
12 #include <linux/slab.h>
13 #include <linux/irq.h>
14 #include <asm/io.h>
15 #include <asm/irq.h>
16 #include <asm/smp.h>
17 #include <asm/delay.h>
18 #include <asm/sn/sgi.h>
19 #include <asm/sn/io.h>
20 #include <asm/sn/iograph.h>
21 #include <asm/sn/invent.h>
22 #include <asm/sn/hcl.h>
23 #include <asm/sn/labelcl.h>
24 #include <asm/sn/sn_private.h>
25 #include <asm/sn/klconfig.h>
26 #include <asm/sn/sn_cpuid.h>
27 #include <asm/sn/pci/pciio.h>
28 #include <asm/sn/pci/pcibr.h>
29 #include <asm/sn/xtalk/xtalk.h>
30 #include <asm/sn/pci/pcibr_private.h>
31 #include <asm/sn/intr.h>
32 #include <asm/sn/ioerror_handling.h>
33 #include <asm/sn/ioerror.h>
34 #include <asm/sn/sn2/shubio.h>
35 #include <asm/sn/bte.h>
36 
37 extern void hubni_eint_init(cnodeid_t cnode);
38 extern void hubii_eint_init(cnodeid_t cnode);
39 extern void hubii_eint_handler (int irq, void *arg, struct pt_regs *ep);
40 int hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo);
41 int hubiio_prb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo);
42 extern void bte_crb_error_handler(vertex_hdl_t hub_v, int btenum, int crbnum, ioerror_t *ioe, int bteop);
43 void print_crb_fields(int crb_num, ii_icrb0_a_u_t icrba,
44 	ii_icrb0_b_u_t icrbb, ii_icrb0_c_u_t icrbc,
45 	ii_icrb0_d_u_t icrbd, ii_icrb0_e_u_t icrbe);
46 
47 extern int maxcpus;
48 extern error_return_code_t error_state_set(vertex_hdl_t v,error_state_t new_state);
49 
50 #define HUB_ERROR_PERIOD        (120 * HZ)      /* 2 minutes */
51 
52 void
hub_error_clear(nasid_t nasid)53 hub_error_clear(nasid_t nasid)
54 {
55 	int i;
56 
57     /*
58      * Make sure spurious write response errors are cleared
59      * (values are from hub_set_prb())
60      */
61     for (i = 0; i <= HUB_WIDGET_ID_MAX - HUB_WIDGET_ID_MIN + 1; i++) {
62         iprb_t prb;
63 
64 	prb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)));
65 
66         /* Clear out some fields */
67         prb.iprb_ovflow = 1;
68         prb.iprb_bnakctr = 0;
69         prb.iprb_anakctr = 0;
70 
71         prb.iprb_xtalkctr = 3;  /* approx. PIO credits for the widget */
72 
73         REMOTE_HUB_S(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)), prb.iprb_regval);
74     }
75 
76     REMOTE_HUB_S(nasid, IIO_IECLR, -1);
77 
78 }
79 
80 
81 /*
82  * Function	: hub_error_init
83  * Purpose	: initialize the error handling requirements for a given hub.
84  * Parameters	: cnode, the compact nodeid.
85  * Assumptions	: Called only once per hub, either by a local cpu. Or by a
86  *			remote cpu, when this hub is headless.(cpuless)
87  * Returns	: None
88  */
89 
90 void
hub_error_init(cnodeid_t cnode)91 hub_error_init(cnodeid_t cnode)
92 {
93 	nasid_t nasid;
94 
95     nasid = cnodeid_to_nasid(cnode);
96     hub_error_clear(nasid);
97 
98 
99     /*
100      * Now setup the hub ii error interrupt handler.
101      */
102 
103     hubii_eint_init(cnode);
104 
105     return;
106 }
107 
108 /*
109  * Function	: hubii_eint_init
110  * Parameters	: cnode
111  * Purpose	: to initialize the hub iio error interrupt.
112  * Assumptions	: Called once per hub, by the cpu which will ultimately
113  *			handle this interrupt.
114  * Returns	: None.
115  */
116 
117 void
hubii_eint_init(cnodeid_t cnode)118 hubii_eint_init(cnodeid_t cnode)
119 {
120     int			bit, rv;
121     ii_iidsr_u_t    	hubio_eint;
122     hubinfo_t		hinfo;
123     cpuid_t		intr_cpu;
124     vertex_hdl_t 	hub_v;
125     int bit_pos_to_irq(int bit);
126     ii_ilcsr_u_t	ilcsr;
127 
128 
129     hub_v = (vertex_hdl_t)cnodeid_to_vertex(cnode);
130     ASSERT_ALWAYS(hub_v);
131     hubinfo_get(hub_v, &hinfo);
132 
133     ASSERT(hinfo);
134     ASSERT(hinfo->h_cnodeid == cnode);
135 
136     ilcsr.ii_ilcsr_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ILCSR);
137     if ((ilcsr.ii_ilcsr_fld_s.i_llp_stat & 0x2) == 0) {
138 	/*
139 	 * HUB II link is not up.  Disable LLP. Clear old errors.
140 	 * Enable interrupts to handle BTE errors.
141 	 */
142 	ilcsr.ii_ilcsr_fld_s.i_llp_en = 0;
143 	REMOTE_HUB_S(hinfo->h_nasid, IIO_ILCSR, ilcsr.ii_ilcsr_regval);
144     }
145 
146     /* Select a possible interrupt target where there is a free interrupt
147      * bit and also reserve the interrupt bit for this IO error interrupt
148      */
149     intr_cpu = intr_heuristic(hub_v,0,SGI_II_ERROR,0,hub_v,
150 			      "HUB IO error interrupt",&bit);
151     if (intr_cpu == CPU_NONE) {
152 	printk("hubii_eint_init: intr_reserve_level failed, cnode %d", cnode);
153 	return;
154     }
155 
156     rv = intr_connect_level(intr_cpu, SGI_II_ERROR, 0, NULL);
157     request_irq(SGI_II_ERROR, hubii_eint_handler, SA_SHIRQ, "SN_hub_error", (void *)hub_v);
158     irq_desc(bit)->status |= SN2_IRQ_PER_HUB;
159     ASSERT_ALWAYS(rv >= 0);
160     hubio_eint.ii_iidsr_regval = 0;
161     hubio_eint.ii_iidsr_fld_s.i_enable = 1;
162     hubio_eint.ii_iidsr_fld_s.i_level = bit;/* Take the least significant bits*/
163     hubio_eint.ii_iidsr_fld_s.i_node = COMPACT_TO_NASID_NODEID(cnode);
164     hubio_eint.ii_iidsr_fld_s.i_pi_id = cpuid_to_subnode(intr_cpu);
165     REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, hubio_eint.ii_iidsr_regval);
166 
167 }
168 
169 
170 /*ARGSUSED*/
171 void
hubii_eint_handler(int irq,void * arg,struct pt_regs * ep)172 hubii_eint_handler (int irq, void *arg, struct pt_regs *ep)
173 {
174     vertex_hdl_t	hub_v;
175     hubinfo_t		hinfo;
176     ii_wstat_u_t	wstat;
177     hubreg_t		idsr;
178     ii_ilcsr_u_t	ilcsr;
179 
180 
181     /* two levels of casting avoids compiler warning.!! */
182     hub_v = (vertex_hdl_t)(long)(arg);
183     ASSERT(hub_v);
184 
185     hubinfo_get(hub_v, &hinfo);
186 
187     idsr = REMOTE_HUB_L(hinfo->h_nasid, IIO_ICMR);
188 #if 0
189     if (idsr & 0x1) {
190 	/* ICMR bit is set .. we are getting into "Spurious Interrupts condition. */
191 	printk("Cnode %d II has seen the ICMR condition\n", hinfo->h_cnodeid);
192 	printk("***** Please file PV with the above messages *****\n");
193 	/* panic("We have to panic to prevent further unknown states ..\n"); */
194     }
195 #endif
196 
197     /*
198      * Identify the reason for error.
199      */
200     wstat.ii_wstat_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_WSTAT);
201 
202     if (wstat.ii_wstat_fld_s.w_crazy) {
203 	char	*reason;
204 	/*
205 	 * We can do a couple of things here.
206 	 * Look at the fields TX_MX_RTY/XT_TAIL_TO/XT_CRD_TO to check
207 	 * which of these caused the CRAZY bit to be set.
208 	 * You may be able to check if the Link is up really.
209 	 */
210 	if (wstat.ii_wstat_fld_s.w_tx_mx_rty)
211 		reason = "Micro Packet Retry Timeout";
212 	else if (wstat.ii_wstat_fld_s.w_xt_tail_to)
213 		reason = "Crosstalk Tail Timeout";
214 	else if (wstat.ii_wstat_fld_s.w_xt_crd_to)
215 		reason = "Crosstalk Credit Timeout";
216 	else {
217 		hubreg_t	hubii_imem;
218 		/*
219 		 * Check if widget 0 has been marked as shutdown, or
220 		 * if BTE 0/1 has been marked.
221 		 */
222 		hubii_imem = REMOTE_HUB_L(hinfo->h_nasid, IIO_IMEM);
223 		if (hubii_imem & IIO_IMEM_W0ESD)
224 			reason = "Hub Widget 0 has been Shutdown";
225 		else if (hubii_imem & IIO_IMEM_B0ESD)
226 			reason = "BTE 0 has been shutdown";
227 		else if (hubii_imem & IIO_IMEM_B1ESD)
228 			reason = "BTE 1 has been shutdown";
229 		else	reason = "Unknown";
230 
231 	}
232 	/*
233 	 * Note: we may never be able to print this, if the II talking
234 	 * to Xbow which hosts the console is dead.
235 	 */
236 	ilcsr.ii_ilcsr_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ILCSR);
237 	if (ilcsr.ii_ilcsr_fld_s.i_llp_en == 1) {	/* Link is enabled */
238 	    printk("Hub %d, cnode %d to Xtalk Link failed (II_ECRAZY) Reason: %s",
239 		hinfo->h_nasid, hinfo->h_cnodeid, reason);
240 	}
241     }
242 
243 
244     /*
245      * Before processing any interrupt related information, clear all
246      * error indication and reenable interrupts.  This will prevent
247      * lost interrupts due to the interrupt handler scanning past a PRB/CRB
248      * which has not errorred yet and then the PRB/CRB goes into error.
249      * Note, PRB errors are cleared individually.
250      */
251     REMOTE_HUB_S(hinfo->h_nasid, IIO_IECLR, 0xff0000);
252     idsr = REMOTE_HUB_L(hinfo->h_nasid, IIO_IIDSR) & ~IIO_IIDSR_SENT_MASK;
253     REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, idsr);
254 
255 
256     /*
257      * It's a toss as to which one among PRB/CRB to check first.
258      * Current decision is based on the severity of the errors.
259      * IO CRB errors tend to be more severe than PRB errors.
260      *
261      * It is possible for BTE errors to have been handled already, so we
262      * may not see any errors handled here.
263      */
264     (void)hubiio_crb_error_handler(hub_v, hinfo);
265     (void)hubiio_prb_error_handler(hub_v, hinfo);
266 }
267 
268 /*
269  * Free the hub CRB "crbnum" which encountered an error.
270  * Assumption is, error handling was successfully done,
271  * and we now want to return the CRB back to Hub for normal usage.
272  *
273  * In order to free the CRB, all that's needed is to de-allocate it
274  *
275  * Assumption:
276  *      No other processor is mucking around with the hub control register.
277  *      So, upper layer has to single thread this.
278  */
279 void
hubiio_crb_free(hubinfo_t hinfo,int crbnum)280 hubiio_crb_free(hubinfo_t hinfo, int crbnum)
281 {
282 	ii_icrb0_b_u_t         icrbb;
283 
284 	/*
285 	* The hardware does NOT clear the mark bit, so it must get cleared
286 	* here to be sure the error is not processed twice.
287 	*/
288 	icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ICRB_B(crbnum));
289 	icrbb.b_mark   = 0;
290 	REMOTE_HUB_S(hinfo->h_nasid, IIO_ICRB_B(crbnum), icrbb.ii_icrb0_b_regval);
291 
292 	/*
293 	* Deallocate the register.
294 	*/
295 
296 	REMOTE_HUB_S(hinfo->h_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
297 
298 	/*
299 	* Wait till hub indicates it's done.
300 	*/
301 	while (REMOTE_HUB_L(hinfo->h_nasid, IIO_ICDR) & IIO_ICDR_PND)
302 		udelay(1);
303 
304 }
305 
306 
/*
 * Array of error names that get logged in CRBs.
 *
 * hubiio_crb_error_handler() indexes this table with the b_ecode field of
 * CRB register B.
 * NOTE(review): the entries are presumably in IIO_ICRB_ECODE_* numeric
 * order -- confirm against shubio.h before reordering or extending.
 */
char *hubiio_crb_errors[] = {
	"Directory Error",
	"CRB Poison Error",
	"I/O Write Error",
	"I/O Access Error",
	"I/O Partial Write Error",
	"I/O Partial Read Error",
	"I/O Timeout Error",
	"Xtalk Error Packet"
};
320 
321 void
print_crb_fields(int crb_num,ii_icrb0_a_u_t icrba,ii_icrb0_b_u_t icrbb,ii_icrb0_c_u_t icrbc,ii_icrb0_d_u_t icrbd,ii_icrb0_e_u_t icrbe)322 print_crb_fields(int crb_num, ii_icrb0_a_u_t icrba,
323 	ii_icrb0_b_u_t icrbb, ii_icrb0_c_u_t icrbc,
324 	ii_icrb0_d_u_t icrbd, ii_icrb0_e_u_t icrbe)
325 {
326     printk("CRB %d regA\n\t"
327 	    "a_iow 0x%x\n\t"
328 	    "valid0x%x\n\t"
329 	    "Address0x%lx\n\t"
330 	    "a_tnum 0x%x\n\t"
331 	    "a_sidn 0x%x\n",
332 	    crb_num,
333 	    icrba.a_iow,
334 	    icrba.a_valid,
335 	    icrba.a_addr,
336 	    icrba.a_tnum,
337 	    icrba.a_sidn);
338     printk("CRB %d regB\n\t"
339 	    "b_imsgtype 0x%x\n\t"
340 	    "b_imsg 0x%x\n"
341 	    "\tb_use_old 0x%x\n\t"
342 	    "b_initiator 0x%x\n\t"
343 	    "b_exc 0x%x\n"
344 	    "\tb_ackcnt 0x%x\n\t"
345 	    "b_resp 0x%x\n\t"
346 	    "b_ack 0x%x\n"
347 	    "\tb_hold 0x%x\n\t"
348 	    "b_wb 0x%x\n\t"
349 	    "b_intvn 0x%x\n"
350 	    "\tb_stall_ib 0x%x\n\t"
351 	    "b_stall_int 0x%x\n"
352 	    "\tb_stall_bte_0 0x%x\n\t"
353 	    "b_stall_bte_1 0x%x\n"
354 	    "\tb_error 0x%x\n\t"
355 	    "b_lnetuce 0x%x\n\t"
356 	    "b_mark 0x%x\n\t"
357 	    "b_xerr 0x%x\n",
358 	    crb_num,
359 	    icrbb.b_imsgtype,
360 	    icrbb.b_imsg,
361 	    icrbb.b_use_old,
362 	    icrbb.b_initiator,
363 	    icrbb.b_exc,
364 	    icrbb.b_ackcnt,
365 	    icrbb.b_resp,
366 	    icrbb.b_ack,
367 	    icrbb.b_hold,
368 	    icrbb.b_wb,
369 	    icrbb.b_intvn,
370 	    icrbb.b_stall_ib,
371 	    icrbb.b_stall_int,
372 	    icrbb.b_stall_bte_0,
373 	    icrbb.b_stall_bte_1,
374 	    icrbb.b_error,
375 	    icrbb.b_lnetuce,
376 	    icrbb.b_mark,
377 	    icrbb.b_xerr);
378     printk("CRB %d regC\n\t"
379 	    "c_source 0x%x\n\t"
380 	    "c_xtsize 0x%x\n\t"
381 	    "c_cohtrans 0x%x\n\t"
382 	    "c_btenum 0x%x\n\t"
383 	    "c_gbr 0x%x\n\t"
384 	    "c_doresp 0x%x\n\t"
385 	    "c_barrop 0x%x\n\t"
386 	    "c_suppl 0x%x\n",
387 	    crb_num,
388 	    icrbc.c_source,
389 	    icrbc.c_xtsize,
390 	    icrbc.c_cohtrans,
391 	    icrbc.c_btenum,
392 	    icrbc.c_gbr,
393 	    icrbc.c_doresp,
394 	    icrbc.c_barrop,
395 	    icrbc.c_suppl);
396     printk("CRB %d regD\n\t"
397 	    "d_bteaddr 0x%lx\n\t"
398 	    "d_bteop 0x%x\n\t"
399 	    "d_pripsc 0x%x\n\t"
400 	    "d_pricnt 0x%x\n\t"
401 	    "d_sleep 0x%x\n\t",
402 	    crb_num,
403 	    icrbd.d_bteaddr,
404 	    icrbd.d_bteop,
405 	    icrbd.d_pripsc,
406 	    icrbd.d_pricnt,
407 	    icrbd.d_sleep);
408     printk("CRB %d regE\n\t"
409 	    "icrbe_timeout 0x%x\n\t"
410 	    "icrbe_context 0x%x\n\t"
411 	    "icrbe_toutvld 0x%x\n\t"
412 	    "icrbe_ctxtvld 0x%x\n\t",
413 	    crb_num,
414 	    icrbe.icrbe_timeout,
415 	    icrbe.icrbe_context,
416 	    icrbe.icrbe_toutvld,
417 	    icrbe.icrbe_ctxtvld);
418 }
419 
420 /*
421  * hubiio_crb_error_handler
422  *
423  *	This routine gets invoked when a hub gets an error
424  *	interrupt. So, the routine is running in interrupt context
425  *	at error interrupt level.
426  * Action:
427  *	It's responsible for identifying ALL the CRBs that are marked
428  *	with error, and process them.
429  *
430  * 	If you find the CRB that's marked with error, map this to the
431  *	reason it caused error, and invoke appropriate error handler.
432  *
433  *	XXX Be aware of the information in the context register.
434  *
435  * NOTE:
436  *	Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
437  *	handler can be run on any node. (not necessarily the node
438  *	corresponding to the hub that encountered error).
439  */
440 
441 int
hubiio_crb_error_handler(vertex_hdl_t hub_v,hubinfo_t hinfo)442 hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
443 {
444 	cnodeid_t	cnode;
445 	nasid_t		nasid;
446 	ii_icrb0_a_u_t		icrba;		/* II CRB Register A */
447 	ii_icrb0_b_u_t		icrbb;		/* II CRB Register B */
448 	ii_icrb0_c_u_t		icrbc;		/* II CRB Register C */
449 	ii_icrb0_d_u_t		icrbd;		/* II CRB Register D */
450 	ii_icrb0_e_u_t		icrbe;		/* II CRB Register D */
451 	int		i;
452 	int		num_errors = 0;	/* Num of errors handled */
453 	ioerror_t	ioerror;
454 	int		rc;
455 
456 	nasid = hinfo->h_nasid;
457 	cnode = NASID_TO_COMPACT_NODEID(nasid);
458 
459 	/*
460 	 * XXX - Add locking for any recovery actions
461 	 */
462 	/*
463 	 * Scan through all CRBs in the Hub, and handle the errors
464 	 * in any of the CRBs marked.
465 	 */
466 	for (i = 0; i < IIO_NUM_CRBS; i++) {
467 		/* Check this crb entry to see if it is in error. */
468 		icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
469 
470 		if (icrbb.b_mark == 0) {
471 			continue;
472 		}
473 
474 		icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
475 
476 		IOERROR_INIT(&ioerror);
477 
478 		/* read other CRB error registers. */
479 		icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
480 		icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
481 		icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));
482 
483 		IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
484 
485 		/* Check if this error is due to BTE operation,
486 		* and handle it separately.
487 		*/
488 		if (icrbd.d_bteop ||
489 			((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
490 			icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
491 			(icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
492 			icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){
493 
494 			int bte_num;
495 
496 			if (icrbd.d_bteop)
497 				bte_num = icrbc.c_btenum;
498 			else /* b_initiator bit 2 gives BTE number */
499 				bte_num = (icrbb.b_initiator & 0x4) >> 2;
500 
501 			hubiio_crb_free(hinfo, i);
502 
503 			bte_crb_error_handler(hub_v, bte_num,
504 					      i, &ioerror,
505 					      icrbd.d_bteop);
506 			num_errors++;
507 			continue;
508 		}
509 
510 		/*
511 		 * XXX
512 		 * Assuming the only other error that would reach here is
513 		 * crosstalk errors.
514 		 * If CRB times out on a message from Xtalk, it changes
515 		 * the message type to CRB.
516 		 *
517 		 * If we get here due to other errors (SN0net/CRB)
518 		 * what's the action ?
519 		 */
520 
521 		/*
522 		 * Pick out the useful fields in CRB, and
523 		 * tuck them away into ioerror structure.
524 		 */
525 		IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
526 		IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);
527 
528 
529 		if (icrba.a_iow){
530 			/*
531 			 * XXX We shouldn't really have BRIDGE-specific code
532 			 * here, but alas....
533 			 *
534 			 * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
535 			 * to indicate a WRITE operation.  It sets the next
536 			 * bit to indicate an INTERRUPT operation.  The bottom
537 			 * 3 bits of TNUM indicate which device was responsible.
538 			 */
539 			IOERROR_SETVALUE(&ioerror,widgetdev,
540 					 TNUM_TO_WIDGET_DEV(icrba.a_tnum));
541 			/*
542 			* The encoding of TNUM (see comments above) is
543 			* different for PIC. So we'll save TNUM here and
544 			* deal with the differences later when we can
545 			* determine if we're using a Bridge or the PIC.
546 			*
547 			* XXX:  We may be able to remove saving the widgetdev
548 			* above and just sort it out of TNUM later.
549 			*/
550 			IOERROR_SETVALUE(&ioerror, tnum, icrba.a_tnum);
551 
552 		}
553 		if (icrbb.b_error) {
554 		    /*
555 		     * CRB 'i' has some error. Identify the type of error,
556 		     * and try to handle it.
557 		     *
558 		     */
559 		    switch(icrbb.b_ecode) {
560 			case IIO_ICRB_ECODE_PERR:
561 			case IIO_ICRB_ECODE_WERR:
562 			case IIO_ICRB_ECODE_AERR:
563 			case IIO_ICRB_ECODE_PWERR:
564 			case IIO_ICRB_ECODE_TOUT:
565 			case IIO_ICRB_ECODE_XTERR:
566 			    printk("Shub II CRB %d: error %s on hub cnodeid: %d",
567 				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
568 			    /*
569 			     * Any sort of write error is mostly due
570 			     * bad programming (Note it's not a timeout.)
571 			     * So, invoke hub_iio_error_handler with
572 			     * appropriate information.
573 			     */
574 			    IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
575 
576 			    /* Go through the error bit lookup phase */
577 			    if (error_state_set(hub_v, ERROR_STATE_LOOKUP) ==
578 				    ERROR_RETURN_CODE_CANNOT_SET_STATE)
579 				return(IOERROR_UNHANDLED);
580 			    rc = hub_ioerror_handler(
581 				    hub_v,
582 				    DMA_WRITE_ERROR,
583 				    MODE_DEVERROR,
584 				    &ioerror);
585 			    if (rc == IOERROR_HANDLED) {
586 				rc = hub_ioerror_handler(
587 					hub_v,
588 					DMA_WRITE_ERROR,
589 					MODE_DEVREENABLE,
590 					&ioerror);
591 			    }else {
592 				printk("Unable to handle %s on hub %d",
593 					hubiio_crb_errors[icrbb.b_ecode],
594 					cnode);
595 				/* panic; */
596 			    }
597 			    /* Go to Next error */
598 			    print_crb_fields(i, icrba, icrbb, icrbc,
599 				    icrbd, icrbe);
600 			    hubiio_crb_free(hinfo, i);
601 			    continue;
602 			case IIO_ICRB_ECODE_PRERR:
603 			case IIO_ICRB_ECODE_DERR:
604 			    printk("Shub II CRB %d: error %s on hub : %d",
605 				    i, hubiio_crb_errors[icrbb.b_ecode], cnode);
606 			    /* panic */
607 			default:
608 			    printk("Shub II CRB error (code : %d) on hub : %d",
609 				    icrbb.b_ecode, cnode);
610 			    /* panic */
611 		    }
612 		}
613 		/*
614 		 * Error is not indicated via the errcode field
615 		 * Check other error indications in this register.
616 		 */
617 		if (icrbb.b_xerr) {
618 		    printk("Shub II CRB %d: Xtalk Packet with error bit set to hub %d",
619 			    i, cnode);
620 		    /* panic */
621 		}
622 		if (icrbb.b_lnetuce) {
623 		    printk("Shub II CRB %d: Uncorrectable data error detected on data "
624 			    " from NUMAlink to node %d",
625 			    i, cnode);
626 		    /* panic */
627 		}
628 		print_crb_fields(i, icrba, icrbb, icrbc, icrbd, icrbe);
629 
630 
631 
632 
633 
634 		if (icrbb.b_error) {
635 		/*
636 		 * CRB 'i' has some error. Identify the type of error,
637 		 * and try to handle it.
638 		 */
639 		switch(icrbb.b_ecode) {
640 		case IIO_ICRB_ECODE_PERR:
641 		case IIO_ICRB_ECODE_WERR:
642 		case IIO_ICRB_ECODE_AERR:
643 		case IIO_ICRB_ECODE_PWERR:
644 
645 			printk("%s on hub cnodeid: %d",
646 				hubiio_crb_errors[icrbb.b_ecode], cnode);
647 			/*
648 			 * Any sort of write error is mostly due
649 			 * bad programming (Note it's not a timeout.)
650 			 * So, invoke hub_iio_error_handler with
651 			 * appropriate information.
652 			 */
653 			IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
654 
655 			rc = hub_ioerror_handler(
656 					hub_v,
657 					DMA_WRITE_ERROR,
658 					MODE_DEVERROR,
659 					&ioerror);
660 
661                         if (rc == IOERROR_HANDLED) {
662                                 rc = hub_ioerror_handler(
663                                         hub_v,
664                                         DMA_WRITE_ERROR,
665                                         MODE_DEVREENABLE,
666                                         &ioerror);
667                                 ASSERT(rc == IOERROR_HANDLED);
668                         }else {
669 
670 				panic("Unable to handle %s on hub %d",
671 					hubiio_crb_errors[icrbb.b_ecode],
672 					cnode);
673 				/*NOTREACHED*/
674 			}
675 			/* Go to Next error */
676 			hubiio_crb_free(hinfo, i);
677 			continue;
678 
679 		case IIO_ICRB_ECODE_PRERR:
680 
681                 case IIO_ICRB_ECODE_TOUT:
682                 case IIO_ICRB_ECODE_XTERR:
683 
684 		case IIO_ICRB_ECODE_DERR:
685 			panic("Fatal %s on hub : %d",
686 				hubiio_crb_errors[icrbb.b_ecode], cnode);
687 			/*NOTREACHED*/
688 
689 		default:
690 			panic("Fatal error (code : %d) on hub : %d",
691 				icrbb.b_ecode, cnode);
692 			/*NOTREACHED*/
693 
694 		}
695 		} 	/* if (icrbb.b_error) */
696 
697 		/*
698 		 * Error is not indicated via the errcode field
699 		 * Check other error indications in this register.
700 		 */
701 
702 		if (icrbb.b_xerr) {
703 			panic("Xtalk Packet with error bit set to hub %d",
704 				cnode);
705 			/*NOTREACHED*/
706 		}
707 
708 		if (icrbb.b_lnetuce) {
709 			panic("Uncorrectable data error detected on data "
710 				" from Craylink to node %d",
711 				cnode);
712 			/*NOTREACHED*/
713 		}
714 
715 	}
716 	return	num_errors;
717 }
718 
719 /*
720  * hubii_check_widget_disabled
721  *
722  *	Check if PIO access to the specified widget is disabled due
723  *	to any II errors that are currently set.
724  *
725  *	The specific error bits checked are:
726  *		IPRBx register: SPUR_RD (51)
727  *				SPUR_WR (50)
728  *				RD_TO (49)
729  *				ERROR (48)
730  *
731  *		WSTAT register: CRAZY (32)
732  */
733 
734 int
hubii_check_widget_disabled(nasid_t nasid,int wnum)735 hubii_check_widget_disabled(nasid_t nasid, int wnum)
736 {
737 	iprb_t		iprb;
738 	ii_wstat_u_t	wstat;
739 
740 	iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(wnum));
741 	if (iprb.iprb_regval & (IIO_PRB_SPUR_RD | IIO_PRB_SPUR_WR |
742 		IIO_PRB_RD_TO | IIO_PRB_ERROR)) {
743 #ifdef DEBUG
744 	    printk(KERN_WARNING "II error, IPRB%x=0x%lx\n", wnum, iprb.iprb_regval);
745 #endif
746 	    return(1);
747 	}
748 
749 	wstat.ii_wstat_regval = REMOTE_HUB_L(nasid, IIO_WSTAT);
750 	if (wstat.ii_wstat_regval & IIO_WSTAT_ECRAZY) {
751 #ifdef DEBUG
752 	    printk(KERN_WARNING "II error, WSTAT=0x%lx\n", wstat.ii_wstat_regval);
753 #endif
754 	    return(1);
755 	}
756 	return(0);
757 }
758 
759 /*ARGSUSED*/
760 /*
761  * hubii_prb_handler
762  *      Handle the error reported in the PRB for wiget number wnum.
763  *      This typically happens on a PIO write error.
764  *      There is nothing much we can do in this interrupt context for
765  *      PIO write errors. For e.g. QL scsi controller has the
766  *      habit of flaking out on PIO writes.
767  *      Print a message and try to continue for now
768  *      Cleanup involes freeing the PRB register
769  */
770 static void
hubii_prb_handler(vertex_hdl_t hub_v,hubinfo_t hinfo,int wnum)771 hubii_prb_handler(vertex_hdl_t hub_v, hubinfo_t hinfo, int wnum)
772 {
773         nasid_t         nasid;
774 
775         nasid = hinfo->h_nasid;
776         /*
777          * Clear error bit by writing to IECLR register.
778          */
779         REMOTE_HUB_S(nasid, IIO_IECLR, (1 << wnum));
780         /*
781          * PIO Write to Widget 'i' got into an error.
782          * Invoke hubiio_error_handler with this information.
783          */
784         printk( "Hub nasid %d got a PIO Write error from widget %d, "
785 				"cleaning up and continuing", nasid, wnum);
786         /*
787          * XXX
788          * It may be necessary to adjust IO PRB counter
789          * to account for any lost credits.
790          */
791 }
792 
793 int
hubiio_prb_error_handler(vertex_hdl_t hub_v,hubinfo_t hinfo)794 hubiio_prb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
795 {
796         int             wnum;
797         nasid_t         nasid;
798         int             num_errors = 0;
799         iprb_t          iprb;
800 
801         nasid = hinfo->h_nasid;
802         /*
803          * Check if IPRB0 has any error first.
804          */
805         iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(0));
806         if (iprb.iprb_error) {
807                 num_errors++;
808                 hubii_prb_handler(hub_v, hinfo, 0);
809         }
810         /*
811          * Look through PRBs 8 - F to see if any of them has error bit set.
812          * If true, invoke hub iio error handler for this widget.
813          */
814         for (wnum = HUB_WIDGET_ID_MIN; wnum <= HUB_WIDGET_ID_MAX; wnum++) {
815                 iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(wnum));
816 
817                 if (!iprb.iprb_error)
818                         continue;
819 
820                 num_errors++;
821                 hubii_prb_handler(hub_v, hinfo, wnum);
822         }
823 
824         return num_errors;
825 }
826 
827