1 /* $Id: shuberror.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
2 *
3 * This file is subject to the terms and conditions of the GNU General Public
4 * License. See the file "COPYING" in the main directory of this archive
5 * for more details.
6 *
7 * Copyright (C) 1992 - 1997, 2000,2002-2003 Silicon Graphics, Inc. All rights reserved.
8 */
9
10
11 #include <linux/types.h>
12 #include <linux/slab.h>
13 #include <linux/irq.h>
14 #include <asm/io.h>
15 #include <asm/irq.h>
16 #include <asm/smp.h>
17 #include <asm/delay.h>
18 #include <asm/sn/sgi.h>
19 #include <asm/sn/io.h>
20 #include <asm/sn/iograph.h>
21 #include <asm/sn/invent.h>
22 #include <asm/sn/hcl.h>
23 #include <asm/sn/labelcl.h>
24 #include <asm/sn/sn_private.h>
25 #include <asm/sn/klconfig.h>
26 #include <asm/sn/sn_cpuid.h>
27 #include <asm/sn/pci/pciio.h>
28 #include <asm/sn/pci/pcibr.h>
29 #include <asm/sn/xtalk/xtalk.h>
30 #include <asm/sn/pci/pcibr_private.h>
31 #include <asm/sn/intr.h>
32 #include <asm/sn/ioerror_handling.h>
33 #include <asm/sn/ioerror.h>
34 #include <asm/sn/sn2/shubio.h>
35 #include <asm/sn/bte.h>
36
37 extern void hubni_eint_init(cnodeid_t cnode);
38 extern void hubii_eint_init(cnodeid_t cnode);
39 extern void hubii_eint_handler (int irq, void *arg, struct pt_regs *ep);
40 int hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo);
41 int hubiio_prb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo);
42 extern void bte_crb_error_handler(vertex_hdl_t hub_v, int btenum, int crbnum, ioerror_t *ioe, int bteop);
43 void print_crb_fields(int crb_num, ii_icrb0_a_u_t icrba,
44 ii_icrb0_b_u_t icrbb, ii_icrb0_c_u_t icrbc,
45 ii_icrb0_d_u_t icrbd, ii_icrb0_e_u_t icrbe);
46
47 extern int maxcpus;
48 extern error_return_code_t error_state_set(vertex_hdl_t v,error_state_t new_state);
49
50 #define HUB_ERROR_PERIOD (120 * HZ) /* 2 minutes */
51
52 void
hub_error_clear(nasid_t nasid)53 hub_error_clear(nasid_t nasid)
54 {
55 int i;
56
57 /*
58 * Make sure spurious write response errors are cleared
59 * (values are from hub_set_prb())
60 */
61 for (i = 0; i <= HUB_WIDGET_ID_MAX - HUB_WIDGET_ID_MIN + 1; i++) {
62 iprb_t prb;
63
64 prb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)));
65
66 /* Clear out some fields */
67 prb.iprb_ovflow = 1;
68 prb.iprb_bnakctr = 0;
69 prb.iprb_anakctr = 0;
70
71 prb.iprb_xtalkctr = 3; /* approx. PIO credits for the widget */
72
73 REMOTE_HUB_S(nasid, IIO_IOPRB_0 + (i * sizeof(hubreg_t)), prb.iprb_regval);
74 }
75
76 REMOTE_HUB_S(nasid, IIO_IECLR, -1);
77
78 }
79
80
81 /*
82 * Function : hub_error_init
83 * Purpose : initialize the error handling requirements for a given hub.
84 * Parameters : cnode, the compact nodeid.
85 * Assumptions : Called only once per hub, either by a local cpu. Or by a
86 * remote cpu, when this hub is headless.(cpuless)
87 * Returns : None
88 */
89
90 void
hub_error_init(cnodeid_t cnode)91 hub_error_init(cnodeid_t cnode)
92 {
93 nasid_t nasid;
94
95 nasid = cnodeid_to_nasid(cnode);
96 hub_error_clear(nasid);
97
98
99 /*
100 * Now setup the hub ii error interrupt handler.
101 */
102
103 hubii_eint_init(cnode);
104
105 return;
106 }
107
108 /*
109 * Function : hubii_eint_init
110 * Parameters : cnode
111 * Purpose : to initialize the hub iio error interrupt.
112 * Assumptions : Called once per hub, by the cpu which will ultimately
113 * handle this interrupt.
114 * Returns : None.
115 */
116
117 void
hubii_eint_init(cnodeid_t cnode)118 hubii_eint_init(cnodeid_t cnode)
119 {
120 int bit, rv;
121 ii_iidsr_u_t hubio_eint;
122 hubinfo_t hinfo;
123 cpuid_t intr_cpu;
124 vertex_hdl_t hub_v;
125 int bit_pos_to_irq(int bit);
126 ii_ilcsr_u_t ilcsr;
127
128
129 hub_v = (vertex_hdl_t)cnodeid_to_vertex(cnode);
130 ASSERT_ALWAYS(hub_v);
131 hubinfo_get(hub_v, &hinfo);
132
133 ASSERT(hinfo);
134 ASSERT(hinfo->h_cnodeid == cnode);
135
136 ilcsr.ii_ilcsr_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ILCSR);
137 if ((ilcsr.ii_ilcsr_fld_s.i_llp_stat & 0x2) == 0) {
138 /*
139 * HUB II link is not up. Disable LLP. Clear old errors.
140 * Enable interrupts to handle BTE errors.
141 */
142 ilcsr.ii_ilcsr_fld_s.i_llp_en = 0;
143 REMOTE_HUB_S(hinfo->h_nasid, IIO_ILCSR, ilcsr.ii_ilcsr_regval);
144 }
145
146 /* Select a possible interrupt target where there is a free interrupt
147 * bit and also reserve the interrupt bit for this IO error interrupt
148 */
149 intr_cpu = intr_heuristic(hub_v,0,SGI_II_ERROR,0,hub_v,
150 "HUB IO error interrupt",&bit);
151 if (intr_cpu == CPU_NONE) {
152 printk("hubii_eint_init: intr_reserve_level failed, cnode %d", cnode);
153 return;
154 }
155
156 rv = intr_connect_level(intr_cpu, SGI_II_ERROR, 0, NULL);
157 request_irq(SGI_II_ERROR, hubii_eint_handler, SA_SHIRQ, "SN_hub_error", (void *)hub_v);
158 irq_desc(bit)->status |= SN2_IRQ_PER_HUB;
159 ASSERT_ALWAYS(rv >= 0);
160 hubio_eint.ii_iidsr_regval = 0;
161 hubio_eint.ii_iidsr_fld_s.i_enable = 1;
162 hubio_eint.ii_iidsr_fld_s.i_level = bit;/* Take the least significant bits*/
163 hubio_eint.ii_iidsr_fld_s.i_node = COMPACT_TO_NASID_NODEID(cnode);
164 hubio_eint.ii_iidsr_fld_s.i_pi_id = cpuid_to_subnode(intr_cpu);
165 REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, hubio_eint.ii_iidsr_regval);
166
167 }
168
169
170 /*ARGSUSED*/
171 void
hubii_eint_handler(int irq,void * arg,struct pt_regs * ep)172 hubii_eint_handler (int irq, void *arg, struct pt_regs *ep)
173 {
174 vertex_hdl_t hub_v;
175 hubinfo_t hinfo;
176 ii_wstat_u_t wstat;
177 hubreg_t idsr;
178 ii_ilcsr_u_t ilcsr;
179
180
181 /* two levels of casting avoids compiler warning.!! */
182 hub_v = (vertex_hdl_t)(long)(arg);
183 ASSERT(hub_v);
184
185 hubinfo_get(hub_v, &hinfo);
186
187 idsr = REMOTE_HUB_L(hinfo->h_nasid, IIO_ICMR);
188 #if 0
189 if (idsr & 0x1) {
190 /* ICMR bit is set .. we are getting into "Spurious Interrupts condition. */
191 printk("Cnode %d II has seen the ICMR condition\n", hinfo->h_cnodeid);
192 printk("***** Please file PV with the above messages *****\n");
193 /* panic("We have to panic to prevent further unknown states ..\n"); */
194 }
195 #endif
196
197 /*
198 * Identify the reason for error.
199 */
200 wstat.ii_wstat_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_WSTAT);
201
202 if (wstat.ii_wstat_fld_s.w_crazy) {
203 char *reason;
204 /*
205 * We can do a couple of things here.
206 * Look at the fields TX_MX_RTY/XT_TAIL_TO/XT_CRD_TO to check
207 * which of these caused the CRAZY bit to be set.
208 * You may be able to check if the Link is up really.
209 */
210 if (wstat.ii_wstat_fld_s.w_tx_mx_rty)
211 reason = "Micro Packet Retry Timeout";
212 else if (wstat.ii_wstat_fld_s.w_xt_tail_to)
213 reason = "Crosstalk Tail Timeout";
214 else if (wstat.ii_wstat_fld_s.w_xt_crd_to)
215 reason = "Crosstalk Credit Timeout";
216 else {
217 hubreg_t hubii_imem;
218 /*
219 * Check if widget 0 has been marked as shutdown, or
220 * if BTE 0/1 has been marked.
221 */
222 hubii_imem = REMOTE_HUB_L(hinfo->h_nasid, IIO_IMEM);
223 if (hubii_imem & IIO_IMEM_W0ESD)
224 reason = "Hub Widget 0 has been Shutdown";
225 else if (hubii_imem & IIO_IMEM_B0ESD)
226 reason = "BTE 0 has been shutdown";
227 else if (hubii_imem & IIO_IMEM_B1ESD)
228 reason = "BTE 1 has been shutdown";
229 else reason = "Unknown";
230
231 }
232 /*
233 * Note: we may never be able to print this, if the II talking
234 * to Xbow which hosts the console is dead.
235 */
236 ilcsr.ii_ilcsr_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ILCSR);
237 if (ilcsr.ii_ilcsr_fld_s.i_llp_en == 1) { /* Link is enabled */
238 printk("Hub %d, cnode %d to Xtalk Link failed (II_ECRAZY) Reason: %s",
239 hinfo->h_nasid, hinfo->h_cnodeid, reason);
240 }
241 }
242
243
244 /*
245 * Before processing any interrupt related information, clear all
246 * error indication and reenable interrupts. This will prevent
247 * lost interrupts due to the interrupt handler scanning past a PRB/CRB
248 * which has not errorred yet and then the PRB/CRB goes into error.
249 * Note, PRB errors are cleared individually.
250 */
251 REMOTE_HUB_S(hinfo->h_nasid, IIO_IECLR, 0xff0000);
252 idsr = REMOTE_HUB_L(hinfo->h_nasid, IIO_IIDSR) & ~IIO_IIDSR_SENT_MASK;
253 REMOTE_HUB_S(hinfo->h_nasid, IIO_IIDSR, idsr);
254
255
256 /*
257 * It's a toss as to which one among PRB/CRB to check first.
258 * Current decision is based on the severity of the errors.
259 * IO CRB errors tend to be more severe than PRB errors.
260 *
261 * It is possible for BTE errors to have been handled already, so we
262 * may not see any errors handled here.
263 */
264 (void)hubiio_crb_error_handler(hub_v, hinfo);
265 (void)hubiio_prb_error_handler(hub_v, hinfo);
266 }
267
268 /*
269 * Free the hub CRB "crbnum" which encountered an error.
270 * Assumption is, error handling was successfully done,
271 * and we now want to return the CRB back to Hub for normal usage.
272 *
273 * In order to free the CRB, all that's needed is to de-allocate it
274 *
275 * Assumption:
276 * No other processor is mucking around with the hub control register.
277 * So, upper layer has to single thread this.
278 */
279 void
hubiio_crb_free(hubinfo_t hinfo,int crbnum)280 hubiio_crb_free(hubinfo_t hinfo, int crbnum)
281 {
282 ii_icrb0_b_u_t icrbb;
283
284 /*
285 * The hardware does NOT clear the mark bit, so it must get cleared
286 * here to be sure the error is not processed twice.
287 */
288 icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hinfo->h_nasid, IIO_ICRB_B(crbnum));
289 icrbb.b_mark = 0;
290 REMOTE_HUB_S(hinfo->h_nasid, IIO_ICRB_B(crbnum), icrbb.ii_icrb0_b_regval);
291
292 /*
293 * Deallocate the register.
294 */
295
296 REMOTE_HUB_S(hinfo->h_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
297
298 /*
299 * Wait till hub indicates it's done.
300 */
301 while (REMOTE_HUB_L(hinfo->h_nasid, IIO_ICDR) & IIO_ICDR_PND)
302 udelay(1);
303
304 }
305
306
/*
 * Names of the errors that get logged in CRBs.  hubiio_crb_error_handler()
 * indexes this table directly with the b_ecode field of CRB register B.
 * NOTE(review): the entry order presumably follows the numeric values of
 * the IIO_ICRB_ECODE_* constants -- confirm against their definitions.
 */
char *hubiio_crb_errors[] = {
	/* 0 */	"Directory Error",
	/* 1 */	"CRB Poison Error",
	/* 2 */	"I/O Write Error",
	/* 3 */	"I/O Access Error",
	/* 4 */	"I/O Partial Write Error",
	/* 5 */	"I/O Partial Read Error",
	/* 6 */	"I/O Timeout Error",
	/* 7 */	"Xtalk Error Packet"
};
320
321 void
print_crb_fields(int crb_num,ii_icrb0_a_u_t icrba,ii_icrb0_b_u_t icrbb,ii_icrb0_c_u_t icrbc,ii_icrb0_d_u_t icrbd,ii_icrb0_e_u_t icrbe)322 print_crb_fields(int crb_num, ii_icrb0_a_u_t icrba,
323 ii_icrb0_b_u_t icrbb, ii_icrb0_c_u_t icrbc,
324 ii_icrb0_d_u_t icrbd, ii_icrb0_e_u_t icrbe)
325 {
326 printk("CRB %d regA\n\t"
327 "a_iow 0x%x\n\t"
328 "valid0x%x\n\t"
329 "Address0x%lx\n\t"
330 "a_tnum 0x%x\n\t"
331 "a_sidn 0x%x\n",
332 crb_num,
333 icrba.a_iow,
334 icrba.a_valid,
335 icrba.a_addr,
336 icrba.a_tnum,
337 icrba.a_sidn);
338 printk("CRB %d regB\n\t"
339 "b_imsgtype 0x%x\n\t"
340 "b_imsg 0x%x\n"
341 "\tb_use_old 0x%x\n\t"
342 "b_initiator 0x%x\n\t"
343 "b_exc 0x%x\n"
344 "\tb_ackcnt 0x%x\n\t"
345 "b_resp 0x%x\n\t"
346 "b_ack 0x%x\n"
347 "\tb_hold 0x%x\n\t"
348 "b_wb 0x%x\n\t"
349 "b_intvn 0x%x\n"
350 "\tb_stall_ib 0x%x\n\t"
351 "b_stall_int 0x%x\n"
352 "\tb_stall_bte_0 0x%x\n\t"
353 "b_stall_bte_1 0x%x\n"
354 "\tb_error 0x%x\n\t"
355 "b_lnetuce 0x%x\n\t"
356 "b_mark 0x%x\n\t"
357 "b_xerr 0x%x\n",
358 crb_num,
359 icrbb.b_imsgtype,
360 icrbb.b_imsg,
361 icrbb.b_use_old,
362 icrbb.b_initiator,
363 icrbb.b_exc,
364 icrbb.b_ackcnt,
365 icrbb.b_resp,
366 icrbb.b_ack,
367 icrbb.b_hold,
368 icrbb.b_wb,
369 icrbb.b_intvn,
370 icrbb.b_stall_ib,
371 icrbb.b_stall_int,
372 icrbb.b_stall_bte_0,
373 icrbb.b_stall_bte_1,
374 icrbb.b_error,
375 icrbb.b_lnetuce,
376 icrbb.b_mark,
377 icrbb.b_xerr);
378 printk("CRB %d regC\n\t"
379 "c_source 0x%x\n\t"
380 "c_xtsize 0x%x\n\t"
381 "c_cohtrans 0x%x\n\t"
382 "c_btenum 0x%x\n\t"
383 "c_gbr 0x%x\n\t"
384 "c_doresp 0x%x\n\t"
385 "c_barrop 0x%x\n\t"
386 "c_suppl 0x%x\n",
387 crb_num,
388 icrbc.c_source,
389 icrbc.c_xtsize,
390 icrbc.c_cohtrans,
391 icrbc.c_btenum,
392 icrbc.c_gbr,
393 icrbc.c_doresp,
394 icrbc.c_barrop,
395 icrbc.c_suppl);
396 printk("CRB %d regD\n\t"
397 "d_bteaddr 0x%lx\n\t"
398 "d_bteop 0x%x\n\t"
399 "d_pripsc 0x%x\n\t"
400 "d_pricnt 0x%x\n\t"
401 "d_sleep 0x%x\n\t",
402 crb_num,
403 icrbd.d_bteaddr,
404 icrbd.d_bteop,
405 icrbd.d_pripsc,
406 icrbd.d_pricnt,
407 icrbd.d_sleep);
408 printk("CRB %d regE\n\t"
409 "icrbe_timeout 0x%x\n\t"
410 "icrbe_context 0x%x\n\t"
411 "icrbe_toutvld 0x%x\n\t"
412 "icrbe_ctxtvld 0x%x\n\t",
413 crb_num,
414 icrbe.icrbe_timeout,
415 icrbe.icrbe_context,
416 icrbe.icrbe_toutvld,
417 icrbe.icrbe_ctxtvld);
418 }
419
420 /*
421 * hubiio_crb_error_handler
422 *
423 * This routine gets invoked when a hub gets an error
424 * interrupt. So, the routine is running in interrupt context
425 * at error interrupt level.
426 * Action:
427 * It's responsible for identifying ALL the CRBs that are marked
428 * with error, and process them.
429 *
430 * If you find the CRB that's marked with error, map this to the
431 * reason it caused error, and invoke appropriate error handler.
432 *
433 * XXX Be aware of the information in the context register.
434 *
435 * NOTE:
436 * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
437 * handler can be run on any node. (not necessarily the node
438 * corresponding to the hub that encountered error).
439 */
440
441 int
hubiio_crb_error_handler(vertex_hdl_t hub_v,hubinfo_t hinfo)442 hubiio_crb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
443 {
444 cnodeid_t cnode;
445 nasid_t nasid;
446 ii_icrb0_a_u_t icrba; /* II CRB Register A */
447 ii_icrb0_b_u_t icrbb; /* II CRB Register B */
448 ii_icrb0_c_u_t icrbc; /* II CRB Register C */
449 ii_icrb0_d_u_t icrbd; /* II CRB Register D */
450 ii_icrb0_e_u_t icrbe; /* II CRB Register D */
451 int i;
452 int num_errors = 0; /* Num of errors handled */
453 ioerror_t ioerror;
454 int rc;
455
456 nasid = hinfo->h_nasid;
457 cnode = NASID_TO_COMPACT_NODEID(nasid);
458
459 /*
460 * XXX - Add locking for any recovery actions
461 */
462 /*
463 * Scan through all CRBs in the Hub, and handle the errors
464 * in any of the CRBs marked.
465 */
466 for (i = 0; i < IIO_NUM_CRBS; i++) {
467 /* Check this crb entry to see if it is in error. */
468 icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
469
470 if (icrbb.b_mark == 0) {
471 continue;
472 }
473
474 icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
475
476 IOERROR_INIT(&ioerror);
477
478 /* read other CRB error registers. */
479 icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
480 icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
481 icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));
482
483 IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
484
485 /* Check if this error is due to BTE operation,
486 * and handle it separately.
487 */
488 if (icrbd.d_bteop ||
489 ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
490 icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
491 (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
492 icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))){
493
494 int bte_num;
495
496 if (icrbd.d_bteop)
497 bte_num = icrbc.c_btenum;
498 else /* b_initiator bit 2 gives BTE number */
499 bte_num = (icrbb.b_initiator & 0x4) >> 2;
500
501 hubiio_crb_free(hinfo, i);
502
503 bte_crb_error_handler(hub_v, bte_num,
504 i, &ioerror,
505 icrbd.d_bteop);
506 num_errors++;
507 continue;
508 }
509
510 /*
511 * XXX
512 * Assuming the only other error that would reach here is
513 * crosstalk errors.
514 * If CRB times out on a message from Xtalk, it changes
515 * the message type to CRB.
516 *
517 * If we get here due to other errors (SN0net/CRB)
518 * what's the action ?
519 */
520
521 /*
522 * Pick out the useful fields in CRB, and
523 * tuck them away into ioerror structure.
524 */
525 IOERROR_SETVALUE(&ioerror,xtalkaddr,icrba.a_addr << IIO_ICRB_ADDR_SHFT);
526 IOERROR_SETVALUE(&ioerror,widgetnum,icrba.a_sidn);
527
528
529 if (icrba.a_iow){
530 /*
531 * XXX We shouldn't really have BRIDGE-specific code
532 * here, but alas....
533 *
534 * The BRIDGE (or XBRIDGE) sets the upper bit of TNUM
535 * to indicate a WRITE operation. It sets the next
536 * bit to indicate an INTERRUPT operation. The bottom
537 * 3 bits of TNUM indicate which device was responsible.
538 */
539 IOERROR_SETVALUE(&ioerror,widgetdev,
540 TNUM_TO_WIDGET_DEV(icrba.a_tnum));
541 /*
542 * The encoding of TNUM (see comments above) is
543 * different for PIC. So we'll save TNUM here and
544 * deal with the differences later when we can
545 * determine if we're using a Bridge or the PIC.
546 *
547 * XXX: We may be able to remove saving the widgetdev
548 * above and just sort it out of TNUM later.
549 */
550 IOERROR_SETVALUE(&ioerror, tnum, icrba.a_tnum);
551
552 }
553 if (icrbb.b_error) {
554 /*
555 * CRB 'i' has some error. Identify the type of error,
556 * and try to handle it.
557 *
558 */
559 switch(icrbb.b_ecode) {
560 case IIO_ICRB_ECODE_PERR:
561 case IIO_ICRB_ECODE_WERR:
562 case IIO_ICRB_ECODE_AERR:
563 case IIO_ICRB_ECODE_PWERR:
564 case IIO_ICRB_ECODE_TOUT:
565 case IIO_ICRB_ECODE_XTERR:
566 printk("Shub II CRB %d: error %s on hub cnodeid: %d",
567 i, hubiio_crb_errors[icrbb.b_ecode], cnode);
568 /*
569 * Any sort of write error is mostly due
570 * bad programming (Note it's not a timeout.)
571 * So, invoke hub_iio_error_handler with
572 * appropriate information.
573 */
574 IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
575
576 /* Go through the error bit lookup phase */
577 if (error_state_set(hub_v, ERROR_STATE_LOOKUP) ==
578 ERROR_RETURN_CODE_CANNOT_SET_STATE)
579 return(IOERROR_UNHANDLED);
580 rc = hub_ioerror_handler(
581 hub_v,
582 DMA_WRITE_ERROR,
583 MODE_DEVERROR,
584 &ioerror);
585 if (rc == IOERROR_HANDLED) {
586 rc = hub_ioerror_handler(
587 hub_v,
588 DMA_WRITE_ERROR,
589 MODE_DEVREENABLE,
590 &ioerror);
591 }else {
592 printk("Unable to handle %s on hub %d",
593 hubiio_crb_errors[icrbb.b_ecode],
594 cnode);
595 /* panic; */
596 }
597 /* Go to Next error */
598 print_crb_fields(i, icrba, icrbb, icrbc,
599 icrbd, icrbe);
600 hubiio_crb_free(hinfo, i);
601 continue;
602 case IIO_ICRB_ECODE_PRERR:
603 case IIO_ICRB_ECODE_DERR:
604 printk("Shub II CRB %d: error %s on hub : %d",
605 i, hubiio_crb_errors[icrbb.b_ecode], cnode);
606 /* panic */
607 default:
608 printk("Shub II CRB error (code : %d) on hub : %d",
609 icrbb.b_ecode, cnode);
610 /* panic */
611 }
612 }
613 /*
614 * Error is not indicated via the errcode field
615 * Check other error indications in this register.
616 */
617 if (icrbb.b_xerr) {
618 printk("Shub II CRB %d: Xtalk Packet with error bit set to hub %d",
619 i, cnode);
620 /* panic */
621 }
622 if (icrbb.b_lnetuce) {
623 printk("Shub II CRB %d: Uncorrectable data error detected on data "
624 " from NUMAlink to node %d",
625 i, cnode);
626 /* panic */
627 }
628 print_crb_fields(i, icrba, icrbb, icrbc, icrbd, icrbe);
629
630
631
632
633
634 if (icrbb.b_error) {
635 /*
636 * CRB 'i' has some error. Identify the type of error,
637 * and try to handle it.
638 */
639 switch(icrbb.b_ecode) {
640 case IIO_ICRB_ECODE_PERR:
641 case IIO_ICRB_ECODE_WERR:
642 case IIO_ICRB_ECODE_AERR:
643 case IIO_ICRB_ECODE_PWERR:
644
645 printk("%s on hub cnodeid: %d",
646 hubiio_crb_errors[icrbb.b_ecode], cnode);
647 /*
648 * Any sort of write error is mostly due
649 * bad programming (Note it's not a timeout.)
650 * So, invoke hub_iio_error_handler with
651 * appropriate information.
652 */
653 IOERROR_SETVALUE(&ioerror,errortype,icrbb.b_ecode);
654
655 rc = hub_ioerror_handler(
656 hub_v,
657 DMA_WRITE_ERROR,
658 MODE_DEVERROR,
659 &ioerror);
660
661 if (rc == IOERROR_HANDLED) {
662 rc = hub_ioerror_handler(
663 hub_v,
664 DMA_WRITE_ERROR,
665 MODE_DEVREENABLE,
666 &ioerror);
667 ASSERT(rc == IOERROR_HANDLED);
668 }else {
669
670 panic("Unable to handle %s on hub %d",
671 hubiio_crb_errors[icrbb.b_ecode],
672 cnode);
673 /*NOTREACHED*/
674 }
675 /* Go to Next error */
676 hubiio_crb_free(hinfo, i);
677 continue;
678
679 case IIO_ICRB_ECODE_PRERR:
680
681 case IIO_ICRB_ECODE_TOUT:
682 case IIO_ICRB_ECODE_XTERR:
683
684 case IIO_ICRB_ECODE_DERR:
685 panic("Fatal %s on hub : %d",
686 hubiio_crb_errors[icrbb.b_ecode], cnode);
687 /*NOTREACHED*/
688
689 default:
690 panic("Fatal error (code : %d) on hub : %d",
691 icrbb.b_ecode, cnode);
692 /*NOTREACHED*/
693
694 }
695 } /* if (icrbb.b_error) */
696
697 /*
698 * Error is not indicated via the errcode field
699 * Check other error indications in this register.
700 */
701
702 if (icrbb.b_xerr) {
703 panic("Xtalk Packet with error bit set to hub %d",
704 cnode);
705 /*NOTREACHED*/
706 }
707
708 if (icrbb.b_lnetuce) {
709 panic("Uncorrectable data error detected on data "
710 " from Craylink to node %d",
711 cnode);
712 /*NOTREACHED*/
713 }
714
715 }
716 return num_errors;
717 }
718
719 /*
720 * hubii_check_widget_disabled
721 *
722 * Check if PIO access to the specified widget is disabled due
723 * to any II errors that are currently set.
724 *
725 * The specific error bits checked are:
726 * IPRBx register: SPUR_RD (51)
727 * SPUR_WR (50)
728 * RD_TO (49)
729 * ERROR (48)
730 *
731 * WSTAT register: CRAZY (32)
732 */
733
734 int
hubii_check_widget_disabled(nasid_t nasid,int wnum)735 hubii_check_widget_disabled(nasid_t nasid, int wnum)
736 {
737 iprb_t iprb;
738 ii_wstat_u_t wstat;
739
740 iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(wnum));
741 if (iprb.iprb_regval & (IIO_PRB_SPUR_RD | IIO_PRB_SPUR_WR |
742 IIO_PRB_RD_TO | IIO_PRB_ERROR)) {
743 #ifdef DEBUG
744 printk(KERN_WARNING "II error, IPRB%x=0x%lx\n", wnum, iprb.iprb_regval);
745 #endif
746 return(1);
747 }
748
749 wstat.ii_wstat_regval = REMOTE_HUB_L(nasid, IIO_WSTAT);
750 if (wstat.ii_wstat_regval & IIO_WSTAT_ECRAZY) {
751 #ifdef DEBUG
752 printk(KERN_WARNING "II error, WSTAT=0x%lx\n", wstat.ii_wstat_regval);
753 #endif
754 return(1);
755 }
756 return(0);
757 }
758
759 /*ARGSUSED*/
760 /*
761 * hubii_prb_handler
762 * Handle the error reported in the PRB for wiget number wnum.
763 * This typically happens on a PIO write error.
764 * There is nothing much we can do in this interrupt context for
765 * PIO write errors. For e.g. QL scsi controller has the
766 * habit of flaking out on PIO writes.
767 * Print a message and try to continue for now
768 * Cleanup involes freeing the PRB register
769 */
770 static void
hubii_prb_handler(vertex_hdl_t hub_v,hubinfo_t hinfo,int wnum)771 hubii_prb_handler(vertex_hdl_t hub_v, hubinfo_t hinfo, int wnum)
772 {
773 nasid_t nasid;
774
775 nasid = hinfo->h_nasid;
776 /*
777 * Clear error bit by writing to IECLR register.
778 */
779 REMOTE_HUB_S(nasid, IIO_IECLR, (1 << wnum));
780 /*
781 * PIO Write to Widget 'i' got into an error.
782 * Invoke hubiio_error_handler with this information.
783 */
784 printk( "Hub nasid %d got a PIO Write error from widget %d, "
785 "cleaning up and continuing", nasid, wnum);
786 /*
787 * XXX
788 * It may be necessary to adjust IO PRB counter
789 * to account for any lost credits.
790 */
791 }
792
793 int
hubiio_prb_error_handler(vertex_hdl_t hub_v,hubinfo_t hinfo)794 hubiio_prb_error_handler(vertex_hdl_t hub_v, hubinfo_t hinfo)
795 {
796 int wnum;
797 nasid_t nasid;
798 int num_errors = 0;
799 iprb_t iprb;
800
801 nasid = hinfo->h_nasid;
802 /*
803 * Check if IPRB0 has any error first.
804 */
805 iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(0));
806 if (iprb.iprb_error) {
807 num_errors++;
808 hubii_prb_handler(hub_v, hinfo, 0);
809 }
810 /*
811 * Look through PRBs 8 - F to see if any of them has error bit set.
812 * If true, invoke hub iio error handler for this widget.
813 */
814 for (wnum = HUB_WIDGET_ID_MIN; wnum <= HUB_WIDGET_ID_MAX; wnum++) {
815 iprb.iprb_regval = REMOTE_HUB_L(nasid, IIO_IOPRB(wnum));
816
817 if (!iprb.iprb_error)
818 continue;
819
820 num_errors++;
821 hubii_prb_handler(hub_v, hinfo, wnum);
822 }
823
824 return num_errors;
825 }
826
827