1 /*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
7 */
8 #ifndef _ASM_IA64_SN_IOERROR_HANDLING_H
9 #define _ASM_IA64_SN_IOERROR_HANDLING_H
10
11 #include <linux/config.h>
12 #include <linux/types.h>
13 #include <linux/devfs_fs_kernel.h>
14 #include <asm/sn/sgi.h>
15
16 #if __KERNEL__
17
18 /*
19 * Basic types required for io error handling interfaces.
20 */
21
22 /*
23 * Return code from the io error handling interfaces.
24 */
25
/*
 * Enumerators are numbered consecutively from zero; the values are written
 * out explicitly so the numbering is visible at a glance.
 */
enum error_return_code_e {
	/* The call succeeded. */
	ERROR_RETURN_CODE_SUCCESS = 0,

	/* Failure with no more specific cause. */
	ERROR_RETURN_CODE_GENERAL_FAILURE = 1,

	/* A further (Nth) error was noticed while handling the first one. */
	ERROR_RETURN_CODE_NESTED_CALL = 2,

	/* The vertex is in an invalid state. */
	ERROR_RETURN_CODE_INVALID_STATE = 3,

	/* The action is not a valid one. */
	ERROR_RETURN_CODE_INVALID_ACTION = 4,

	/* The action is valid but it cannot be set. */
	ERROR_RETURN_CODE_CANNOT_SET_ACTION = 5,

	/* The action is valid but cannot be performed in the current state. */
	ERROR_RETURN_CODE_CANNOT_PERFORM_ACTION = 6,

	/* The state is valid but the vertex cannot be moved into it. */
	ERROR_RETURN_CODE_CANNOT_SET_STATE = 7,

	/* Meaning not documented in this header ("???") - TODO confirm. */
	ERROR_RETURN_CODE_DUPLICATE = 8,

	/* The root of the system critical graph was reached. */
	ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_BEGIN = 9,

	/* The leaf of the system critical graph was reached. */
	ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_ADD = 10,

	/* The device could not be shut down in hw/sw. */
	ERROR_RETURN_CODE_SHUTDOWN_FAILED = 11,

	/* The device could not be restarted in hw/sw. */
	ERROR_RETURN_CODE_RESET_FAILED = 12,

	/* The io subsystem could not be failed over. */
	ERROR_RETURN_CODE_FAILOVER_FAILED = 13,

	/* There is no jump buffer. */
	ERROR_RETURN_CODE_NO_JUMP_BUFFER = 14
};

/* The interfaces carry return codes as a 64-bit unsigned integer. */
typedef uint64_t error_return_code_t;
74
75 /*
76 * State of the vertex during error handling.
77 */
/*
 * Enumerators are numbered consecutively from zero; the values are written
 * out explicitly so the numbering is visible at a glance.
 */
enum error_state_e {
	/* Ignore state. */
	ERROR_STATE_IGNORE = 0,

	/* Invalid state. */
	ERROR_STATE_NONE = 1,

	/* The error bits are being deciphered. */
	ERROR_STATE_LOOKUP = 2,

	/*
	 * The action chosen after looking at the error bits is being
	 * carried out.
	 */
	ERROR_STATE_ACTION = 3,

	/*
	 * Other parts of the kernel must not perform any operation on this
	 * vertex.  Also indicates that the device has been shut down in
	 * software.
	 */
	ERROR_STATE_SHUTDOWN = 4,

	/*
	 * Transitory state in which no new requests are accepted on behalf
	 * of the device.  Usually used while quiescing all outstanding
	 * operations and preparing the device for a failover, shutdown, etc.
	 */
	ERROR_STATE_SHUTDOWN_IN_PROGRESS = 5,

	/* There is absolutely no activity going on wrt the device. */
	ERROR_STATE_SHUTDOWN_COMPLETE = 6,

	/* The device has issued a retry. */
	ERROR_STATE_RETRY = 7,

	/*
	 * The normal state.  Can also indicate that the device has been
	 * software-enabled again after an earlier software shutdown.
	 */
	ERROR_STATE_NORMAL = 8
};

/* The interfaces carry vertex error states as a 64-bit unsigned integer. */
typedef uint64_t error_state_t;
123
124 /*
125 * Generic error classes. This is used to classify errors after looking
126 * at the error bits and helpful in deciding on the action.
127 */
/*
 * Enumerators are numbered consecutively from zero; the values are written
 * out explicitly so the numbering is visible at a glance.
 */
enum error_class_e {
	ERROR_CLASS_UNKNOWN = 0,	/* unclassified error */
	ERROR_CLASS_LLP_XMIT = 1,	/* LLP transmit error */
	ERROR_CLASS_LLP_RECV = 2,	/* LLP receive error */
	ERROR_CLASS_CREDIT = 3,		/* credit error */
	ERROR_CLASS_TIMEOUT = 4,	/* timeout error */
	ERROR_CLASS_ACCESS = 5,		/* access error */
	ERROR_CLASS_SYS_COHERENCY = 6,	/* system coherency error */
	ERROR_CLASS_BAD_DATA = 7,	/* bad data (ecc / parity etc) */
	ERROR_CLASS_BAD_REQ_PKT = 8,	/* illegal request packet */
	ERROR_CLASS_BAD_RESP_PKT = 9	/* illegal response packet */
};

/* The interfaces carry error classes as a 64-bit unsigned integer. */
typedef uint64_t error_class_t;
161
162
163 /*
164 * Error context which the error action can use.
165 */
/* Untyped pointer passed to an error action as its only argument. */
typedef void *error_context_t;

/* Sentinel context: the integer -1 converted to error_context_t. */
#define ERROR_CONTEXT_IGNORE	((error_context_t)-1LL)
168
169
170 /*
171 * Error action type.
172 */
173 typedef error_return_code_t (*error_action_f)( error_context_t);
174 #define ERROR_ACTION_IGNORE ((error_action_f)-1ll)
175
176 /* Typical set of error actions */
177 typedef struct error_action_set_s {
178 error_action_f eas_panic;
179 error_action_f eas_shutdown;
180 error_action_f eas_abort;
181 error_action_f eas_retry;
182 error_action_f eas_failover;
183 error_action_f eas_log_n_ignore;
184 error_action_f eas_reset;
185 } error_action_set_t;
186
187
/* Set of priorities used when multiple error actions/states
 * are being prescribed for a device at the same time.
 * NOTE : The ordering below encapsulates the priorities. Highest value
 * corresponds to highest priority.
 */
/*
 * The numeric value is the priority: a larger value means a higher priority.
 * The values are written out explicitly so the ranking is visible.
 */
enum error_priority_e {
	ERROR_PRIORITY_IGNORE = 0,
	ERROR_PRIORITY_NONE = 1,
	ERROR_PRIORITY_NORMAL = 2,
	ERROR_PRIORITY_LOG = 3,
	ERROR_PRIORITY_FAILOVER = 4,
	ERROR_PRIORITY_RETRY = 5,
	ERROR_PRIORITY_ABORT = 6,
	ERROR_PRIORITY_SHUTDOWN = 7,
	ERROR_PRIORITY_RESTART = 8,
	ERROR_PRIORITY_PANIC = 9
};

/* The interfaces carry priorities as a 64-bit unsigned integer. */
typedef uint64_t error_priority_t;
207
208 /* Error state interfaces */
#ifdef CONFIG_SGI_IO_ERROR_HANDLING
/*
 * Setter and getter for the error state of a vertex.  Declared only when
 * CONFIG_SGI_IO_ERROR_HANDLING is defined.
 */
extern error_return_code_t error_state_set(vertex_hdl_t vhdl,
					   error_state_t state);
extern error_state_t error_state_get(vertex_hdl_t vhdl);
#endif /* CONFIG_SGI_IO_ERROR_HANDLING */
213
214 /* Error action interfaces */
215
216 extern error_return_code_t error_action_set(vertex_hdl_t,
217 error_action_f,
218 error_context_t,
219 error_priority_t);
220 extern error_return_code_t error_action_perform(vertex_hdl_t);
221
222
/* Label under which the error-skip jump buffer is stored on a vertex. */
#define INFO_LBL_ERROR_SKIP_ENV "error_skip_env"

/*
 * Read the info stored under INFO_LBL_ERROR_SKIP_ENV on vertex v into the
 * lvalue l (its address is passed on as an arbitrary_info_t *).
 * Evaluates to the result of hwgraph_info_get_LBL().
 */
#define v_error_skip_env_get(v, l)					\
	hwgraph_info_get_LBL((v), INFO_LBL_ERROR_SKIP_ENV,		\
			     (arbitrary_info_t *)&(l))

/*
 * Store l (converted to arbitrary_info_t) under INFO_LBL_ERROR_SKIP_ENV on
 * vertex v.  A non-zero r selects hwgraph_info_replace_LBL(), a zero r
 * selects hwgraph_info_add_LBL(); the macro evaluates to the result of the
 * call that was made.
 *
 * Every argument is parenthesised so that expressions such as "p + 1" or
 * "c ? a : b" are converted and tested as a whole.
 */
#define v_error_skip_env_set(v, l, r)					\
	((r) ?								\
	 hwgraph_info_replace_LBL((v), INFO_LBL_ERROR_SKIP_ENV,		\
				  (arbitrary_info_t)(l), 0) :		\
	 hwgraph_info_add_LBL((v), INFO_LBL_ERROR_SKIP_ENV,		\
			      (arbitrary_info_t)(l)))

/*
 * Remove the info stored under INFO_LBL_ERROR_SKIP_ENV from vertex v.
 * Evaluates to the result of hwgraph_info_remove_LBL().
 */
#define v_error_skip_env_clear(v)					\
	hwgraph_info_remove_LBL((v), INFO_LBL_ERROR_SKIP_ENV, 0)
235
236 /* REFERENCED */
#if defined(CONFIG_SGI_IO_ERROR_HANDLING)

/*
 * error_skip_point_mark - capture a setjmp() context for vertex v.
 *
 * Looks up the jump buffer stored on the vertex under
 * INFO_LBL_ERROR_SKIP_ENV.  If the lookup does not succeed, a zeroed
 * label_t is allocated and added to the vertex.  setjmp() is then called
 * on the buffer.
 *
 * Returns -1 when the buffer cannot be allocated, -2 when it cannot be
 * stored on the vertex, and otherwise the value produced by setjmp().
 *
 * NOTE(review): on the -2 path the freshly allocated buffer is not released.
 * NOTE(review): the context is captured inside this function, which then
 * returns; presumably this relies on the function being inlined into its
 * caller - confirm before adding new users.
 */
static inline int
error_skip_point_mark(vertex_hdl_t v)
{
	label_t *jump_buf = NULL;
	int code = 0;

#ifdef LATER
	/* Nothing to do unless v is a valid hwgraph vertex. */
	if (!dev_is_vertex(v))
		return code;
#endif

	if (v_error_skip_env_get(v, jump_buf) != GRAPH_SUCCESS) {
		/* No jump buffer on this device vertex yet: create one. */
		jump_buf = snia_kmem_zalloc(sizeof(label_t));

		/* Running out of memory here should be very rare. */
		if (jump_buf == NULL)
			return -1;

		/* Record the new jump buffer on the vertex. */
		if (v_error_skip_env_set(v, jump_buf, 0) != GRAPH_SUCCESS)
			return -2;
	}

	ASSERT(v_error_skip_env_get(v, jump_buf) == GRAPH_SUCCESS);

	code = setjmp(*jump_buf);
	return code;
}

#endif /* CONFIG_SGI_IO_ERROR_HANDLING */
270
271 typedef uint64_t counter_t;
272
273 extern counter_t error_retry_count_get(vertex_hdl_t);
274 extern error_return_code_t error_retry_count_set(vertex_hdl_t,counter_t);
275 extern counter_t error_retry_count_increment(vertex_hdl_t);
276 extern counter_t error_retry_count_decrement(vertex_hdl_t);
277
278 /* Except for the PIO Read error typically the other errors are handled in
279 * the context of an asynchronous error interrupt.
280 */
/*
 * True when error code _ec has any IOECODE_DMA bit set or is exactly
 * IOECODE_PIO_WRITE.
 *
 * _ec is parenthesised so that an expression argument (e.g. "a | b") is
 * tested as a whole.  It is still evaluated twice, so do not pass an
 * argument with side effects.
 */
#define IS_ERROR_INTR_CONTEXT(_ec)	((((_ec) & IOECODE_DMA) != 0) || \
					 ((_ec) == IOECODE_PIO_WRITE))
283
/* Some convenience macros on device state. This state is accessed only
 * through calls into the io error handling layer.
 */
#ifdef CONFIG_SGI_IO_ERROR_HANDLING
/*
 * IS_DEVICE_SHUTDOWN(_d) is a thin wrapper that calls is_device_shutdown()
 * on _d.  Both exist only when CONFIG_SGI_IO_ERROR_HANDLING is defined.
 */
extern boolean_t is_device_shutdown(vertex_hdl_t vhdl);
#define IS_DEVICE_SHUTDOWN(_d)	(is_device_shutdown(_d))
#endif /* CONFIG_SGI_IO_ERROR_HANDLING */
291
292 #endif /* __KERNEL__ */
293 #endif /* _ASM_IA64_SN_IOERROR_HANDLING_H */
294