1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
7  */
8 #ifndef _ASM_IA64_SN_IOERROR_HANDLING_H
9 #define _ASM_IA64_SN_IOERROR_HANDLING_H
10 
11 #include <linux/config.h>
12 #include <linux/types.h>
13 #include <linux/devfs_fs_kernel.h>
14 #include <asm/sn/sgi.h>
15 
16 #if __KERNEL__
17 
18 /*
19  * Basic types required for io error handling interfaces.
20  */
21 
22 /*
23  * Return code from the io error handling interfaces.
24  */
25 
/* Enumerators are numbered consecutively, starting at zero. */
enum error_return_code_e {
	ERROR_RETURN_CODE_SUCCESS			= 0,	/* success */
	ERROR_RETURN_CODE_GENERAL_FAILURE		= 1,	/* unknown failure */
	ERROR_RETURN_CODE_NESTED_CALL			= 2,	/* Nth error noticed while handling the first one */
	ERROR_RETURN_CODE_INVALID_STATE			= 3,	/* vertex state is invalid */
	ERROR_RETURN_CODE_INVALID_ACTION		= 4,	/* action is invalid */
	ERROR_RETURN_CODE_CANNOT_SET_ACTION		= 5,	/* action is valid but cannot be set */
	ERROR_RETURN_CODE_CANNOT_PERFORM_ACTION		= 6,	/* action is valid but not possible in the current state */
	ERROR_RETURN_CODE_CANNOT_SET_STATE		= 7,	/* state is valid but the vertex cannot be moved to it */
	ERROR_RETURN_CODE_DUPLICATE			= 8,	/* meaning not documented in this header */
	ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_BEGIN	= 9,	/* reached the root of the system critical graph */
	ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_ADD	= 10,	/* reached the leaf of the system critical graph */
	ERROR_RETURN_CODE_SHUTDOWN_FAILED		= 11,	/* cannot shut down the device in hw/sw */
	ERROR_RETURN_CODE_RESET_FAILED			= 12,	/* cannot restart the device in hw/sw */
	ERROR_RETURN_CODE_FAILOVER_FAILED		= 13,	/* cannot fail over the io subsystem */
	ERROR_RETURN_CODE_NO_JUMP_BUFFER		= 14	/* no jump buffer exists */
};

/* Return codes travel in a 64-bit integer rather than in the enum type. */
typedef uint64_t	error_return_code_t;
74 
75 /*
76  * State of the vertex during error handling.
77  */
/* Enumerators are numbered consecutively, starting at zero. */
enum error_state_e {
	ERROR_STATE_IGNORE			= 0,	/* ignore state */
	ERROR_STATE_NONE			= 1,	/* invalid state */
	ERROR_STATE_LOOKUP			= 2,	/* deciphering the error bits */

	/*
	 * Carrying out the action that was decided upon after looking
	 * at the error bits.
	 */
	ERROR_STATE_ACTION			= 3,

	/*
	 * No other part of the kernel may operate on this vertex. Also
	 * indicates that the device has been shut down in software.
	 */
	ERROR_STATE_SHUTDOWN			= 4,

	/*
	 * Transitory state: no new requests are accepted on behalf of
	 * the device. Usually used while quiescing all outstanding
	 * operations and preparing the device for a failover, shutdown
	 * etc.
	 */
	ERROR_STATE_SHUTDOWN_IN_PROGRESS	= 5,

	/* There is absolutely no activity going on with the device. */
	ERROR_STATE_SHUTDOWN_COMPLETE		= 6,

	/* The device has issued a retry. */
	ERROR_STATE_RETRY			= 7,

	/*
	 * Normal state. Can also indicate that the device has been
	 * re-enabled in software after an earlier software shutdown.
	 */
	ERROR_STATE_NORMAL			= 8
};

/* Vertex states travel in a 64-bit integer rather than in the enum type. */
typedef uint64_t	error_state_t;
123 
124 /*
125  * Generic error classes. This is used to classify errors after looking
126  * at the error bits and helpful in deciding on the action.
127  */
/* Enumerators are numbered consecutively, starting at zero. */
enum error_class_e {
	ERROR_CLASS_UNKNOWN		= 0,	/* unclassified error */
	ERROR_CLASS_LLP_XMIT		= 1,	/* LLP transmit error */
	ERROR_CLASS_LLP_RECV		= 2,	/* LLP receive error */
	ERROR_CLASS_CREDIT		= 3,	/* credit error */
	ERROR_CLASS_TIMEOUT		= 4,	/* timeout error */
	ERROR_CLASS_ACCESS		= 5,	/* access error */
	ERROR_CLASS_SYS_COHERENCY	= 6,	/* system coherency error */
	ERROR_CLASS_BAD_DATA		= 7,	/* bad data error (ecc / parity etc) */
	ERROR_CLASS_BAD_REQ_PKT		= 8,	/* illegal request packet */
	ERROR_CLASS_BAD_RESP_PKT	= 9	/* illegal response packet */
};

/* Error classes travel in a 64-bit integer rather than in the enum type. */
typedef uint64_t	error_class_t;
161 
162 
163 /*
164  * Error context which the error action can use.
165  */
/* Untyped pointer; ERROR_CONTEXT_IGNORE is the value -1 cast to that type. */
typedef void *error_context_t;
#define ERROR_CONTEXT_IGNORE	((error_context_t)(-1LL))
168 
169 
170 /*
171  * Error action type.
172  */
173 typedef error_return_code_t 	(*error_action_f)( error_context_t);
174 #define ERROR_ACTION_IGNORE	((error_action_f)-1ll)
175 
176 /* Typical set of error actions */
177 typedef struct error_action_set_s {
178 	error_action_f		eas_panic;
179 	error_action_f		eas_shutdown;
180 	error_action_f		eas_abort;
181 	error_action_f		eas_retry;
182 	error_action_f		eas_failover;
183 	error_action_f		eas_log_n_ignore;
184 	error_action_f		eas_reset;
185 } error_action_set_t;
186 
187 
/*
 * Priorities used when multiple error actions/states are being
 * prescribed for the same device.
 * NOTE: the numeric ordering below encodes the priority; the highest
 * value corresponds to the highest priority.
 */
enum error_priority_e {
	ERROR_PRIORITY_IGNORE	= 0,
	ERROR_PRIORITY_NONE	= 1,
	ERROR_PRIORITY_NORMAL	= 2,
	ERROR_PRIORITY_LOG	= 3,
	ERROR_PRIORITY_FAILOVER	= 4,
	ERROR_PRIORITY_RETRY	= 5,
	ERROR_PRIORITY_ABORT	= 6,
	ERROR_PRIORITY_SHUTDOWN	= 7,
	ERROR_PRIORITY_RESTART	= 8,
	ERROR_PRIORITY_PANIC	= 9
};

/* Priorities travel in a 64-bit integer rather than in the enum type. */
typedef uint64_t	error_priority_t;
207 
208 /* Error state interfaces */
#ifdef CONFIG_SGI_IO_ERROR_HANDLING
/*
 * Setter/getter for the error state of a hwgraph vertex. Only declared
 * when io error handling is configured in; the definitions live outside
 * this header.
 */
extern error_return_code_t	error_state_set(vertex_hdl_t vhdl,
						error_state_t state);
extern error_state_t		error_state_get(vertex_hdl_t vhdl);
#endif
213 
214 /* Error action interfaces */
215 
216 extern error_return_code_t	error_action_set(vertex_hdl_t,
217 						 error_action_f,
218 						 error_context_t,
219 						 error_priority_t);
220 extern error_return_code_t	error_action_perform(vertex_hdl_t);
221 
222 
/* hwgraph info label under which a vertex's error-skip environment is kept. */
#define INFO_LBL_ERROR_SKIP_ENV	"error_skip_env"

/*
 * Fetch the INFO_LBL_ERROR_SKIP_ENV info of vertex v into the lvalue l.
 * Evaluates to whatever hwgraph_info_get_LBL() returns.
 */
#define v_error_skip_env_get(v, l)		\
	hwgraph_info_get_LBL((v), INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t *)&(l))

/*
 * Store l as the INFO_LBL_ERROR_SKIP_ENV info of vertex v: replaces the
 * existing info when r is non-zero, adds a new one otherwise.
 * Every argument is parenthesized so that expressions containing
 * low-precedence operators (e.g. an assignment passed as r) expand as the
 * caller wrote them.
 */
#define v_error_skip_env_set(v, l, r)		\
	((r) ?					\
	 hwgraph_info_replace_LBL((v), INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)(l), 0) : \
	 hwgraph_info_add_LBL((v), INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)(l)))

/* Remove the INFO_LBL_ERROR_SKIP_ENV info from vertex v. */
#define v_error_skip_env_clear(v)		\
	hwgraph_info_remove_LBL((v), INFO_LBL_ERROR_SKIP_ENV, 0)
235 
236 /* REFERENCED */
237 #if defined(CONFIG_SGI_IO_ERROR_HANDLING)
238 
239 inline static int
error_skip_point_mark(vertex_hdl_t v)240 error_skip_point_mark(vertex_hdl_t  v)
241 {
242 	label_t		*error_env = NULL;
243 	int		code = 0;
244 
245 	/* Check if we have a valid hwgraph vertex */
246 #ifdef	LATER
247 	if (!dev_is_vertex(v))
248 		return(code);
249 #endif
250 
251 	/* There is no error jump buffer for this device vertex. Allocate
252 	 * one.
253 	 */
254 	if (v_error_skip_env_get(v, error_env) != GRAPH_SUCCESS) {
255 		error_env = snia_kmem_zalloc(sizeof(label_t));
256 		/* Unable to allocate memory for jum buffer. This should
257 		 * be a very rare occurrence.
258 		 */
259 		if (!error_env)
260 			return(-1);
261 		/* Store the jump buffer information on the vertex.*/
262 		if (v_error_skip_env_set(v, error_env, 0) != GRAPH_SUCCESS)
263 			return(-2);
264 	}
265 	ASSERT(v_error_skip_env_get(v, error_env) == GRAPH_SUCCESS);
266 	code = setjmp(*error_env);
267 	return(code);
268 }
269 #endif	/* CONFIG_SGI_IO_ERROR_HANDLING */
270 
271 typedef uint64_t		counter_t;
272 
273 extern counter_t		error_retry_count_get(vertex_hdl_t);
274 extern error_return_code_t	error_retry_count_set(vertex_hdl_t,counter_t);
275 extern counter_t		error_retry_count_increment(vertex_hdl_t);
276 extern counter_t		error_retry_count_decrement(vertex_hdl_t);
277 
/* Except for the PIO Read error, the other errors are typically handled
 * in the context of an asynchronous error interrupt.
 *
 * True when _ec has any IOECODE_DMA bit set or equals IOECODE_PIO_WRITE.
 * _ec is parenthesized so that an expression argument (e.g. "a | b") is
 * tested as a whole; note that it is still evaluated twice.
 */
#define	IS_ERROR_INTR_CONTEXT(_ec)	((((_ec) & IOECODE_DMA) != 0)	|| \
					 ((_ec) == IOECODE_PIO_WRITE))
283 
/* Some convenience macros on device state. This state is accessed only
 * through calls into the io error handling layer.
 */
#ifdef CONFIG_SGI_IO_ERROR_HANDLING
/*
 * IS_DEVICE_SHUTDOWN(_d) simply forwards to is_device_shutdown(), which
 * is defined outside this header. Both exist only when io error handling
 * is configured in.
 */
extern boolean_t		is_device_shutdown(vertex_hdl_t vhdl);
#define IS_DEVICE_SHUTDOWN(_d)	(is_device_shutdown(_d))
#endif
291 
292 #endif /* __KERNEL__ */
293 #endif /* _ASM_IA64_SN_IOERROR_HANDLING_H */
294