/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

#define MD_RESERVED       0UL
#define LINEAR            1UL
#define RAID0             2UL
#define RAID1             3UL
#define RAID5             4UL
#define TRANSLUCENT       5UL
#define HSM               6UL
#define MULTIPATH         7UL
#define MAX_PERSONALITY   8UL

static inline int pers_to_level (int pers)
{
	switch (pers) {
		case MULTIPATH:		return -4;
		case HSM:		return -3;
		case TRANSLUCENT:	return -2;
		case LINEAR:		return -1;
		case RAID0:		return 0;
		case RAID1:		return 1;
		case RAID5:		return 5;
	}
	BUG();
	return MD_RESERVED;
}

static inline int level_to_pers (int level)
{
	switch (level) {
		case -4: return MULTIPATH;
		case -3: return HSM;
		case -2: return TRANSLUCENT;
		case -1: return LINEAR;
		case 0: return RAID0;
		case 1: return RAID1;
		case 4:
		case 5: return RAID5;
	}
	return MD_RESERVED;
}
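
/*
 * Example (illustrative only, not part of the driver): the two helpers
 * above translate between the userspace/on-disk RAID "level" numbering
 * and the personality indices defined above, e.g.:
 *
 *	pers_to_level(RAID5);	// 5
 *	level_to_pers(4);	// RAID5 (level 4 is also handled by the
 *				//        RAID5 personality)
 *	level_to_pers(99);	// MD_RESERVED: unknown level
 */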
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#if (MINORBITS != 8)
#error MD does not handle bigger kdev yet
#endif

#define MAX_MD_DEVS  (1<<MINORBITS)	/* Max number of md dev */

/*
 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
 * the personality. (e.g. HSM uses this to identify individual LVs)
 */
typedef struct dev_mapping_s {
	mddev_t *mddev;
	void *data;
} dev_mapping_t;

extern dev_mapping_t mddev_map [MAX_MD_DEVS];

static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
	if (MAJOR(dev) != MD_MAJOR)
		BUG();
	return mddev_map[MINOR(dev)].mddev;
}
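
/*
 * Usage sketch (illustrative only): looking up the array behind an md
 * minor, plus the personality-private cookie stored alongside it:
 *
 *	kdev_t dev = MKDEV(MD_MAJOR, minor);
 *	mddev_t *mddev = kdev_to_mddev(dev);	// NULL if the minor is unused
 *	void *cookie = mddev_map[MINOR(dev)].data;
 */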

/*
 * options passed in raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * default readahead
 */
#define MD_READAHEAD	vm_max_readahead

static inline int disk_faulty(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_FAULTY);
}

static inline int disk_active(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_ACTIVE);
}

static inline int disk_sync(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_SYNC);
}

static inline int disk_spare(mdp_disk_t * d)
{
	return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
}

static inline int disk_removed(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_faulty(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_FAULTY);
}

static inline void mark_disk_active(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_sync(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_SYNC);
}

static inline void mark_disk_spare(mdp_disk_t * d)
{
	d->state = 0;
}

static inline void mark_disk_removed(mdp_disk_t * d)
{
	d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_inactive(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_nonsync(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_SYNC);
}
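
/*
 * State sketch (illustrative only): a descriptor with no state bits set
 * is a spare, which is why mark_disk_spare() clears the whole word, and
 * mark_disk_removed() leaves the faulty bit set alongside the removed
 * bit, e.g.:
 *
 *	mark_disk_faulty(d);	// sets MD_DISK_FAULTY
 *	mark_disk_removed(d);	// state == FAULTY | REMOVED
 *	disk_spare(d);		// 0: the disk is faulty, not a spare
 *	mark_disk_spare(d);	// state == 0, disk_spare(d) is now 1
 */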

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct md_list_head same_set;	/* RAID devices within the same set */
	struct md_list_head all;	/* all RAID devices */
	struct md_list_head pending;	/* undetected RAID devices */

	kdev_t dev;			/* Device number */
	kdev_t old_dev;			/*  "" when it was last imported */
	unsigned long size;		/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	mdp_super_t *sb;
	struct page *sb_page;
	unsigned long sb_offset;

	int alias_device;		/* device alias to the same disk */
	int faulty;			/* if faulty do not issue IO requests */
	int desc_nr;			/* descriptor index in the superblock */
};


/*
 * disk operations in a working array:
 */
#define DISKOP_SPARE_INACTIVE	0
#define DISKOP_SPARE_WRITE	1
#define DISKOP_SPARE_ACTIVE	2
#define DISKOP_HOT_REMOVE_DISK	3
#define DISKOP_HOT_ADD_DISK	4

typedef struct mdk_personality_s mdk_personality_t;

struct mddev_s
{
	void				*private;
	mdk_personality_t		*pers;
	int				__minor;
	mdp_super_t			*sb;
	int				nb_dev;
	struct md_list_head 		disks;
	int				sb_dirty;
	mdu_param_t			param;
	int				ro;
	unsigned long			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
	char				*name;
	int				recovery_running;
	struct semaphore		reconfig_sem;
	struct semaphore		recovery_sem;
	struct semaphore		resync_sem;
	atomic_t			active;

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	md_wait_queue_head_t		recovery_wait;

	struct md_list_head		all_mddevs;
};

struct mdk_personality_s
{
	char *name;
	int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh);
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	int (*error_handler)(mddev_t *mddev, kdev_t dev);

/*
 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
 * hot-removed. Hot removal is different from failure: failure marks
 * a disk inactive, but the disk is still part of the array. The interface
 * to such operations is the 'pers->diskop()' function, which can be NULL.
 *
 * The diskop function can change the pointer pointing to the incoming
 * descriptor, but must do so very carefully. (Currently only
 * SPARE_ACTIVE expects such a change; see the sketch after this
 * structure.)
 */
	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);

	int (*stop_resync)(mddev_t *mddev);
	int (*restart_resync)(mddev_t *mddev);
	int (*sync_request)(mddev_t *mddev, unsigned long block_nr);
};
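
/*
 * A minimal sketch of a diskop handler (illustrative only; the names
 * 'demo_diskop' and 'demo_pick_descriptor' are hypothetical and not part
 * of the driver). It shows the shape of the interface: 0 is returned on
 * success, and only the SPARE_ACTIVE case may redirect the incoming
 * descriptor pointer:
 *
 *	static int demo_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
 *	{
 *		switch (state) {
 *		case DISKOP_SPARE_ACTIVE:
 *			// the only operation expected to change *d
 *			*d = demo_pick_descriptor(mddev);
 *			return 0;
 *		case DISKOP_SPARE_WRITE:
 *		case DISKOP_SPARE_INACTIVE:
 *		case DISKOP_HOT_ADD_DISK:
 *		case DISKOP_HOT_REMOVE_DISK:
 *			// update the personality's private view of the disk
 *			return 0;
 *		}
 *		return 1;
 *	}
 */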


/*
 * Currently we index md_array directly, based on the minor
 * number. This will have to change to dynamic allocation
 * once we start supporting partitioning of md devices.
 */
static inline int mdidx (mddev_t * mddev)
{
	return mddev->__minor;
}

static inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
	return MKDEV(MD_MAJOR, mdidx(mddev));
}

extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
extern mdp_disk_t *get_spare(mddev_t *mddev);

/*
 * Iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)			\
									\
	for (tmp = head.next;						\
		rdev = md_list_entry(tmp, mdk_rdev_t, field),		\
			tmp = tmp->next, tmp->prev != &head		\
		; )
/*
 * Iterates through the 'same array disks' ringlist.
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
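
/*
 * Usage sketch (illustrative only): walk the disks of one array. 'tmp'
 * is advanced before the body runs, so removing the current 'rdev' from
 * the list is safe, but 'tmp' itself must not be touched:
 *
 *	struct md_list_head *tmp;
 *	mdk_rdev_t *rdev;
 *
 *	ITERATE_RDEV(mddev, rdev, tmp) {
 *		if (rdev->faulty)
 *			continue;
 *		// ... issue IO to rdev->dev, inspect rdev->sb, etc.
 *	}
 */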

/*
 * Same as above, but assumes that the devices have rdev->desc_nr numbered
 * from 0 to mddev->nb_dev-1, and iterates through the rdevs in ascending
 * order.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)
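
/*
 * Usage sketch (illustrative only): visit the descriptors in slot order,
 * e.g. when rebuilding a consistent view of the set:
 *
 *	mdk_rdev_t *rdev;
 *	int i;
 *
 *	ITERATE_RDEV_ORDERED(mddev, rdev, i) {
 *		// under the stated assumption, rdev->desc_nr == i here
 *	}
 */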


/*
 * Iterates through all 'RAID managed disks'
 */
#define ITERATE_RDEV_ALL(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)

/*
 * Iterates through all used mddevs in the system.
 */
#define ITERATE_MDDEV(mddev,tmp)					\
									\
	for (tmp = all_mddevs.next;					\
		mddev = md_list_entry(tmp, mddev_t, all_mddevs),	\
			tmp = tmp->next, tmp->prev != &all_mddevs	\
		; )

static inline int lock_mddev (mddev_t * mddev)
{
	return down_interruptible(&mddev->reconfig_sem);
}

static inline void unlock_mddev (mddev_t * mddev)
{
	up(&mddev->reconfig_sem);
}
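
/*
 * Usage sketch (illustrative only): reconfiguration paths serialize on
 * the per-array semaphore; the acquire is interruptible and can fail:
 *
 *	if (lock_mddev(mddev))
 *		return -EINTR;		// interrupted by a signal
 *	// ... reconfigure the array ...
 *	unlock_mddev(mddev);
 */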

#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
				x = y; y = __tmp; } while (0)

typedef struct mdk_thread_s {
	void			(*run) (void *data);
	void			*data;
	md_wait_queue_head_t	wqueue;
	unsigned long           flags;
	struct completion	*event;
	struct task_struct	*tsk;
	const char		*name;
} mdk_thread_t;

#define THREAD_WAKEUP  0

#define MAX_DISKNAME_LEN 64

typedef struct dev_name_s {
	struct md_list_head list;
	kdev_t dev;
	char namebuf [MAX_DISKNAME_LEN];
	char *name;
} dev_name_t;


#define __wait_event_lock_irq(wq, condition, lock) 			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		run_task_queue(&tq_disk);				\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_event_lock_irq(wq, condition, lock) 			\
do {									\
	if (condition)	 						\
		break;							\
	__wait_event_lock_irq(wq, condition, lock);			\
} while (0)
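
/*
 * Usage sketch (illustrative only; 'some_lock', 'some_wq' and
 * 'pending_count' are hypothetical): wait_event_lock_irq() must be
 * entered with the irq-safe spinlock held. The lock is dropped around
 * schedule() and re-taken before the condition is re-evaluated, so the
 * condition may rely on the lock being held:
 *
 *	spin_lock_irq(&some_lock);
 *	wait_event_lock_irq(some_wq, pending_count == 0, some_lock);
 *	// some_lock is held again here and the condition is true
 *	spin_unlock_irq(&some_lock);
 */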


#define __wait_disk_event(wq, condition) 				\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		run_task_queue(&tq_disk);				\
		schedule();						\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_disk_event(wq, condition) 					\
do {									\
	if (condition)	 						\
		break;							\
	__wait_disk_event(wq, condition);				\
} while (0)

#endif