/*
   md_k.h : kernel internal structure of the Linux MD driver
   Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

#define MD_RESERVED       0UL
#define LINEAR            1UL
#define RAID0             2UL
#define RAID1             3UL
#define RAID5             4UL
#define TRANSLUCENT       5UL
#define HSM               6UL
#define MULTIPATH         7UL
#define MAX_PERSONALITY   8UL

static inline int pers_to_level (int pers)
{
	switch (pers) {
		case MULTIPATH:		return -4;
		case HSM:		return -3;
		case TRANSLUCENT:	return -2;
		case LINEAR:		return -1;
		case RAID0:		return 0;
		case RAID1:		return 1;
		case RAID5:		return 5;
	}
	BUG();
	return MD_RESERVED;
}

static inline int level_to_pers (int level)
{
	switch (level) {
		case -4: return MULTIPATH;
		case -3: return HSM;
		case -2: return TRANSLUCENT;
		case -1: return LINEAR;
		case 0: return RAID0;
		case 1: return RAID1;
		case 4:
		case 5: return RAID5;
	}
	return MD_RESERVED;
}
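
/*
 * The two helpers above translate between on-disk RAID levels and
 * in-kernel personality numbers. An illustrative round trip (a sketch,
 * not part of the interface):
 *
 *	int pers  = level_to_pers(5);		// -> RAID5
 *	int level = pers_to_level(pers);	// -> 5
 *
 * Note the mapping is not a strict inverse: both level 4 and level 5
 * map to the RAID5 personality, which reports itself as level 5.
 */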

typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#if (MINORBITS != 8)
#error MD does not handle bigger kdev yet
#endif

#define MAX_MD_DEVS  (1<<MINORBITS)	/* Max number of md dev */

/*
 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
 * the personality. (eg. HSM uses this to identify individual LVs)
 */
typedef struct dev_mapping_s {
	mddev_t *mddev;
	void *data;
} dev_mapping_t;

extern dev_mapping_t mddev_map [MAX_MD_DEVS];

static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
	if (MAJOR(dev) != MD_MAJOR)
		BUG();
	return mddev_map[MINOR(dev)].mddev;
}
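
/*
 * Illustrative use of the mapping (a sketch; 'minor' stands in for a
 * valid md minor number known to the caller):
 *
 *	kdev_t dev = MKDEV(MD_MAJOR, minor);
 *	mddev_t *mddev = kdev_to_mddev(dev);
 *
 *	if (!mddev)
 *		return -ENODEV;		// slot exists, no array bound to it
 *
 * The BUG() above fires only when a non-MD major is passed in, which
 * would be a caller-side bug rather than a runtime condition.
 */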

/*
 * options passed in raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * default readahead
 */
#define MD_READAHEAD	vm_max_readahead

static inline int disk_faulty(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_FAULTY);
}

static inline int disk_active(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_ACTIVE);
}

static inline int disk_sync(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_SYNC);
}

static inline int disk_spare(mdp_disk_t * d)
{
	return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
}

static inline int disk_removed(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_faulty(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_FAULTY);
}

static inline void mark_disk_active(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_sync(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_SYNC);
}

static inline void mark_disk_spare(mdp_disk_t * d)
{
	d->state = 0;
}

static inline void mark_disk_removed(mdp_disk_t * d)
{
	d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_inactive(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_nonsync(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_SYNC);
}
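
/*
 * The helpers above treat 'state' as a bitmask; a spare is simply the
 * absence of the ACTIVE, SYNC and FAULTY bits. A sketch of promoting a
 * spare into the active set (illustrative only; the driver's real
 * promotion path goes through pers->diskop()):
 *
 *	if (disk_spare(d)) {
 *		mark_disk_active(d);
 *		mark_disk_sync(d);	// disk_spare(d) is now false
 *	}
 */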

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct md_list_head same_set;	/* RAID devices within the same set */
	struct md_list_head all;	/* all RAID devices */
	struct md_list_head pending;	/* undetected RAID devices */

	kdev_t dev;			/* Device number */
	kdev_t old_dev;			/*  "" when it was last imported */
	unsigned long size;		/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	mdp_super_t *sb;
	struct page *sb_page;
	unsigned long sb_offset;

	int alias_device;		/* device alias to the same disk */
	int faulty;			/* if faulty do not issue IO requests */
	int desc_nr;			/* descriptor index in the superblock */
};


/*
 * disk operations in a working array:
 */
#define DISKOP_SPARE_INACTIVE	0
#define DISKOP_SPARE_WRITE	1
#define DISKOP_SPARE_ACTIVE	2
#define DISKOP_HOT_REMOVE_DISK	3
#define DISKOP_HOT_ADD_DISK	4

typedef struct mdk_personality_s mdk_personality_t;

struct mddev_s
{
	void				*private;
	mdk_personality_t		*pers;
	int				__minor;
	mdp_super_t			*sb;
	int				nb_dev;
	struct md_list_head		disks;
	int				sb_dirty;
	mdu_param_t			param;
	int				ro;
	unsigned long			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
	char				*name;
	int				recovery_running;
	struct semaphore		reconfig_sem;
	struct semaphore		recovery_sem;
	struct semaphore		resync_sem;
	atomic_t			active;

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	md_wait_queue_head_t		recovery_wait;

	struct md_list_head		all_mddevs;
};

struct mdk_personality_s
{
	char *name;
	int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh);
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	int (*error_handler)(mddev_t *mddev, kdev_t dev);

	/*
	 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
	 * hot-removed. Hot removal is different from failure: a failure marks
	 * a disk inactive, but the disk remains part of the array. The
	 * interface to such operations is the 'pers->diskop()' function,
	 * which may be NULL.
	 *
	 * The diskop function can change the pointer pointing to the incoming
	 * descriptor, but must do so very carefully. (Currently only
	 * SPARE_ACTIVE expects such a change.)
	 */
	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);

	int (*stop_resync)(mddev_t *mddev);
	int (*restart_resync)(mddev_t *mddev);
	int (*sync_request)(mddev_t *mddev, unsigned long block_nr);
};
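
/*
 * A personality module fills in one of these and registers it with the
 * MD core. A minimal sketch (the 'dummy_*' names and 'pnum' are
 * hypothetical placeholders; register_md_personality() is the
 * registration entry point exported by md.c, and pnum would be one of
 * the personality numbers defined at the top of this file):
 *
 *	static mdk_personality_t dummy_personality = {
 *		name:		"dummy",
 *		make_request:	dummy_make_request,
 *		run:		dummy_run,
 *		stop:		dummy_stop,
 *		status:		dummy_status,
 *	};
 *
 *	if (register_md_personality(pnum, &dummy_personality))
 *		printk(KERN_ERR "md: could not register personality\n");
 */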


/*
 * Currently we index md_array directly, based on the minor
 * number. This will have to change to dynamic allocation
 * once we start supporting partitioning of md devices.
 */
static inline int mdidx (mddev_t * mddev)
{
	return mddev->__minor;
}

static inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
	return MKDEV(MD_MAJOR, mdidx(mddev));
}

extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
extern mdp_disk_t *get_spare(mddev_t *mddev);

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)			\
									\
	for (tmp = head.next;						\
		rdev = md_list_entry(tmp, mdk_rdev_t, field),		\
		tmp = tmp->next, tmp->prev != &head			\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
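
/*
 * Typical use of the iterator (a sketch): walk all member disks of an
 * array, skipping faulty ones. Removing the current 'rdev' is safe
 * because 'tmp' has already advanced past it.
 *
 *	mdk_rdev_t *rdev;
 *	struct md_list_head *tmp;
 *
 *	ITERATE_RDEV(mddev, rdev, tmp) {
 *		if (rdev->faulty)
 *			continue;
 *		// ... operate on rdev ...
 *	}
 */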

/*
 * Same as above, but assumes that the devices have rdev->desc_nr
 * numbered from 0 to mddev->nb_dev, and iterates through rdevs in
 * ascending order.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)


/*
 * Iterates through all 'RAID managed disks'
 */
#define ITERATE_RDEV_ALL(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)

/*
 * iterates through all used mddevs in the system.
 */
#define ITERATE_MDDEV(mddev,tmp)					\
									\
	for (tmp = all_mddevs.next;					\
		mddev = md_list_entry(tmp, mddev_t, all_mddevs),	\
		tmp = tmp->next, tmp->prev != &all_mddevs		\
		; )
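
/*
 * Illustrative use (a sketch): report every array that currently has
 * recovery in progress.
 *
 *	mddev_t *mddev;
 *	struct md_list_head *tmp;
 *
 *	ITERATE_MDDEV(mddev, tmp)
 *		if (mddev->recovery_running)
 *			printk("md%d: recovery running\n", mdidx(mddev));
 */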

static inline int lock_mddev (mddev_t * mddev)
{
	return down_interruptible(&mddev->reconfig_sem);
}

static inline void unlock_mddev (mddev_t * mddev)
{
	up(&mddev->reconfig_sem);
}
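
/*
 * Array reconfiguration is serialized through 'reconfig_sem'. Because
 * the semaphore is taken interruptibly, callers must handle the signal
 * case; a sketch of the usual pattern:
 *
 *	if (lock_mddev(mddev))
 *		return -EINTR;		// interrupted by a signal
 *	// ... reconfigure the array ...
 *	unlock_mddev(mddev);
 */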

#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
				x = y; y = __tmp; } while (0)
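
/*
 * xchg_values() swaps two lvalues of the same type in place, e.g.
 * (illustrative):
 *
 *	int a = 1, b = 2;
 *	xchg_values(a, b);	// now a == 2, b == 1
 */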

typedef struct mdk_thread_s {
	void			(*run) (void *data);
	void			*data;
	md_wait_queue_head_t	wqueue;
	unsigned long		flags;
	struct completion	*event;
	struct task_struct	*tsk;
	const char		*name;
} mdk_thread_t;

#define THREAD_WAKEUP  0

#define MAX_DISKNAME_LEN 64

typedef struct dev_name_s {
	struct md_list_head list;
	kdev_t dev;
	char namebuf [MAX_DISKNAME_LEN];
	char *name;
} dev_name_t;


#define __wait_event_lock_irq(wq, condition, lock)			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		run_task_queue(&tq_disk);				\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_event_lock_irq(wq, condition, lock)			\
do {									\
	if (condition)							\
		break;							\
	__wait_event_lock_irq(wq, condition, lock);			\
} while (0)
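
/*
 * wait_event_lock_irq() sleeps until 'condition' becomes true, dropping
 * 'lock' around each schedule() and kicking the disk task queue so the
 * I/O that will satisfy the condition can make progress. A sketch of
 * draining in-flight resync blocks ('some_lock' is a stand-in name; the
 * caller must hold it going in, and it is re-held on return):
 *
 *	spin_lock_irq(&some_lock);
 *	wait_event_lock_irq(mddev->recovery_wait,
 *			    atomic_read(&mddev->recovery_active) == 0,
 *			    some_lock);
 *	spin_unlock_irq(&some_lock);
 */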


#define __wait_disk_event(wq, condition)				\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		run_task_queue(&tq_disk);				\
		schedule();						\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_disk_event(wq, condition)					\
do {									\
	if (condition)							\
		break;							\
	__wait_disk_event(wq, condition);				\
} while (0)
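
/*
 * wait_disk_event() is the lock-free variant of the above: the same
 * fast-path check and sleep loop, but with no spinlock to drop. An
 * illustrative (hypothetical) use, waiting on an MD thread's wakeup
 * bit:
 *
 *	wait_disk_event(thread->wqueue,
 *			test_bit(THREAD_WAKEUP, &thread->flags));
 */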

#endif /* _MD_K_H */