1 /*
2 * kernel/lvm.c
3 *
4 * Copyright (C) 1997 - 2002 Heinz Mauelshagen, Sistina Software
5 *
6 * February-November 1997
7 * April-May,July-August,November 1998
8 * January-March,May,July,September,October 1999
9 * January,February,July,September-November 2000
10 * January-May,June,October 2001
11 * May-August 2002
12 * February 2003
13 *
14 *
15 * LVM driver is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2, or (at your option)
18 * any later version.
19 *
20 * LVM driver is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with GNU CC; see the file COPYING. If not, write to
27 * the Free Software Foundation, 59 Temple Place - Suite 330,
28 * Boston, MA 02111-1307, USA.
29 *
30 */
31
32 /*
33 * Changelog
34 *
35 * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT
36 * and VG_STATUS_GET_NAMELIST
37 * 18/01/1998 - change lvm_chr_open/close lock handling
38 * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and
39 * - added LV_STATUS_BYINDEX ioctl
40 * - used lvm_status_byname_req_t and
41 * lvm_status_byindex_req_t vars
42 * 04/05/1998 - added multiple device support
43 * 08/05/1998 - added support to set/clear extendable flag in volume group
44 * 09/05/1998 - changed output of lvm_proc_get_global_info() because of
45 * support for free (eg. longer) logical volume names
46 * 12/05/1998 - added spin_locks (thanks to Pascal van Dam
47 * <pascal@ramoth.xs4all.nl>)
48 * 25/05/1998 - fixed handling of locked PEs in lvm_map() and
49 * lvm_chr_ioctl()
50 * 26/05/1998 - reactivated verify_area by access_ok
51 * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
52 * beyond 128/256 KB max allocation limit per call
53 * - #ifdef blocked spin_lock calls to avoid compile errors
54 * with 2.0.x
55 * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open()
56 * and use of LVM_VERSION_CODE instead of my own macros
57 * (thanks to Michael Marxmeier <mike@msede.com>)
58 * 07/07/1998 - added statistics in lvm_map()
59 * 08/07/1998 - saved statistics in lvm_do_lv_extend_reduce()
60 * 25/07/1998 - used __initfunc macro
61 * 02/08/1998 - changes for official char/block major numbers
62 * 07/08/1998 - avoided init_module() and cleanup_module() to be static
63 * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters
64 * to sum of LVs open (no matter how often each is)
65 * 01/09/1998 - fixed lvm_gendisk.part[] index error
66 * 07/09/1998 - added copying of lv_current_pe-array
67 * in LV_STATUS_BYINDEX ioctl
68 * 17/11/1998 - added KERN_* levels to printk
69 * 13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename
70 * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET
71 * by moving spinlock code from lvm_chr_open()
72 * to lvm_chr_ioctl()
73 * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl()
74 * - allowed LVM_RESET and retrieval commands to go ahead;
75 * only other update ioctls are blocked now
76 * - fixed pv->pe to NULL for pv_status
77 * - using lv_req structure in lvm_chr_ioctl() now
78 * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
79 * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
80 * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
81 * handle logical volume private read ahead sector
82 * - implemented LV read_ahead handling with lvm_blk_read()
83 * and lvm_blk_write()
84 * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name()
85 * to be used in drivers/block/genhd.c by disk_name()
86 * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO
87 * - enhanced gendisk insert/remove handling
88 * 16/02/1999 - changed to dynamic block minor number allocation to
89 * have as much as 99 volume groups with 256 logical volumes
90 * as the grand total; this allows having 1 volume group with
91 * up to 256 logical volumes in it
92 * 21/02/1999 - added LV open count information to proc filesystem
93 * - substituted redundant LVM_RESET code by calls
94 * to lvm_do_vg_remove()
95 * 22/02/1999 - used schedule_timeout() to be more responsive
96 * in case of lvm_do_vg_remove() with lots of logical volumes
97 * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init
98 * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0)
99 * - enhanced lvm_hd_name support
100 * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and
101 * memcpy_tofs/memcpy_fromfs macro redefinitions
102 * 06/07/1999 - corrected reads/writes statistic counter copy in case
103 * of striped logical volume
104 * 28/07/1999 - implemented snapshot logical volumes
105 * - lvm_chr_ioctl
106 * - LV_STATUS_BYINDEX
107 * - LV_STATUS_BYNAME
108 * - lvm_do_lv_create
109 * - lvm_do_lv_remove
110 * - lvm_map
111 * - new lvm_snapshot_remap_block
112 * - new lvm_snapshot_remap_new_block
113 * 08/10/1999 - implemented support for multiple snapshots per
114 * original logical volume
115 * 12/10/1999 - support for 2.3.19
116 * 11/11/1999 - support for 2.3.28
117 * 21/11/1999 - changed lvm_map() interface to buffer_head based
118 * 19/12/1999 - support for 2.3.33
119 * 01/01/2000 - changed locking concept in lvm_map(),
120 * lvm_do_vg_create() and lvm_do_lv_remove()
121 * 15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl()
122 * 24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc.
123 * 29/01/2000 - used kmalloc/kfree again for all small structures
124 * 20/01/2000 - cleaned up lvm_chr_ioctl by moving code
125 * to separated functions
126 * - avoided "/dev/" in proc filesystem output
127 * - avoided inline strings functions lvm_strlen etc.
128 * 14/02/2000 - support for 2.3.43
129 * - integrated Andrea Arcangeli's snapshot code
130 * 25/06/2000 - james (chip) , IKKHAYD! roffl
131 * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
132 * support
133 * 06/09/2000 - added devfs support
134 * 07/09/2000 - changed IOP version to 9
135 * - started to add new char ioctl LV_STATUS_BYDEV_T to support
136 * getting an lv_t based on the dev_t of the Logical Volume
137 * 14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions
138 * to sync and lock, activate snapshot and unlock the FS
139 * (to support journaled filesystems)
140 * 18/09/2000 - hardsector size support
141 * 27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename()
142 * 30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO
143 * 01/11/2000 - added memory information on hash tables to
144 * lvm_proc_get_global_info()
145 * 02/11/2000 - implemented /proc/lvm/ hierarchy
146 * 22/11/2000 - changed lvm_do_create_proc_entry_of_pv () to work
147 * with devfs
148 * 26/11/2000 - corrected #ifdef locations for PROC_FS
149 * 28/11/2000 - fixed lvm_do_vg_extend() NULL pointer BUG
150 * - fixed lvm_do_create_proc_entry_of_pv() buffer tampering BUG
151 * 08/01/2001 - Removed conditional compiles related to PROC_FS,
152 * procfs is always supported now. (JT)
153 * 12/01/2001 - avoided flushing logical volume in case of shrinking
154 * because of unnecessary overhead in case of heavy updates
155 * 25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
156 * 31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be
157 * handled by the proper devices.
158 * - If you try and BMAP a snapshot you now get an -EPERM
159 * 01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
160 * - factored __remap_snapshot out of lvm_map
161 * 12/02/2001 - move devfs code to create VG before LVs
162 * 13/02/2001 - allow VG_CREATE on /dev/lvm
163 * 14/02/2001 - removed modversions.h
164 * - tidied device defines for blk.h
165 * - tidied debug statements
166 * - bug: vg[] member not set back to NULL if activation fails
167 * - more lvm_map tidying
168 * 15/02/2001 - register /dev/lvm with devfs correctly (major/minor
169 * were swapped)
170 * 19/02/2001 - preallocated buffer_heads for rawio when using
171 * snapshots [JT]
172 * 28/02/2001 - introduced the P_DEV macro and changed some internal
173 * functions to be static [AD]
174 * 28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
175 * - fixed user address accessing bug in lvm_do_lv_create()
176 * where the check for an existing LV takes place right at
177 * the beginning
178 * 01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
179 * 02/03/2001 - Don't destroy usermode pointers in lv_t structures during
180 * LV_STATUS_BYxxx
181 * and remove redundant lv_t variables from same.
182 * - avoid compilation of lvm_dummy_device_request in case of
183 * Linux >= 2.3.0 to avoid a warning
184 * - added lvm_name argument to printk in buffer allocation
185 * in order to avoid a warning
186 * 04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
187 * macros
188 * 05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
189 * lvdisplay -v (PC)
190 * - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
191 * - added copying pe_t array in lvm_do_lv_status_bydev (HM)
192 * - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
193 * to copy the lv_block_exception_t array to userspace (HM)
194 * 08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
195 * removed obsolete lv_ptr->lv_COW_table_page initialization
196 * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
197 * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock
198 * when the locking process closes.
199 * 05/04/2001 - Defer writes to an extent that is being moved [JT]
200 * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
201 * lvm_map() in order to make stacking devices more happy (HM)
202 * 11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
203 * rw flag, instead WRITEA's are just dropped [JT]
204 * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
205 * than get_hardblocksize() call
206 * 03/05/2001 - Use copy_to/from_user to preserve pointers in
207 * lvm_do_status_by*
208 * 11/05/2001 - avoid accesses to inactive snapshot data in
209 * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
210 * 28/05/2001 - implemented missing BLKSSZGET ioctl
211 * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
212 * locked. Make buffer queue flush not need locking.
213 * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
214 * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
215 * the same hardsectsize (very likely) before scanning all LEs
216 * in the LV each time. [AED]
217 * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
218 * 01/11/2001 - Backport read_ahead change from Linus kernel [AED]
219 * 24/05/2002 - fixed locking bug in lvm_do_le_remap() introduced with 1.0.4
220 * 13/06/2002 - use blk_ioctl() to support various standard block ioctls
221 * - support HDIO_GETGEO_BIG ioctl
222 * 05/07/2002 - fixed OBO error on vg array access [benh@kernel.crashing.org]
223 * 22/07/2002 - streamlined blk_ioctl() call
224 * 14/08/2002 - stored fs handle in lvm_do_lv_rename
225 * [kaoru@bsd.tnes.nec.co.jp]
226 * 06/02/2003 - fix persistent snapshot extend/reduce bug in
227 * lvm_do_lv_extend_reduce() [dalestephenson@mac.com]
228 * 04/03/2003 - snapshot extend/reduce memory leak
229 * - VG PE counter wrong [dalestephenson@mac.com]
230 *
231 */
232
233 #include <linux/version.h>
234
235 #define MAJOR_NR LVM_BLK_MAJOR
236 #define DEVICE_OFF(device)
237 #define LOCAL_END_REQUEST
238
239 /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
240 /* #define LVM_VFS_ENHANCEMENT */
241
242 #include <linux/config.h>
243 #include <linux/module.h>
244 #include <linux/kernel.h>
245 #include <linux/vmalloc.h>
246
247 #include <linux/slab.h>
248 #include <linux/init.h>
249
250 #include <linux/hdreg.h>
251 #include <linux/stat.h>
252 #include <linux/fs.h>
253 #include <linux/proc_fs.h>
254 #include <linux/blkdev.h>
255 #include <linux/genhd.h>
256 #include <linux/locks.h>
257
258
259 #include <linux/devfs_fs_kernel.h>
260 #include <linux/smp_lock.h>
261 #include <asm/ioctl.h>
262 #include <asm/segment.h>
263 #include <asm/uaccess.h>
264
265 #ifdef CONFIG_KERNELD
266 #include <linux/kerneld.h>
267 #endif
268
269 #include <linux/blk.h>
270 #include <linux/blkpg.h>
271
272 #include <linux/errno.h>
273 #include <linux/lvm.h>
274
275 #include "lvm-internal.h"
276
/*
 * Clamp a read-ahead value to the allowed window; out-of-range values
 * fall back to LVM_DEFAULT_READ_AHEAD.  The accepted value is also
 * mirrored into the block layer's per-major read_ahead[] table.
 * Note: evaluates (a) more than once -- do not pass an expression
 * with side effects.
 */
#define LVM_CORRECT_READ_AHEAD(a) \
do { \
	if ((a) < LVM_MIN_READ_AHEAD || \
	    (a) > LVM_MAX_READ_AHEAD) \
		(a) = LVM_DEFAULT_READ_AHEAD; \
	read_ahead[MAJOR_NR] = (a); \
} while(0)

/* WRITEA (write-ahead) is not defined by every kernel version this
   driver supports; fall back to a plain WRITE */
#ifndef WRITEA
#  define WRITEA WRITE
#endif
288
289
290 /*
291 * External function prototypes
292 */
293 static int lvm_make_request_fn(request_queue_t *, int,
294 struct buffer_head *);
295
296 static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
297 static int lvm_blk_open(struct inode *, struct file *);
298
299 static int lvm_blk_close(struct inode *, struct file *);
300 static int lvm_get_snapshot_use_rate(lv_t * lv_ptr, void *arg);
301 static int lvm_user_bmap(struct inode *, struct lv_bmap *);
302
303 static int lvm_chr_open(struct inode *, struct file *);
304 static int lvm_chr_close(struct inode *, struct file *);
305 static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
306
307
308 /* End external function prototypes */
309
310
311 /*
312 * Internal function prototypes
313 */
314 static void lvm_cleanup(void);
315 static void lvm_init_vars(void);
316
317 #ifdef LVM_HD_NAME
318 extern void (*lvm_hd_name_ptr) (char *, int);
319 #endif
320 static int lvm_map(struct buffer_head *, int);
321 static int lvm_do_lock_lvm(void);
322 static int lvm_do_le_remap(vg_t *, void *);
323
324 static int lvm_do_pv_create(pv_t *, vg_t *, ulong);
325 static int lvm_do_pv_remove(vg_t *, ulong);
326 static int lvm_do_lv_create(int, char *, lv_t *);
327 static int lvm_do_lv_extend_reduce(int, char *, lv_t *);
328 static int lvm_do_lv_remove(int, char *, int);
329 static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *);
330 static int lvm_do_lv_status_byname(vg_t * r, void *);
331 static int lvm_do_lv_status_byindex(vg_t *, void *);
332 static int lvm_do_lv_status_bydev(vg_t *, void *);
333
334 static int lvm_do_pe_lock_unlock(vg_t * r, void *);
335
336 static int lvm_do_pv_change(vg_t *, void *);
337 static int lvm_do_pv_status(vg_t *, void *);
338 static int lvm_do_pv_flush(void *);
339
340 static int lvm_do_vg_create(void *, int minor);
341 static int lvm_do_vg_extend(vg_t *, void *);
342 static int lvm_do_vg_reduce(vg_t *, void *);
343 static int lvm_do_vg_rename(vg_t *, void *);
344 static int lvm_do_vg_remove(int);
345 static void lvm_geninit(struct gendisk *);
346 static void __update_hardsectsize(lv_t * lv);
347
348
349 static void _queue_io(struct buffer_head *bh, int rw);
350 static struct buffer_head *_dequeue_io(void);
351 static void _flush_io(struct buffer_head *bh);
352
353 static int _open_pv(pv_t * pv);
354 static void _close_pv(pv_t * pv);
355
356 static unsigned long _sectors_to_k(unsigned long sect);
357
358 #ifdef LVM_HD_NAME
359 void lvm_hd_name(char *, int);
360 #endif
361 /* END Internal function prototypes */
362
363
/* variables */

/* human-readable version banner printed at load time */
char *lvm_version =
    "LVM version " LVM_RELEASE_NAME "(" LVM_RELEASE_DATE ")";
/* ioctl protocol (IOP) version checked by the user-space tools */
ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
int loadtime = 0;		/* set to CURRENT_TIME in lvm_init_vars() */
const char *const lvm_name = LVM_NAME;


/* volume group descriptor area pointers */
vg_t *vg[ABS_MAX_VG + 1];

/* map from block minor number to VG and LV numbers */
static struct {
	int vg_number;
	int lv_number;
} vg_lv_map[ABS_MAX_LV];


/* Request structures (lvm_chr_ioctl()) */
static pv_change_req_t pv_change_req;
static pv_status_req_t pv_status_req;
/* NOTE(review): declared volatile, presumably because it is also read
   outside lvm_chr_ioctl() (e.g. the mapping fast path) -- confirm */
volatile static pe_lock_req_t pe_lock_req;
static le_remap_req_t le_remap_req;
static lv_req_t lv_req;

#ifdef LVM_TOTAL_RESET
static int lvm_reset_spindown = 0;
#endif

static char pv_name[NAME_LEN];
/* static char rootvg[NAME_LEN] = { 0, }; */
/* pid of the process currently holding the LVM lock (0 = unlocked) */
static int lock = 0;
/* extra opens of the char device by the lock-holding process */
static int _lock_open_count = 0;
uint vg_count = 0;
static long lvm_chr_open_count = 0;
static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);

static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;

/* queue of buffer heads deferred while a physical extent is locked */
static struct buffer_head *_pe_requests;
static DECLARE_RWSEM(_pe_lock);


/* character device (control interface) operations */
struct file_operations lvm_chr_fops = {
	owner:THIS_MODULE,
	open:lvm_chr_open,
	release:lvm_chr_close,
	ioctl:lvm_chr_ioctl,
};

/* block device operations structure needed for 2.3.38? and above */
struct block_device_operations lvm_blk_dops = {
	.owner = THIS_MODULE,
	.open = lvm_blk_open,
	.release = lvm_blk_close,
	.ioctl = lvm_blk_ioctl,
};


/* gendisk structures */
static struct hd_struct lvm_hd_struct[MAX_LV];
static int lvm_blocksizes[MAX_LV];
static int lvm_hardsectsizes[MAX_LV];
static int lvm_size[MAX_LV];

static struct gendisk lvm_gendisk = {
	.major = MAJOR_NR,
	.major_name = LVM_NAME,
	.minor_shift = 0,
	.max_p = 1,
	.part = lvm_hd_struct,
	.sizes = lvm_size,
	.nr_real = MAX_LV,
};
439
440
441 /*
442 * Driver initialization...
443 */
lvm_init(void)444 int lvm_init(void)
445 {
446 if (devfs_register_chrdev(LVM_CHAR_MAJOR,
447 lvm_name, &lvm_chr_fops) < 0) {
448 printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
449 lvm_name);
450 return -EIO;
451 }
452 if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
453 {
454 printk("%s -- devfs_register_blkdev failed\n", lvm_name);
455 if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
456 printk(KERN_ERR
457 "%s -- devfs_unregister_chrdev failed\n",
458 lvm_name);
459 return -EIO;
460 }
461
462 lvm_init_fs();
463 lvm_init_vars();
464 lvm_geninit(&lvm_gendisk);
465
466 /* insert our gendisk at the corresponding major */
467 add_gendisk(&lvm_gendisk);
468
469 #ifdef LVM_HD_NAME
470 /* reference from drivers/block/genhd.c */
471 lvm_hd_name_ptr = lvm_hd_name;
472 #endif
473
474 blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR),
475 lvm_make_request_fn);
476
477
478 /* initialise the pe lock */
479 pe_lock_req.lock = UNLOCK_PE;
480
481 /* optional read root VGDA */
482 /*
483 if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
484 */
485
486 #ifdef MODULE
487 printk(KERN_INFO "%s module loaded\n", lvm_version);
488 #else
489 printk(KERN_INFO "%s\n", lvm_version);
490 #endif
491
492 return 0;
493 } /* lvm_init() */
494
495 /*
496 * cleanup...
497 */
498
/*
 * Module teardown: unregister the character and block devices, remove
 * the gendisk and clear this major's entries in the global block-layer
 * tables, then drop the procfs/devfs entries.  Statement order is kept
 * exactly as in the original (devices first, tables afterwards).
 */
static void lvm_cleanup(void)
{
	if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
		       lvm_name);
	if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
		       lvm_name);



	/* delete our gendisk from chain */
	del_gendisk(&lvm_gendisk);

	/* drop this major's per-device size/blocksize/hardsect tables */
	blk_size[MAJOR_NR] = NULL;
	blksize_size[MAJOR_NR] = NULL;
	hardsect_size[MAJOR_NR] = NULL;

#ifdef LVM_HD_NAME
	/* reference from linux/drivers/block/genhd.c */
	lvm_hd_name_ptr = NULL;
#endif

	/* unregister with procfs and devfs */
	lvm_fin_fs();

#ifdef MODULE
	printk(KERN_INFO "%s -- Module successfully deactivated\n",
	       lvm_name);
#endif

	return;
}				/* lvm_cleanup() */
532
533 /*
534 * support function to initialize lvm variables
535 */
lvm_init_vars(void)536 static void __init lvm_init_vars(void)
537 {
538 int v;
539
540 loadtime = CURRENT_TIME;
541
542 lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
543
544 pe_lock_req.lock = UNLOCK_PE;
545 pe_lock_req.data.lv_dev = 0;
546 pe_lock_req.data.pv_dev = 0;
547 pe_lock_req.data.pv_offset = 0;
548
549 /* Initialize VG pointers */
550 for (v = 0; v < ABS_MAX_VG + 1; v++)
551 vg[v] = NULL;
552
553 /* Initialize LV -> VG association */
554 for (v = 0; v < ABS_MAX_LV; v++) {
555 /* index ABS_MAX_VG never used for real VG */
556 vg_lv_map[v].vg_number = ABS_MAX_VG;
557 vg_lv_map[v].lv_number = -1;
558 }
559
560 return;
561 } /* lvm_init_vars() */
562
563
564 /********************************************************************
565 *
566 * Character device functions
567 *
568 ********************************************************************/
569
570 #define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
571 (mode) & FMODE_WRITE ? "WRITE" : ""
572
573 /*
574 * character device open routine
575 */
lvm_chr_open(struct inode * inode,struct file * file)576 static int lvm_chr_open(struct inode *inode, struct file *file)
577 {
578 int minor = MINOR(inode->i_rdev);
579
580 P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n",
581 minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
582
583 /* super user validation */
584 if (!capable(CAP_SYS_ADMIN))
585 return -EACCES;
586
587 /* Group special file open */
588 if (VG_CHR(minor) > MAX_VG)
589 return -ENXIO;
590
591 spin_lock(&lvm_lock);
592 if (lock == current->pid)
593 _lock_open_count++;
594 spin_unlock(&lvm_lock);
595
596 lvm_chr_open_count++;
597
598 MOD_INC_USE_COUNT;
599
600 return 0;
601 } /* lvm_chr_open() */
602
603
604 /*
605 * character device i/o-control routine
606 *
607 * Only one changing process can do changing ioctl at one time,
608 * others will block.
609 *
610 */
/*
 * Character device ioctl dispatcher: every VG/LV/PV management command
 * from the user-space tools arrives here and is routed to the matching
 * lvm_do_*() helper.
 *
 * 'a' carries a user-space pointer for most commands; each case copies
 * from/to user space itself.  vg_ptr may be NULL for minors without an
 * active VG -- commands that need it check individually.
 */
static int lvm_chr_ioctl(struct inode *inode, struct file *file,
			 uint command, ulong a)
{
	int minor = MINOR(inode->i_rdev);
	uint extendable, l, v;
	void *arg = (void *) a;	/* user-space pointer passed as ulong */
	lv_t lv;		/* kernel copy of the caller's lv_t */
	vg_t *vg_ptr = vg[VG_CHR(minor)];	/* may be NULL */

	/* otherwise cc will complain about unused variables */
	(void) lvm_lock;

	P_IOCTL
	    ("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n",
	     minor, command, arg, VG_CHR(minor),
	     MODE_TO_STR(file->f_mode));

#ifdef LVM_TOTAL_RESET
	/* refuse everything while a total reset is in progress */
	if (lvm_reset_spindown > 0)
		return -EACCES;
#endif

	/* Main command switch */
	switch (command) {
	case LVM_LOCK_LVM:
		/* lock the LVM */
		return lvm_do_lock_lvm();

	case LVM_GET_IOP_VERSION:
		/* check lvm version to ensure driver/tools+lib
		   interoperability */
		if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) !=
		    0)
			return -EFAULT;
		return 0;

#ifdef LVM_TOTAL_RESET
	case LVM_RESET:
		/* lock reset function */
		lvm_reset_spindown = 1;
		for (v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL)
				lvm_do_vg_remove(v);
		}

#ifdef MODULE
		/* force the module use count back to exactly 1 */
		while (GET_USE_COUNT(&__this_module) < 1)
			MOD_INC_USE_COUNT;
		while (GET_USE_COUNT(&__this_module) > 1)
			MOD_DEC_USE_COUNT;
#endif				/* MODULE */
		lock = 0;	/* release lock */
		wake_up_interruptible(&lvm_wait);
		return 0;
#endif				/* LVM_TOTAL_RESET */


	case LE_REMAP:
		/* remap a logical extent (after moving the physical extent) */
		return lvm_do_le_remap(vg_ptr, arg);

	case PE_LOCK_UNLOCK:
		/* lock/unlock i/o to a physical extent to move it to another
		   physical volume (move's done in user space's pvmove) */
		return lvm_do_pe_lock_unlock(vg_ptr, arg);

	case VG_CREATE_OLD:
		/* create a VGDA */
		return lvm_do_vg_create(arg, minor);

	case VG_CREATE:
		/* create a VGDA, assume VG number is filled in */
		return lvm_do_vg_create(arg, -1);

	case VG_EXTEND:
		/* extend a volume group */
		return lvm_do_vg_extend(vg_ptr, arg);

	case VG_REDUCE:
		/* reduce a volume group */
		return lvm_do_vg_reduce(vg_ptr, arg);

	case VG_RENAME:
		/* rename a volume group */
		return lvm_do_vg_rename(vg_ptr, arg);

	case VG_REMOVE:
		/* remove an inactive VGDA */
		return lvm_do_vg_remove(minor);


	case VG_SET_EXTENDABLE:
		/* set/clear extendability flag of volume group */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_from_user(&extendable, arg, sizeof(extendable)) !=
		    0)
			return -EFAULT;

		/* only the exact set (VG_EXTENDABLE) and clear
		   (~VG_EXTENDABLE) request values are accepted */
		if (extendable == VG_EXTENDABLE ||
		    extendable == ~VG_EXTENDABLE) {
			if (extendable == VG_EXTENDABLE)
				vg_ptr->vg_status |= VG_EXTENDABLE;
			else
				vg_ptr->vg_status &= ~VG_EXTENDABLE;
		} else
			return -EINVAL;
		return 0;


	case VG_STATUS:
		/* get volume group data (only the vg_t struct) */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_COUNT:
		/* get volume group count */
		if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_NAMELIST:
		/* get volume group names: pack the names of all active
		   VGs contiguously into the user buffer (l counts the
		   slots written, v scans the vg[] table) */
		for (l = v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) {
				if (copy_to_user(arg + l * NAME_LEN,
						 vg[v]->vg_name,
						 NAME_LEN) != 0)
					return -EFAULT;
				l++;
			}
		}
		return 0;


	case LV_CREATE:
	case LV_EXTEND:
	case LV_REDUCE:
	case LV_REMOVE:
	case LV_RENAME:
		/* create, extend, reduce, remove or rename a logical volume */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
			return -EFAULT;

		/* LV_REMOVE only needs the name, not the full lv_t */
		if (command != LV_REMOVE) {
			if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) !=
			    0)
				return -EFAULT;
		}
		switch (command) {
		case LV_CREATE:
			return lvm_do_lv_create(minor, lv_req.lv_name,
						&lv);

		case LV_EXTEND:
		case LV_REDUCE:
			return lvm_do_lv_extend_reduce(minor,
						       lv_req.lv_name,
						       &lv);
		case LV_REMOVE:
			return lvm_do_lv_remove(minor, lv_req.lv_name, -1);

		case LV_RENAME:
			return lvm_do_lv_rename(vg_ptr, &lv_req, &lv);
		}

		/* NOTE(review): control would fall through into
		   LV_STATUS_BYNAME here.  Unreachable as long as the
		   inner switch returns for every command of this group,
		   but fragile if another LV_* command is ever added. */



	case LV_STATUS_BYNAME:
		/* get status of a logical volume by name */
		return lvm_do_lv_status_byname(vg_ptr, arg);


	case LV_STATUS_BYINDEX:
		/* get status of a logical volume by index */
		return lvm_do_lv_status_byindex(vg_ptr, arg);


	case LV_STATUS_BYDEV:
		/* get status of a logical volume by device */
		return lvm_do_lv_status_bydev(vg_ptr, arg);


	case PV_CHANGE:
		/* change a physical volume */
		return lvm_do_pv_change(vg_ptr, arg);


	case PV_STATUS:
		/* get physical volume data (pv_t structure only) */
		return lvm_do_pv_status(vg_ptr, arg);


	case PV_FLUSH:
		/* physical volume buffer flush/invalidate */
		return lvm_do_pv_flush(arg);


	default:
		printk(KERN_WARNING
		       "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
		       lvm_name, command);
		return -ENOTTY;
	}

	return 0;
}				/* lvm_chr_ioctl */
826
827
828 /*
829 * character device close routine
830 */
lvm_chr_close(struct inode * inode,struct file * file)831 static int lvm_chr_close(struct inode *inode, struct file *file)
832 {
833 P_DEV("chr_close MINOR: %d VG#: %d\n",
834 MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
835
836 #ifdef LVM_TOTAL_RESET
837 if (lvm_reset_spindown > 0) {
838 lvm_reset_spindown = 0;
839 lvm_chr_open_count = 0;
840 }
841 #endif
842
843 if (lvm_chr_open_count > 0)
844 lvm_chr_open_count--;
845
846 spin_lock(&lvm_lock);
847 if (lock == current->pid) {
848 if (!_lock_open_count) {
849 P_DEV("chr_close: unlocking LVM for pid %d\n",
850 lock);
851 lock = 0;
852 wake_up_interruptible(&lvm_wait);
853 } else
854 _lock_open_count--;
855 }
856 spin_unlock(&lvm_lock);
857
858 MOD_DEC_USE_COUNT;
859
860 return 0;
861 } /* lvm_chr_close() */
862
863
864
865 /********************************************************************
866 *
867 * Block device functions
868 *
869 ********************************************************************/
870
871 /*
872 * block device open routine
873 */
lvm_blk_open(struct inode * inode,struct file * file)874 static int lvm_blk_open(struct inode *inode, struct file *file)
875 {
876 int minor = MINOR(inode->i_rdev);
877 lv_t *lv_ptr;
878 vg_t *vg_ptr = vg[VG_BLK(minor)];
879
880 P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n",
881 minor, VG_BLK(minor), LV_BLK(minor),
882 MODE_TO_STR(file->f_mode));
883
884 #ifdef LVM_TOTAL_RESET
885 if (lvm_reset_spindown > 0)
886 return -EPERM;
887 #endif
888
889 if (vg_ptr != NULL &&
890 (vg_ptr->vg_status & VG_ACTIVE) &&
891 (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL &&
892 LV_BLK(minor) >= 0 && LV_BLK(minor) < vg_ptr->lv_max) {
893
894 /* Check parallel LV spindown (LV remove) */
895 if (lv_ptr->lv_status & LV_SPINDOWN)
896 return -EPERM;
897
898 /* Check inactive LV and open for read/write */
899 /* We need to be able to "read" an inactive LV
900 to re-activate it again */
901 if ((file->f_mode & FMODE_WRITE) &&
902 (!(lv_ptr->lv_status & LV_ACTIVE)))
903 return -EPERM;
904
905 if (!(lv_ptr->lv_access & LV_WRITE) &&
906 (file->f_mode & FMODE_WRITE))
907 return -EACCES;
908
909
910 /* be sure to increment VG counter */
911 if (lv_ptr->lv_open == 0)
912 vg_ptr->lv_open++;
913 lv_ptr->lv_open++;
914
915 MOD_INC_USE_COUNT;
916
917 P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
918
919 return 0;
920 }
921 return -ENXIO;
922 } /* lvm_blk_open() */
923
924 /* Deliver "hard disk geometry" */
/*
 * Deliver a fabricated "hard disk geometry" for a logical volume.
 *
 * 'a' is the user-space destination address from the ioctl argument;
 * 'what' selects the layout: 0 = struct hd_geometry,
 * 1 = struct hd_big_geometry (only when HDIO_GETGEO_BIG exists).
 *
 * Returns 0 on success, -EFAULT if any user-space store fails.
 */
static int _hdio_getgeo(ulong a, lv_t * lv_ptr, int what)
{
	int ret = 0;
	uchar heads = 128;
	uchar sectors = 128;
	ulong start = 0;
	uint cylinders;

	/* shrink the fake geometry until heads*sectors fits within the
	   LV size.  BUGFIX: stop before heads/sectors are shifted down
	   to zero -- for very small (or zero-sized) volumes the
	   original loop reached 0/0 and the cylinder computation below
	   divided by zero */
	while (heads > 1 && sectors > 1 &&
	       heads * sectors > lv_ptr->lv_size) {
		heads >>= 1;
		sectors >>= 1;
	}
	cylinders = lv_ptr->lv_size / heads / sectors;

	switch (what) {
	case 0:
		{
			struct hd_geometry *hd = (struct hd_geometry *) a;

			if (put_user(heads, &hd->heads) ||
			    put_user(sectors, &hd->sectors) ||
			    put_user((ushort) cylinders, &hd->cylinders) ||
			    put_user(start, &hd->start))
				return -EFAULT;
			break;
		}

#ifdef HDIO_GETGEO_BIG
	case 1:
		{
			struct hd_big_geometry *hd =
			    (struct hd_big_geometry *) a;

			if (put_user(heads, &hd->heads) ||
			    put_user(sectors, &hd->sectors) ||
			    put_user(cylinders, &hd->cylinders) ||
			    put_user(start, &hd->start))
				return -EFAULT;
			break;
		}
#endif

	}

	P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
		lvm_name, cylinders);
	return ret;
}
973
974
975 /*
976 * block device i/o-control routine
977 */
/*
 * Block device ioctl routine: LV-specific commands (access/allocation/
 * status flags, geometry, bmap, snapshot use rate) are handled here;
 * everything else is forwarded to the generic blk_ioctl().
 */
static int lvm_blk_ioctl(struct inode *inode, struct file *file,
			 uint cmd, ulong a)
{
	kdev_t dev = inode->i_rdev;
	int minor = MINOR(dev), ret;
	vg_t *vg_ptr = vg[VG_BLK(minor)];
	lv_t *lv_ptr;
	void *arg = (void *) a;

	/* BUGFIX: validate the minor's VG/LV mapping before use; the
	   original dereferenced vg_ptr and lv_ptr unconditionally and
	   would oops on an unmapped minor (cf. the checks performed by
	   lvm_blk_open() and lvm_chr_ioctl()) */
	if (vg_ptr == NULL ||
	    LV_BLK(minor) < 0 || LV_BLK(minor) >= vg_ptr->lv_max ||
	    (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) == NULL)
		return -ENXIO;

	P_IOCTL("blk MINOR: %d cmd: 0x%X arg: %p VG#: %d LV#: %d "
		"mode: %s%s\n", minor, cmd, arg, VG_BLK(minor),
		LV_BLK(minor), MODE_TO_STR(file->f_mode));

	switch (cmd) {
	case BLKRASET:
		/* set read ahead for block device */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			return ret;
		/* keep the per-LV read-ahead in sync, clamped to the
		   valid window */
		lv_ptr->lv_read_ahead = (long) a;
		LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
		break;

	case HDIO_GETGEO:
#ifdef HDIO_GETGEO_BIG
	case HDIO_GETGEO_BIG:
#endif
		/* get disk geometry */
		P_IOCTL("%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name);
		if (!a)
			return -EINVAL;

		switch (cmd) {
		case HDIO_GETGEO:
			return _hdio_getgeo(a, lv_ptr, 0);
#ifdef HDIO_GETGEO_BIG
		case HDIO_GETGEO_BIG:
			return _hdio_getgeo(a, lv_ptr, 1);
#endif
		}
		/* not reached -- the inner switch covers every command
		   of this group; the explicit return stops an
		   accidental fallthrough into LV_BMAP */
		return -EINVAL;

	case LV_BMAP:
		/* turn logical block into (dev_t, block). non privileged. */
		/* don't bmap a snapshot, since the mapping can change */
		if (lv_ptr->lv_access & LV_SNAPSHOT)
			return -EPERM;

		return lvm_user_bmap(inode, (struct lv_bmap *) arg);

	case LV_SET_ACCESS:
		/* set access flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;

		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_access = (ulong) arg;
		up_write(&lv_ptr->lv_lock);

		/* mirror the writability into the device's RO flag */
		if (lv_ptr->lv_access & LV_WRITE)
			set_device_ro(lv_ptr->lv_dev, 0);
		else
			set_device_ro(lv_ptr->lv_dev, 1);
		break;


	case LV_SET_ALLOCATION:
		/* set allocation flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_allocation = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SET_STATUS:
		/* set status flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		/* refuse to deactivate an LV that is still in use */
		if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
			return -EPERM;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_status = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SNAPSHOT_USE_RATE:
		return lvm_get_snapshot_use_rate(lv_ptr, arg);

	default:
		/* Handle rest here */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			printk(KERN_WARNING
			       "%s -- lvm_blk_ioctl: unknown "
			       "cmd 0x%x\n", lvm_name, cmd);
		return ret;
	}

	return 0;
}				/* lvm_blk_ioctl() */
1078
1079
1080 /*
1081 * block device close routine
1082 */
lvm_blk_close(struct inode * inode,struct file * file)1083 static int lvm_blk_close(struct inode *inode, struct file *file)
1084 {
1085 int minor = MINOR(inode->i_rdev);
1086 vg_t *vg_ptr = vg[VG_BLK(minor)];
1087 lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
1088
1089 P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
1090 minor, VG_BLK(minor), LV_BLK(minor));
1091
1092 if (lv_ptr->lv_open == 1)
1093 vg_ptr->lv_open--;
1094 lv_ptr->lv_open--;
1095
1096 MOD_DEC_USE_COUNT;
1097
1098 return 0;
1099 } /* lvm_blk_close() */
1100
/*
 * LV_SNAPSHOT_USE_RATE ioctl backend.
 *
 * Reports -- and in blocking mode waits on -- how full a snapshot's
 * exception store is.  The request's 'block' field selects blocking (0)
 * versus O_NONBLOCK; 'rate' is a fill-percentage threshold (0..100).
 * The current fill percentage is always written back to user space.
 * Returns 0, -EPERM (not a snapshot), -EFAULT or -EINVAL.
 */
static int lvm_get_snapshot_use_rate(lv_t * lv, void *arg)
{
	lv_snapshot_use_rate_req_t lv_rate_req;

	/* only meaningful for snapshot LVs */
	down_read(&lv->lv_lock);
	if (!(lv->lv_access & LV_SNAPSHOT)) {
		up_read(&lv->lv_lock);
		return -EPERM;
	}
	up_read(&lv->lv_lock);

	if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
		return -EFAULT;

	if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
		return -EINVAL;

	switch (lv_rate_req.block) {
	case 0:
		/* blocking: publish the threshold, then sleep until the
		   fill level reaches it (lv_snapshot_wait is woken
		   elsewhere in the driver) */
		down_write(&lv->lv_lock);
		lv->lv_snapshot_use_rate = lv_rate_req.rate;
		up_write(&lv->lv_lock);
		down_read(&lv->lv_lock);
		if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
		    lv->lv_snapshot_use_rate) {
			/* must not sleep holding the semaphore */
			up_read(&lv->lv_lock);
			interruptible_sleep_on(&lv->lv_snapshot_wait);
			down_read(&lv->lv_lock);
		}
		up_read(&lv->lv_lock);
		break;

	case O_NONBLOCK:
		break;

	default:
		return -EINVAL;
	}
	/* report the current fill percentage back to the caller */
	down_read(&lv->lv_lock);
	lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
	up_read(&lv->lv_lock);

	return copy_to_user(arg, &lv_rate_req,
			    sizeof(lv_rate_req)) ? -EFAULT : 0;
}
1146
/*
 * LV_BMAP ioctl backend: translate a logical block number on the LV
 * into (physical device, physical block) by pushing a throwaway buffer
 * head through lvm_map(), then copy the result back to user space.
 * Returns 0, -EFAULT or -EINVAL (mapping failed).
 */
static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
{
	struct buffer_head bh;
	unsigned long block;
	int err;

	if (get_user(block, &user_result->lv_block))
		return -EFAULT;

	/* fake a read request for the block; b_end_io == NULL also keeps
	   lvm_map's error path from trying to complete it */
	memset(&bh, 0, sizeof bh);
	bh.b_blocknr = block;
	bh.b_dev = bh.b_rdev = inode->i_rdev;
	bh.b_size = lvm_get_blksize(bh.b_dev);
	bh.b_rsector = block * (bh.b_size >> 9);	/* block -> sector */
	bh.b_end_io = NULL;
	if ((err = lvm_map(&bh, READ)) < 0) {
		printk("lvm map failed: %d\n", err);
		return -EINVAL;
	}

	/* lvm_map rewrote b_rdev/b_rsector to the physical location */
	return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
	    put_user(bh.b_rsector / (bh.b_size >> 9),
		     &user_result->lv_block) ? -EFAULT : 0;
}
1171
1172
1173 /*
1174 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1175 * (see init_module/lvm_init)
1176 */
__remap_snapshot(kdev_t rdev,ulong rsector,ulong pe_start,lv_t * lv,vg_t * vg)1177 static void __remap_snapshot(kdev_t rdev, ulong rsector,
1178 ulong pe_start, lv_t * lv, vg_t * vg)
1179 {
1180
1181 /* copy a chunk from the origin to a snapshot device */
1182 down_write(&lv->lv_lock);
1183
1184 /* we must redo lvm_snapshot_remap_block in order to avoid a
1185 race condition in the gap where no lock was held */
1186 if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1187 !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1188 lvm_write_COW_table_block(vg, lv);
1189
1190 up_write(&lv->lv_lock);
1191 }
1192
/*
 * Copy-on-write entry point: copy a chunk into the snapshot unless it
 * is already there.
 */
static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
				   ulong pe_start, lv_t * lv, vg_t * vg)
{
	int already_remapped;

	/* cheap check under the read lock: chunk already copied? */
	down_read(&lv->lv_lock);
	already_remapped =
	    lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
	up_read(&lv->lv_lock);

	if (already_remapped)
		return;

	/* not yet in the snapshot: serialize the COW with accesses to
	   the snapshot device and do the copy */
	__remap_snapshot(rdev, rsector, pe_start, lv, vg);
}
1207
1208
1209 /*
1210 * extents destined for a pe that is on the move should be deferred
1211 */
_should_defer(kdev_t pv,ulong sector,uint32_t pe_size)1212 static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size)
1213 {
1214 return ((pe_lock_req.lock == LOCK_PE) &&
1215 (pv == pe_lock_req.data.pv_dev) &&
1216 (sector >= pe_lock_req.data.pv_offset) &&
1217 (sector < (pe_lock_req.data.pv_offset + pe_size)));
1218 }
1219
/*
 * Defer a write that targets the physical extent currently being moved.
 *
 * Check / lock / re-check: the cheap test runs under the read lock;
 * only on a match do we take the write lock and test again, since the
 * PE lock may have been dropped in between.  Returns 1 when this path
 * has taken ownership of the bh (caller must not submit it), 0 when the
 * caller should proceed normally.
 */
static inline int _defer_extent(struct buffer_head *bh, int rw,
				kdev_t pv, ulong sector, uint32_t pe_size)
{
	/* unlocked peek is racy but benign: the locked re-checks below
	   make the actual decision */
	if (pe_lock_req.lock == LOCK_PE) {
		down_read(&_pe_lock);
		if (_should_defer(pv, sector, pe_size)) {
			up_read(&_pe_lock);
			down_write(&_pe_lock);
			if (_should_defer(pv, sector, pe_size))
				_queue_io(bh, rw);
			up_write(&_pe_lock);
			/* NOTE(review): returns 1 even when the write-locked
			   re-check failed, i.e. the bh is then neither queued
			   nor submitted by the caller — confirm that the
			   UNLOCK_PE flush covers this window */
			return 1;
		}
		up_read(&_pe_lock);
	}
	return 0;
}
1237
1238
/*
 * Core remapping function: translate an LV-relative request (bh on the
 * LVM device) to the backing physical volume and sector, handling
 * linear and striped layouts plus snapshot COW.
 *
 * Returns 1 when bh was remapped and should be submitted, 0 when the
 * request was queued (PE move in progress), -1 on error (bh completed
 * with -EIO if it has an end_io handler).
 */
static int lvm_map(struct buffer_head *bh, int rw)
{
	int minor = MINOR(bh->b_rdev);
	ulong index;
	ulong pe_start;
	ulong size = bh->b_size >> 9;	/* request size in 512-byte sectors */
	ulong rsector_org = bh->b_rsector;	/* LV-relative sector */
	ulong rsector_map;	/* PV-relative sector (output) */
	kdev_t rdev_map;	/* backing device (output) */
	vg_t *vg_this = vg[VG_BLK(minor)];
	lv_t *lv = vg_this->lv[LV_BLK(minor)];


	down_read(&lv->lv_lock);
	if (!(lv->lv_status & LV_ACTIVE)) {
		printk(KERN_ALERT
		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	/* reject writes to a read-only LV */
	if ((rw == WRITE || rw == WRITEA) && !(lv->lv_access & LV_WRITE)) {
		printk(KERN_CRIT
		       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	P_MAP
	    ("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
	     lvm_name, minor, kdevname(bh->b_rdev), rsector_org, size);

	if (rsector_org + size > lv->lv_size) {
		printk(KERN_ALERT
		       "%s - lvm_map access beyond end of device; *rsector: "
		       "%lu or size: %lu wrong for minor: %2d\n",
		       lvm_name, rsector_org, size, minor);
		goto bad;
	}


	if (lv->lv_stripes < 2) {	/* linear mapping */
		/* get the index */
		index = rsector_org / vg_this->pe_size;
		pe_start = lv->lv_current_pe[index].pe;
		/* PE base plus offset within the PE */
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % vg_this->pe_size);
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map);

	} else {		/* striped mapping */
		ulong stripe_index;
		ulong stripe_length;

		/* one full "row" of stripes covers pe_size * lv_stripes
		   sectors; pick the stripe within the row, then the PE
		   that backs that stripe */
		stripe_length = vg_this->pe_size * lv->lv_stripes;
		stripe_index = (rsector_org % stripe_length) /
		    lv->lv_stripesize;
		index = rsector_org / stripe_length +
		    (stripe_index % lv->lv_stripes) *
		    (lv->lv_allocated_le / lv->lv_stripes);
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % stripe_length) -
		    (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
		    stripe_index / lv->lv_stripes *
		    (lv->lv_stripes - 1) * lv->lv_stripesize;
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n"
		      "stripe_length: %ld stripe_index: %ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map, stripe_length,
		      stripe_index);
	}

	/*
	 * Queue writes to physical extents on the move until move completes.
	 * Don't get _pe_lock until there is a reasonable expectation that
	 * we need to queue this request, because this is in the fast path.
	 */
	if (rw == WRITE || rw == WRITEA) {
		if (_defer_extent(bh, rw, rdev_map,
				  rsector_map, vg_this->pe_size)) {

			up_read(&lv->lv_lock);
			return 0;
		}

		lv->lv_current_pe[index].writes++;	/* statistic */
	} else
		lv->lv_current_pe[index].reads++;	/* statistic */

	/* snapshot volume exception handling on physical device address base */
	if (!(lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)))
		goto out;

	if (lv->lv_access & LV_SNAPSHOT) {	/* remap snapshot */
		/* reads/writes on the snapshot itself: redirect through
		   the exception table */
		if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
					     pe_start, lv) < 0)
			goto bad;

	} else if (rw == WRITE || rw == WRITEA) {	/* snapshot origin */
		lv_t *snap;

		/* start with first snapshot and loop through all of
		   them */
		for (snap = lv->lv_snapshot_next; snap;
		     snap = snap->lv_snapshot_next) {
			/* Check for inactive snapshot */
			if (!(snap->lv_status & LV_ACTIVE))
				continue;

			/* Serializes the COW with the accesses to the
			   snapshot device */
			_remap_snapshot(rdev_map, rsector_map,
					pe_start, snap, vg_this);
		}
	}

      out:
	bh->b_rdev = rdev_map;
	bh->b_rsector = rsector_map;
	up_read(&lv->lv_lock);
	return 1;

      bad:
	/* complete the bh with -EIO only when it has an end_io handler
	   (lvm_user_bmap builds bh's with b_end_io == NULL) */
	if (bh->b_end_io)
		buffer_IO_error(bh);
	up_read(&lv->lv_lock);
	return -1;
}				/* lvm_map() */
1373
1374
1375 /*
1376 * internal support functions
1377 */
1378
1379 #ifdef LVM_HD_NAME
1380 /*
1381 * generate "hard disk" name
1382 */
lvm_hd_name(char * buf,int minor)1383 void lvm_hd_name(char *buf, int minor)
1384 {
1385 int len = 0;
1386 lv_t *lv_ptr;
1387
1388 if (vg[VG_BLK(minor)] == NULL ||
1389 (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
1390 return;
1391 len = strlen(lv_ptr->lv_name) - 5;
1392 memcpy(buf, &lv_ptr->lv_name[5], len);
1393 buf[len] = 0;
1394 return;
1395 }
1396 #endif
1397
1398
1399
1400
1401 /*
1402 * make request function
1403 */
lvm_make_request_fn(request_queue_t * q,int rw,struct buffer_head * bh)1404 static int lvm_make_request_fn(request_queue_t * q,
1405 int rw, struct buffer_head *bh)
1406 {
1407 return (lvm_map(bh, rw) <= 0) ? 0 : 1;
1408 }
1409
1410
1411 /********************************************************************
1412 *
1413 * Character device support functions
1414 *
1415 ********************************************************************/
1416 /*
1417 * character device support function logical volume manager lock
1418 */
lvm_do_lock_lvm(void)1419 static int lvm_do_lock_lvm(void)
1420 {
1421 lock_try_again:
1422 spin_lock(&lvm_lock);
1423 if (lock != 0 && lock != current->pid) {
1424 P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
1425 spin_unlock(&lvm_lock);
1426 interruptible_sleep_on(&lvm_wait);
1427 if (current->sigpending != 0)
1428 return -EINTR;
1429 #ifdef LVM_TOTAL_RESET
1430 if (lvm_reset_spindown > 0)
1431 return -EACCES;
1432 #endif
1433 goto lock_try_again;
1434 }
1435 lock = current->pid;
1436 P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
1437 spin_unlock(&lvm_lock);
1438 return 0;
1439 } /* lvm_do_lock_lvm */
1440
1441
1442 /*
1443 * character device support function lock/unlock physical extend
1444 */
lvm_do_pe_lock_unlock(vg_t * vg_ptr,void * arg)1445 static int lvm_do_pe_lock_unlock(vg_t * vg_ptr, void *arg)
1446 {
1447 pe_lock_req_t new_lock;
1448 struct buffer_head *bh;
1449 uint p;
1450
1451 if (vg_ptr == NULL)
1452 return -ENXIO;
1453 if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
1454 return -EFAULT;
1455
1456 switch (new_lock.lock) {
1457 case LOCK_PE:
1458 for (p = 0; p < vg_ptr->pv_max; p++) {
1459 if (vg_ptr->pv[p] != NULL &&
1460 new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
1461 break;
1462 }
1463 if (p == vg_ptr->pv_max)
1464 return -ENXIO;
1465
1466 /*
1467 * this sync releaves memory pressure to lessen the
1468 * likelyhood of pvmove being paged out - resulting in
1469 * deadlock.
1470 *
1471 * This method of doing a pvmove is broken
1472 */
1473 fsync_dev(pe_lock_req.data.lv_dev);
1474
1475 down_write(&_pe_lock);
1476 if (pe_lock_req.lock == LOCK_PE) {
1477 up_write(&_pe_lock);
1478 return -EBUSY;
1479 }
1480
1481 /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
1482 pe_lock_req.lock = LOCK_PE;
1483 pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
1484 pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
1485 pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
1486 up_write(&_pe_lock);
1487
1488 /* some requests may have got through since the fsync */
1489 fsync_dev(pe_lock_req.data.pv_dev);
1490 break;
1491
1492 case UNLOCK_PE:
1493 down_write(&_pe_lock);
1494 pe_lock_req.lock = UNLOCK_PE;
1495 pe_lock_req.data.lv_dev = 0;
1496 pe_lock_req.data.pv_dev = 0;
1497 pe_lock_req.data.pv_offset = 0;
1498 bh = _dequeue_io();
1499 up_write(&_pe_lock);
1500
1501 /* handle all deferred io for this PE */
1502 _flush_io(bh);
1503 break;
1504
1505 default:
1506 return -EINVAL;
1507 }
1508 return 0;
1509 }
1510
1511
1512 /*
1513 * character device support function logical extend remap
1514 */
lvm_do_le_remap(vg_t * vg_ptr,void * arg)1515 static int lvm_do_le_remap(vg_t * vg_ptr, void *arg)
1516 {
1517 uint l, le;
1518 lv_t *lv_ptr;
1519
1520 if (vg_ptr == NULL)
1521 return -ENXIO;
1522 if (copy_from_user(&le_remap_req, arg,
1523 sizeof(le_remap_req_t)) != 0)
1524 return -EFAULT;
1525
1526 for (l = 0; l < vg_ptr->lv_max; l++) {
1527 lv_ptr = vg_ptr->lv[l];
1528
1529 if (!lv_ptr)
1530 continue;
1531
1532 if (strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) {
1533 down_write(&lv_ptr->lv_lock);
1534 for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
1535 if (lv_ptr->lv_current_pe[le].dev ==
1536 le_remap_req.old_dev &&
1537 lv_ptr->lv_current_pe[le].pe ==
1538 le_remap_req.old_pe) {
1539 lv_ptr->lv_current_pe[le].dev =
1540 le_remap_req.new_dev;
1541 lv_ptr->lv_current_pe[le].pe =
1542 le_remap_req.new_pe;
1543 __update_hardsectsize(lv_ptr);
1544 up_write(&lv_ptr->lv_lock);
1545 return 0;
1546 }
1547 }
1548 up_write(&lv_ptr->lv_lock);
1549 return -EINVAL;
1550 }
1551 }
1552 return -ENXIO;
1553 } /* lvm_do_le_remap() */
1554
1555
1556 /*
1557 * character device support function VGDA create
1558 */
lvm_do_vg_create(void * arg,int minor)1559 static int lvm_do_vg_create(void *arg, int minor)
1560 {
1561 int ret = 0;
1562 ulong l, ls = 0, p, size;
1563 lv_t lv;
1564 vg_t *vg_ptr;
1565 lv_t **snap_lv_ptr;
1566
1567 if ((vg_ptr = kmalloc(sizeof(vg_t), GFP_KERNEL)) == NULL) {
1568 printk(KERN_CRIT
1569 "%s -- VG_CREATE: kmalloc error VG at line %d\n",
1570 lvm_name, __LINE__);
1571 return -ENOMEM;
1572 }
1573 /* get the volume group structure */
1574 if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1575 P_IOCTL
1576 ("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1577 arg, sizeof(vg_t));
1578 kfree(vg_ptr);
1579 return -EFAULT;
1580 }
1581
1582 /* VG_CREATE now uses minor number in VG structure */
1583 if (minor == -1)
1584 minor = vg_ptr->vg_number;
1585
1586 /* check limits */
1587 if (minor >= ABS_MAX_VG)
1588 return -EFAULT;
1589
1590 /* Validate it */
1591 if (vg[VG_CHR(minor)] != NULL) {
1592 P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1593 kfree(vg_ptr);
1594 return -EPERM;
1595 }
1596
1597 /* we are not that active so far... */
1598 vg_ptr->vg_status &= ~VG_ACTIVE;
1599 vg_ptr->pe_allocated = 0;
1600
1601 if (vg_ptr->pv_max > ABS_MAX_PV) {
1602 printk(KERN_WARNING
1603 "%s -- Can't activate VG: ABS_MAX_PV too small\n",
1604 lvm_name);
1605 kfree(vg_ptr);
1606 return -EPERM;
1607 }
1608
1609 if (vg_ptr->lv_max > ABS_MAX_LV) {
1610 printk(KERN_WARNING
1611 "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
1612 lvm_name, vg_ptr->lv_max);
1613 kfree(vg_ptr);
1614 return -EPERM;
1615 }
1616
1617 /* create devfs and procfs entries */
1618 lvm_fs_create_vg(vg_ptr);
1619
1620 vg[VG_CHR(minor)] = vg_ptr;
1621
1622 /* get the physical volume structures */
1623 vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1624 for (p = 0; p < vg_ptr->pv_max; p++) {
1625 pv_t *pvp;
1626 /* user space address */
1627 if ((pvp = vg_ptr->pv[p]) != NULL) {
1628 ret = lvm_do_pv_create(pvp, vg_ptr, p);
1629 if (ret != 0) {
1630 lvm_do_vg_remove(minor);
1631 return ret;
1632 }
1633 }
1634 }
1635
1636 size = vg_ptr->lv_max * sizeof(lv_t *);
1637 if ((snap_lv_ptr = vmalloc(size)) == NULL) {
1638 printk(KERN_CRIT
1639 "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n",
1640 lvm_name, __LINE__);
1641 lvm_do_vg_remove(minor);
1642 return -EFAULT;
1643 }
1644 memset(snap_lv_ptr, 0, size);
1645
1646 /* get the logical volume structures */
1647 vg_ptr->lv_cur = 0;
1648 for (l = 0; l < vg_ptr->lv_max; l++) {
1649 lv_t *lvp;
1650 /* user space address */
1651 if ((lvp = vg_ptr->lv[l]) != NULL) {
1652 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1653 P_IOCTL
1654 ("ERROR: copying LV ptr %p (%d bytes)\n",
1655 lvp, sizeof(lv_t));
1656 lvm_do_vg_remove(minor);
1657 return -EFAULT;
1658 }
1659 if (lv.lv_access & LV_SNAPSHOT) {
1660 snap_lv_ptr[ls] = lvp;
1661 vg_ptr->lv[l] = NULL;
1662 ls++;
1663 continue;
1664 }
1665 vg_ptr->lv[l] = NULL;
1666 /* only create original logical volumes for now */
1667 if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1668 lvm_do_vg_remove(minor);
1669 return -EFAULT;
1670 }
1671 }
1672 }
1673
1674 /* Second path to correct snapshot logical volumes which are not
1675 in place during first path above */
1676 for (l = 0; l < ls; l++) {
1677 lv_t *lvp = snap_lv_ptr[l];
1678 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1679 lvm_do_vg_remove(minor);
1680 return -EFAULT;
1681 }
1682 if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1683 lvm_do_vg_remove(minor);
1684 return -EFAULT;
1685 }
1686 }
1687
1688 vfree(snap_lv_ptr);
1689
1690 vg_count++;
1691
1692
1693 MOD_INC_USE_COUNT;
1694
1695 /* let's go active */
1696 vg_ptr->vg_status |= VG_ACTIVE;
1697
1698 return 0;
1699 } /* lvm_do_vg_create() */
1700
1701
1702 /*
1703 * character device support function VGDA extend
1704 */
lvm_do_vg_extend(vg_t * vg_ptr,void * arg)1705 static int lvm_do_vg_extend(vg_t * vg_ptr, void *arg)
1706 {
1707 int ret = 0;
1708 uint p;
1709 pv_t *pv_ptr;
1710
1711 if (vg_ptr == NULL)
1712 return -ENXIO;
1713 if (vg_ptr->pv_cur < vg_ptr->pv_max) {
1714 for (p = 0; p < vg_ptr->pv_max; p++) {
1715 if ((pv_ptr = vg_ptr->pv[p]) == NULL) {
1716 ret = lvm_do_pv_create(arg, vg_ptr, p);
1717 if (ret != 0)
1718 return ret;
1719 pv_ptr = vg_ptr->pv[p];
1720 vg_ptr->pe_total += pv_ptr->pe_total;
1721 return 0;
1722 }
1723 }
1724 }
1725 return -EPERM;
1726 } /* lvm_do_vg_extend() */
1727
1728
1729 /*
1730 * character device support function VGDA reduce
1731 */
lvm_do_vg_reduce(vg_t * vg_ptr,void * arg)1732 static int lvm_do_vg_reduce(vg_t * vg_ptr, void *arg)
1733 {
1734 uint p;
1735 pv_t *pv_ptr;
1736
1737 if (vg_ptr == NULL)
1738 return -ENXIO;
1739 if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
1740 return -EFAULT;
1741
1742 for (p = 0; p < vg_ptr->pv_max; p++) {
1743 pv_ptr = vg_ptr->pv[p];
1744 if (pv_ptr != NULL &&
1745 strcmp(pv_ptr->pv_name, pv_name) == 0) {
1746 if (pv_ptr->lv_cur > 0)
1747 return -EPERM;
1748 lvm_do_pv_remove(vg_ptr, p);
1749 /* Make PV pointer array contiguous */
1750 for (; p < vg_ptr->pv_max - 1; p++)
1751 vg_ptr->pv[p] = vg_ptr->pv[p + 1];
1752 vg_ptr->pv[p + 1] = NULL;
1753 return 0;
1754 }
1755 }
1756 return -ENXIO;
1757 } /* lvm_do_vg_reduce */
1758
1759
1760 /*
1761 * character device support function VG rename
1762 */
lvm_do_vg_rename(vg_t * vg_ptr,void * arg)1763 static int lvm_do_vg_rename(vg_t * vg_ptr, void *arg)
1764 {
1765 int l = 0, p = 0, len = 0;
1766 char vg_name[NAME_LEN] = { 0, };
1767 char lv_name[NAME_LEN] = { 0, };
1768 char *ptr = NULL;
1769 lv_t *lv_ptr = NULL;
1770 pv_t *pv_ptr = NULL;
1771
1772 /* If the VG doesn't exist in the kernel then just exit */
1773 if (!vg_ptr)
1774 return 0;
1775
1776 if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
1777 return -EFAULT;
1778
1779 lvm_fs_remove_vg(vg_ptr);
1780
1781 strncpy(vg_ptr->vg_name, vg_name, sizeof(vg_name) - 1);
1782 for (l = 0; l < vg_ptr->lv_max; l++) {
1783 if ((lv_ptr = vg_ptr->lv[l]) == NULL)
1784 continue;
1785 memset(lv_ptr->vg_name, 0, sizeof(*vg_name));
1786 strncpy(lv_ptr->vg_name, vg_name, sizeof(vg_name));
1787 ptr = strrchr(lv_ptr->lv_name, '/');
1788 ptr = ptr ? ptr + 1 : lv_ptr->lv_name;
1789 strncpy(lv_name, ptr, sizeof(lv_name));
1790 len = sizeof(LVM_DIR_PREFIX);
1791 strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX);
1792 strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len);
1793 strcat(lv_ptr->lv_name, "/");
1794 len += strlen(vg_name) + 1;
1795 strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len);
1796 }
1797 for (p = 0; p < vg_ptr->pv_max; p++) {
1798 if ((pv_ptr = vg_ptr->pv[p]) == NULL)
1799 continue;
1800 strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
1801 }
1802
1803 lvm_fs_create_vg(vg_ptr);
1804
1805 /* Need to add PV entries */
1806 for (p = 0; p < vg_ptr->pv_act; p++) {
1807 pv_t *pv_ptr = vg_ptr->pv[p];
1808
1809 if (pv_ptr)
1810 lvm_fs_create_pv(vg_ptr, pv_ptr);
1811 }
1812
1813 /* Need to add LV entries */
1814 for (l = 0; l < vg_ptr->lv_max; l++) {
1815 lv_t *lv_ptr = vg_ptr->lv[l];
1816
1817 if (!lv_ptr)
1818 continue;
1819
1820 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
1821 lvm_fs_create_lv(vg_ptr, lv_ptr);
1822 }
1823
1824 return 0;
1825 } /* lvm_do_vg_rename */
1826
1827
1828 /*
1829 * character device support function VGDA remove
1830 */
lvm_do_vg_remove(int minor)1831 static int lvm_do_vg_remove(int minor)
1832 {
1833 int i;
1834 vg_t *vg_ptr = vg[VG_CHR(minor)];
1835 pv_t *pv_ptr;
1836
1837 if (vg_ptr == NULL)
1838 return -ENXIO;
1839
1840 #ifdef LVM_TOTAL_RESET
1841 if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
1842 #else
1843 if (vg_ptr->lv_open > 0)
1844 #endif
1845 return -EPERM;
1846
1847 /* let's go inactive */
1848 vg_ptr->vg_status &= ~VG_ACTIVE;
1849
1850 /* remove from procfs and devfs */
1851 lvm_fs_remove_vg(vg_ptr);
1852
1853 /* free LVs */
1854 /* first free snapshot logical volumes */
1855 for (i = 0; i < vg_ptr->lv_max; i++) {
1856 if (vg_ptr->lv[i] != NULL &&
1857 vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
1858 lvm_do_lv_remove(minor, NULL, i);
1859 current->state = TASK_UNINTERRUPTIBLE;
1860 schedule_timeout(1);
1861 }
1862 }
1863 /* then free the rest of the LVs */
1864 for (i = 0; i < vg_ptr->lv_max; i++) {
1865 if (vg_ptr->lv[i] != NULL) {
1866 lvm_do_lv_remove(minor, NULL, i);
1867 current->state = TASK_UNINTERRUPTIBLE;
1868 schedule_timeout(1);
1869 }
1870 }
1871
1872 /* free PVs */
1873 for (i = 0; i < vg_ptr->pv_max; i++) {
1874 if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
1875 P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
1876 lvm_do_pv_remove(vg_ptr, i);
1877 }
1878 }
1879
1880 P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
1881 kfree(vg_ptr);
1882 vg[VG_CHR(minor)] = NULL;
1883
1884 vg_count--;
1885
1886 MOD_DEC_USE_COUNT;
1887
1888 return 0;
1889 } /* lvm_do_vg_remove() */
1890
1891
1892 /*
1893 * character device support function physical volume create
1894 */
lvm_do_pv_create(pv_t * pvp,vg_t * vg_ptr,ulong p)1895 static int lvm_do_pv_create(pv_t * pvp, vg_t * vg_ptr, ulong p)
1896 {
1897 pv_t *pv;
1898 int err;
1899
1900 if (!vg_ptr)
1901 return -ENXIO;
1902
1903 pv = kmalloc(sizeof(pv_t), GFP_KERNEL);
1904 if (pv == NULL) {
1905 printk(KERN_CRIT
1906 "%s -- PV_CREATE: kmalloc error PV at line %d\n",
1907 lvm_name, __LINE__);
1908 return -ENOMEM;
1909 }
1910
1911 memset(pv, 0, sizeof(*pv));
1912
1913 if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
1914 P_IOCTL
1915 ("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
1916 pvp, sizeof(pv_t));
1917 kfree(pv);
1918 return -EFAULT;
1919 }
1920
1921 if ((err = _open_pv(pv))) {
1922 kfree(pv);
1923 return err;
1924 }
1925
1926 /* We don't need the PE list
1927 in kernel space as with LVs pe_t list (see below) */
1928 pv->pe = NULL;
1929 pv->pe_allocated = 0;
1930 pv->pv_status = PV_ACTIVE;
1931 vg_ptr->pv_act++;
1932 vg_ptr->pv_cur++;
1933 lvm_fs_create_pv(vg_ptr, pv);
1934
1935 vg_ptr->pv[p] = pv;
1936 return 0;
1937 } /* lvm_do_pv_create() */
1938
1939
1940 /*
1941 * character device support function physical volume remove
1942 */
lvm_do_pv_remove(vg_t * vg_ptr,ulong p)1943 static int lvm_do_pv_remove(vg_t * vg_ptr, ulong p)
1944 {
1945 pv_t *pv = vg_ptr->pv[p];
1946
1947 lvm_fs_remove_pv(vg_ptr, pv);
1948
1949 vg_ptr->pe_total -= pv->pe_total;
1950 vg_ptr->pv_cur--;
1951 vg_ptr->pv_act--;
1952
1953 _close_pv(pv);
1954 kfree(pv);
1955
1956 vg_ptr->pv[p] = NULL;
1957
1958 return 0;
1959 }
1960
1961
/*
 * Recompute the hard sector size exported for an LV's device: the
 * maximum sector size of any backing device the LV touches (PVs, the
 * LEs' devices and, for active snapshots, the exception targets).
 */
static void __update_hardsectsize(lv_t * lv)
{
	int max_hardsectsize = 0, hardsectsize = 0;
	int p;

	/* Check PVs first to see if they all have same sector size */
	for (p = 0; p < lv->vg->pv_cur; p++) {
		pv_t *pv = lv->vg->pv[p];
		if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
			if (max_hardsectsize == 0)
				max_hardsectsize = hardsectsize;
			else if (hardsectsize != max_hardsectsize) {
				/* mismatch: fall through to the per-LE
				   scan below */
				P_DEV
				    ("%s PV[%d] (%s) sector size %d, not %d\n",
				     lv->lv_name, p, kdevname(pv->pv_dev),
				     hardsectsize, max_hardsectsize);
				break;
			}
		}
	}

	/* PVs have different block size, need to check each LE sector size.
	   NOTE(review): this condition also triggers when the last PV slot
	   was empty or lvm_sectsize() returned 0 for it — confirm intent. */
	if (hardsectsize != max_hardsectsize) {
		int le;
		for (le = 0; le < lv->lv_allocated_le; le++) {
			hardsectsize =
			    lvm_sectsize(lv->lv_current_pe[le].dev);
			if (hardsectsize > max_hardsectsize) {
				P_DEV
				    ("%s LE[%d] (%s) blocksize %d not %d\n",
				     lv->lv_name, le,
				     kdevname(lv->lv_current_pe[le].dev),
				     hardsectsize, max_hardsectsize);
				max_hardsectsize = hardsectsize;
			}
		}

		/* only perform this operation on active snapshots */
		if ((lv->lv_access & LV_SNAPSHOT) &&
		    (lv->lv_status & LV_ACTIVE)) {
			int e;
			for (e = 0; e < lv->lv_remap_end; e++) {
				hardsectsize =
				    lvm_sectsize(lv->lv_block_exception[e].
						 rdev_new);
				if (hardsectsize > max_hardsectsize)
					max_hardsectsize = hardsectsize;
			}
		}
	}

	/* fall back to the default sector size if nothing was found */
	if (max_hardsectsize == 0)
		max_hardsectsize = SECTOR_SIZE;
	P_DEV("hardblocksize for LV %s is %d\n",
	      kdevname(lv->lv_dev), max_hardsectsize);
	lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
}
2019
2020 /*
2021 * character device support function logical volume create
2022 */
lvm_do_lv_create(int minor,char * lv_name,lv_t * lv)2023 static int lvm_do_lv_create(int minor, char *lv_name, lv_t * lv)
2024 {
2025 int e, ret, l, le, l_new, p, size, activate = 1;
2026 ulong lv_status_save;
2027 lv_block_exception_t *lvbe = lv->lv_block_exception;
2028 vg_t *vg_ptr = vg[VG_CHR(minor)];
2029 lv_t *lv_ptr = NULL;
2030 pe_t *pep;
2031
2032 if (!(pep = lv->lv_current_pe))
2033 return -EINVAL;
2034
2035 if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
2036 return -EINVAL;
2037
2038 for (l = 0; l < vg_ptr->lv_cur; l++) {
2039 if (vg_ptr->lv[l] != NULL &&
2040 strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2041 return -EEXIST;
2042 }
2043
2044 /* in case of lv_remove(), lv_create() pair */
2045 l_new = -1;
2046 if (vg_ptr->lv[lv->lv_number] == NULL)
2047 l_new = lv->lv_number;
2048 else {
2049 for (l = 0; l < vg_ptr->lv_max; l++) {
2050 if (vg_ptr->lv[l] == NULL)
2051 if (l_new == -1)
2052 l_new = l;
2053 }
2054 }
2055 if (l_new == -1)
2056 return -EPERM;
2057 else
2058 l = l_new;
2059
2060 if ((lv_ptr = kmalloc(sizeof(lv_t), GFP_KERNEL)) == NULL) {;
2061 printk(KERN_CRIT
2062 "%s -- LV_CREATE: kmalloc error LV at line %d\n",
2063 lvm_name, __LINE__);
2064 return -ENOMEM;
2065 }
2066 /* copy preloaded LV */
2067 memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2068
2069 lv_status_save = lv_ptr->lv_status;
2070 lv_ptr->lv_status &= ~LV_ACTIVE;
2071 lv_ptr->lv_snapshot_org = NULL;
2072 lv_ptr->lv_snapshot_prev = NULL;
2073 lv_ptr->lv_snapshot_next = NULL;
2074 lv_ptr->lv_block_exception = NULL;
2075 lv_ptr->lv_iobuf = NULL;
2076 lv_ptr->lv_COW_table_iobuf = NULL;
2077 lv_ptr->lv_snapshot_hash_table = NULL;
2078 lv_ptr->lv_snapshot_hash_table_size = 0;
2079 lv_ptr->lv_snapshot_hash_mask = 0;
2080 init_rwsem(&lv_ptr->lv_lock);
2081
2082 lv_ptr->lv_snapshot_use_rate = 0;
2083
2084 vg_ptr->lv[l] = lv_ptr;
2085
2086 /* get the PE structures from user space if this
2087 is not a snapshot logical volume */
2088 if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2089 size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2090
2091 if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2092 printk(KERN_CRIT
2093 "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2094 "at line %d\n", lvm_name, size, __LINE__);
2095 P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2096 kfree(lv_ptr);
2097 vg_ptr->lv[l] = NULL;
2098 return -ENOMEM;
2099 }
2100 if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2101 P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2102 pep, sizeof(size));
2103 vfree(lv_ptr->lv_current_pe);
2104 kfree(lv_ptr);
2105 vg_ptr->lv[l] = NULL;
2106 return -EFAULT;
2107 }
2108 /* correct the PE count in PVs */
2109 for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2110 vg_ptr->pe_allocated++;
2111 for (p = 0; p < vg_ptr->pv_cur; p++) {
2112 if (vg_ptr->pv[p]->pv_dev ==
2113 lv_ptr->lv_current_pe[le].dev)
2114 vg_ptr->pv[p]->pe_allocated++;
2115 }
2116 }
2117 } else {
2118 /* Get snapshot exception data and block list */
2119 if (lvbe != NULL) {
2120 lv_ptr->lv_snapshot_org =
2121 vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2122 if (lv_ptr->lv_snapshot_org != NULL) {
2123 size =
2124 lv_ptr->lv_remap_end *
2125 sizeof(lv_block_exception_t);
2126
2127 if (!size) {
2128 printk(KERN_WARNING
2129 "%s -- zero length exception table requested\n",
2130 lvm_name);
2131 kfree(lv_ptr);
2132 return -EINVAL;
2133 }
2134
2135 if ((lv_ptr->lv_block_exception =
2136 vmalloc(size)) == NULL) {
2137 printk(KERN_CRIT
2138 "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2139 "of %d byte at line %d\n",
2140 lvm_name, size, __LINE__);
2141 P_KFREE("%s -- kfree %d\n",
2142 lvm_name, __LINE__);
2143 kfree(lv_ptr);
2144 vg_ptr->lv[l] = NULL;
2145 return -ENOMEM;
2146 }
2147 if (copy_from_user
2148 (lv_ptr->lv_block_exception, lvbe,
2149 size)) {
2150 vfree(lv_ptr->lv_block_exception);
2151 kfree(lv_ptr);
2152 vg_ptr->lv[l] = NULL;
2153 return -EFAULT;
2154 }
2155
2156 if (lv_ptr->lv_block_exception[0].
2157 rsector_org ==
2158 LVM_SNAPSHOT_DROPPED_SECTOR) {
2159 printk(KERN_WARNING
2160 "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2161 lvm_name);
2162 activate = 0;
2163 }
2164
2165 /* point to the original logical volume */
2166 lv_ptr = lv_ptr->lv_snapshot_org;
2167
2168 lv_ptr->lv_snapshot_minor = 0;
2169 lv_ptr->lv_snapshot_org = lv_ptr;
2170 /* our new one now back points to the previous last in the chain
2171 which can be the original logical volume */
2172 lv_ptr = vg_ptr->lv[l];
2173 /* now lv_ptr points to our new last snapshot logical volume */
2174 lv_ptr->lv_current_pe =
2175 lv_ptr->lv_snapshot_org->lv_current_pe;
2176 lv_ptr->lv_allocated_snapshot_le =
2177 lv_ptr->lv_allocated_le;
2178 lv_ptr->lv_allocated_le =
2179 lv_ptr->lv_snapshot_org->
2180 lv_allocated_le;
2181 lv_ptr->lv_current_le =
2182 lv_ptr->lv_snapshot_org->lv_current_le;
2183 lv_ptr->lv_size =
2184 lv_ptr->lv_snapshot_org->lv_size;
2185 lv_ptr->lv_stripes =
2186 lv_ptr->lv_snapshot_org->lv_stripes;
2187 lv_ptr->lv_stripesize =
2188 lv_ptr->lv_snapshot_org->lv_stripesize;
2189
2190 if ((ret =
2191 lvm_snapshot_alloc(lv_ptr)) != 0) {
2192 vfree(lv_ptr->lv_block_exception);
2193 kfree(lv_ptr);
2194 vg_ptr->lv[l] = NULL;
2195 return ret;
2196 }
2197 /* Update the VG PE(s) used by snapshot reserve space. */
2198 vg_ptr->pe_allocated +=
2199 lv_ptr->lv_allocated_snapshot_le;
2200
2201 for (e = 0; e < lv_ptr->lv_remap_ptr; e++)
2202 lvm_hash_link(lv_ptr->
2203 lv_block_exception +
2204 e,
2205 lv_ptr->
2206 lv_block_exception
2207 [e].rdev_org,
2208 lv_ptr->
2209 lv_block_exception
2210 [e].rsector_org,
2211 lv_ptr);
2212 /* need to fill the COW exception table data
2213 into the page for disk i/o */
2214 if (lvm_snapshot_fill_COW_page
2215 (vg_ptr, lv_ptr)) {
2216 kfree(lv_ptr);
2217 vg_ptr->lv[l] = NULL;
2218 return -EINVAL;
2219 }
2220 init_waitqueue_head(&lv_ptr->
2221 lv_snapshot_wait);
2222 } else {
2223 kfree(lv_ptr);
2224 vg_ptr->lv[l] = NULL;
2225 return -EFAULT;
2226 }
2227 } else {
2228 kfree(vg_ptr->lv[l]);
2229 vg_ptr->lv[l] = NULL;
2230 return -EINVAL;
2231 }
2232 } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */
2233
2234 lv_ptr = vg_ptr->lv[l];
2235 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2236 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
2237 lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
2238 vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
2239 vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
2240 LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2241 vg_ptr->lv_cur++;
2242 lv_ptr->lv_status = lv_status_save;
2243 lv_ptr->vg = vg_ptr;
2244
2245 __update_hardsectsize(lv_ptr);
2246
2247 /* optionally add our new snapshot LV */
2248 if (lv_ptr->lv_access & LV_SNAPSHOT) {
2249 lv_t *org = lv_ptr->lv_snapshot_org, *last;
2250
2251 /* sync the original logical volume */
2252 fsync_dev(org->lv_dev);
2253 #ifdef LVM_VFS_ENHANCEMENT
2254 /* VFS function call to sync and lock the filesystem */
2255 fsync_dev_lockfs(org->lv_dev);
2256 #endif
2257
2258 down_write(&org->lv_lock);
2259 org->lv_access |= LV_SNAPSHOT_ORG;
2260 lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
2261
2262
2263 /* Link in the list of snapshot volumes */
2264 for (last = org; last->lv_snapshot_next;
2265 last = last->lv_snapshot_next);
2266 lv_ptr->lv_snapshot_prev = last;
2267 last->lv_snapshot_next = lv_ptr;
2268 up_write(&org->lv_lock);
2269 }
2270
2271 /* activate the logical volume */
2272 if (activate)
2273 lv_ptr->lv_status |= LV_ACTIVE;
2274 else
2275 lv_ptr->lv_status &= ~LV_ACTIVE;
2276
2277 if (lv_ptr->lv_access & LV_WRITE)
2278 set_device_ro(lv_ptr->lv_dev, 0);
2279 else
2280 set_device_ro(lv_ptr->lv_dev, 1);
2281
2282 #ifdef LVM_VFS_ENHANCEMENT
2283 /* VFS function call to unlock the filesystem */
2284 if (lv_ptr->lv_access & LV_SNAPSHOT)
2285 unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2286 #endif
2287
2288 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2289 lvm_fs_create_lv(vg_ptr, lv_ptr);
2290 return 0;
2291 } /* lvm_do_lv_create() */
2292
2293
2294 /*
2295 * character device support function logical volume remove
2296 */
/* Remove the logical volume in slot l (l == -1: look it up by lv_name)
   from the VG addressed by minor; returns 0 or a negative errno. */
static int lvm_do_lv_remove(int minor, char *lv_name, int l)
{
	uint le, p;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *lv_ptr;

	if (!vg_ptr)
		return -ENXIO;

	/* caller passed no index -> search the LV table by name */
	if (l == -1) {
		for (l = 0; l < vg_ptr->lv_max; l++) {
			if (vg_ptr->lv[l] != NULL &&
			    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
				break;
			}
		}
	}
	if (l == vg_ptr->lv_max)
		return -ENXIO;

	lv_ptr = vg_ptr->lv[l];
#ifdef LVM_TOTAL_RESET
	/* during a total reset (lvm_reset_spindown) open LVs may go too */
	if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (lv_ptr->lv_open > 0)
#endif
		return -EBUSY;

	/* check for deletion of snapshot source while
	   snapshot volume still exists */
	if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
	    lv_ptr->lv_snapshot_next != NULL)
		return -EPERM;

	lvm_fs_remove_lv(vg_ptr, lv_ptr);

	if (lv_ptr->lv_access & LV_SNAPSHOT) {
		/*
		 * Atomically make the snapshot invisible
		 * to the original lv before playing with it.
		 */
		lv_t *org = lv_ptr->lv_snapshot_org;
		down_write(&org->lv_lock);

		/* remove this snapshot logical volume from the chain */
		lv_ptr->lv_snapshot_prev->lv_snapshot_next =
		    lv_ptr->lv_snapshot_next;
		if (lv_ptr->lv_snapshot_next != NULL) {
			lv_ptr->lv_snapshot_next->lv_snapshot_prev =
			    lv_ptr->lv_snapshot_prev;
		}

		/* no more snapshots? then the origin loses its flag */
		if (!org->lv_snapshot_next) {
			org->lv_access &= ~LV_SNAPSHOT_ORG;
		}
		up_write(&org->lv_lock);

		lvm_snapshot_release(lv_ptr);

		/* Update the VG PE(s) used by snapshot reserve space. */
		vg_ptr->pe_allocated -= lv_ptr->lv_allocated_snapshot_le;
	}

	lv_ptr->lv_status |= LV_SPINDOWN;

	/* sync the buffers */
	fsync_dev(lv_ptr->lv_dev);

	lv_ptr->lv_status &= ~LV_ACTIVE;

	/* invalidate the buffers */
	invalidate_buffers(lv_ptr->lv_dev);

	/* reset generic hd */
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
	lvm_size[MINOR(lv_ptr->lv_dev)] = 0;

	/* reset VG/LV mapping */
	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;

	/* correct the PE count in PVs if this is not a snapshot
	   logical volume */
	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
		/* only if this is no snapshot logical volume because
		   we share the lv_current_pe[] structs with the
		   original logical volume */
		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
			vg_ptr->pe_allocated--;
			for (p = 0; p < vg_ptr->pv_cur; p++) {
				if (vg_ptr->pv[p]->pv_dev ==
				    lv_ptr->lv_current_pe[le].dev)
					vg_ptr->pv[p]->pe_allocated--;
			}
		}
		vfree(lv_ptr->lv_current_pe);
	}

	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
	kfree(lv_ptr);
	vg_ptr->lv[l] = NULL;
	vg_ptr->lv_cur--;
	return 0;
}				/* lvm_do_lv_remove() */
2404
2405
2406 /*
2407 * logical volume extend / reduce
2408 */
/*
 * Prepare a snapshot LV for extend/reduce: allocate a kernel-side
 * block-exception table sized by the new lv_remap_end, fill it from
 * user space when growing, and build the matching hash table.
 * Returns 0 or a negative errno.
 */
static int __extend_reduce_snapshot(vg_t * vg_ptr, lv_t * old_lv,
				    lv_t * new_lv)
{
	lv_block_exception_t *new_lvbe;
	ulong table_bytes;

	if (!new_lv->lv_block_exception)
		return -ENXIO;

	table_bytes = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
	new_lvbe = vmalloc(table_bytes);
	if (new_lvbe == NULL) {
		printk(KERN_CRIT
		       "%s -- lvm_do_lv_extend_reduce: vmalloc "
		       "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
		       lvm_name, table_bytes, __LINE__);
		return -ENOMEM;
	}

	/* when extending, pull the new exception entries from user space;
	   when reducing, the caller copies the old entries in later */
	if (new_lv->lv_remap_end > old_lv->lv_remap_end &&
	    copy_from_user(new_lvbe, new_lv->lv_block_exception,
			   table_bytes)) {
		vfree(new_lvbe);
		return -EFAULT;
	}
	new_lv->lv_block_exception = new_lvbe;

	if (lvm_snapshot_alloc_hash_table(new_lv)) {
		vfree(new_lv->lv_block_exception);
		return -ENOMEM;
	}

	return 0;
}
2441
/*
 * Do the non-snapshot part of an LV extend/reduce: pull the new PE map
 * from user space and rebase the VG/PV allocation counters and the
 * per-PE i/o statistics from old_lv onto new_lv.
 * Returns 0 or a negative errno.
 */
static int __extend_reduce(vg_t * vg_ptr, lv_t * old_lv, lv_t * new_lv)
{
	ulong size, l, p, end;
	pe_t *pe;

	/* allocate space for new pe structures */
	size = new_lv->lv_current_le * sizeof(pe_t);
	if ((pe = vmalloc(size)) == NULL) {
		printk(KERN_CRIT
		       "%s -- lvm_do_lv_extend_reduce: "
		       "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
		       lvm_name, size, __LINE__);
		return -ENOMEM;
	}

	/* get the PE structures from user space */
	if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
		if (old_lv->lv_access & LV_SNAPSHOT)
			vfree(new_lv->lv_snapshot_hash_table);
		vfree(pe);
		return -EFAULT;
	}

	/* from here on lv_current_pe points at the kernel copy */
	new_lv->lv_current_pe = pe;

	/* reduce allocation counters on PV(s) */
	for (l = 0; l < old_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated--;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
			    old_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated--;
				break;
			}
		}
	}

	/* extend the PE count in PVs */
	for (l = 0; l < new_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated++;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
			    new_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated++;
				break;
			}
		}
	}

	/* save available i/o statistic data */
	if (old_lv->lv_stripes < 2) {	/* linear logical volume */
		/* carry read/write counters over PE-for-PE, up to the
		   shorter of the two maps */
		end = min(old_lv->lv_current_le, new_lv->lv_current_le);
		for (l = 0; l < end; l++) {
			new_lv->lv_current_pe[l].reads +=
			    old_lv->lv_current_pe[l].reads;

			new_lv->lv_current_pe[l].writes +=
			    old_lv->lv_current_pe[l].writes;
		}

	} else {		/* striped logical volume */
		uint i, j, source, dest, end, old_stripe_size,
		    new_stripe_size;

		/* 'source'/'dest' index the first PE of the current
		   stripe in the old/new map; counters are carried over
		   stripe by stripe up to the shorter stripe length */
		old_stripe_size =
		    old_lv->lv_allocated_le / old_lv->lv_stripes;
		new_stripe_size =
		    new_lv->lv_allocated_le / new_lv->lv_stripes;
		end = min(old_stripe_size, new_stripe_size);

		for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
			for (j = 0; j < end; j++) {
				new_lv->lv_current_pe[dest + j].reads +=
				    old_lv->lv_current_pe[source +
							  j].reads;
				new_lv->lv_current_pe[dest + j].writes +=
				    old_lv->lv_current_pe[source +
							  j].writes;
			}
			source += old_stripe_size;
			dest += new_stripe_size;
		}
	}

	return 0;
}
2528
/* Extend or reduce the LV named lv_name in the VG addressed by minor,
   applying the new layout described by the (kernel-copied) lv_t *new_lv;
   returns 0 or a negative errno. */
static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t * new_lv)
{
	int r;
	ulong l, e, size;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *old_lv;
	pe_t *pe;

	if (!vg_ptr)
		return -ENXIO;

	/* 'pe' is only used to reject a NULL user-space PE map */
	if ((pe = new_lv->lv_current_pe) == NULL)
		return -EINVAL;

	/* locate the LV by name */
	for (l = 0; l < vg_ptr->lv_max; l++)
		if (vg_ptr->lv[l]
		    && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
			break;

	if (l == vg_ptr->lv_max)
		return -ENXIO;

	old_lv = vg_ptr->lv[l];

	if (old_lv->lv_access & LV_SNAPSHOT) {
		/* only perform this operation on active snapshots */
		if (old_lv->lv_status & LV_ACTIVE)
			r = __extend_reduce_snapshot(vg_ptr, old_lv,
						     new_lv);
		else
			r = -EPERM;

	} else
		r = __extend_reduce(vg_ptr, old_lv, new_lv);

	if (r)
		return r;

	/* copy relevant fields */
	down_write(&old_lv->lv_lock);

	if (new_lv->lv_access & LV_SNAPSHOT) {
		/* when extending, carry over only the entries already
		   remapped (lv_remap_ptr); when reducing, copy up to the
		   new table end */
		size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
		    old_lv->lv_remap_ptr : new_lv->lv_remap_end;
		size *= sizeof(lv_block_exception_t);
		memcpy(new_lv->lv_block_exception,
		       old_lv->lv_block_exception, size);
		vfree(old_lv->lv_block_exception);
		vfree(old_lv->lv_snapshot_hash_table);

		old_lv->lv_remap_end = new_lv->lv_remap_end;
		old_lv->lv_block_exception = new_lv->lv_block_exception;
		old_lv->lv_snapshot_hash_table =
		    new_lv->lv_snapshot_hash_table;
		old_lv->lv_snapshot_hash_table_size =
		    new_lv->lv_snapshot_hash_table_size;
		old_lv->lv_snapshot_hash_mask =
		    new_lv->lv_snapshot_hash_mask;

		/* re-insert the surviving exceptions into the new hash */
		for (e = 0; e < old_lv->lv_remap_ptr; e++)
			lvm_hash_link(new_lv->lv_block_exception + e,
				      new_lv->lv_block_exception[e].
				      rdev_org,
				      new_lv->lv_block_exception[e].
				      rsector_org, new_lv);

		/* rebase the VG PE accounting on the new reserve size */
		vg_ptr->pe_allocated -= old_lv->lv_allocated_snapshot_le;
		vg_ptr->pe_allocated += new_lv->lv_allocated_le;
		old_lv->lv_allocated_snapshot_le = new_lv->lv_allocated_le;
	} else {
		vfree(old_lv->lv_current_pe);
		vfree(old_lv->lv_snapshot_hash_table);

		old_lv->lv_size = new_lv->lv_size;
		old_lv->lv_allocated_le = new_lv->lv_allocated_le;
		old_lv->lv_current_le = new_lv->lv_current_le;
		old_lv->lv_current_pe = new_lv->lv_current_pe;
		lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
		    old_lv->lv_size;
		lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;

		if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
			lv_t *snap;
			/* snapshots share the origin's PE map; propagate
			   the new geometry to every snapshot in the chain */
			for (snap = old_lv->lv_snapshot_next; snap;
			     snap = snap->lv_snapshot_next) {
				down_write(&snap->lv_lock);
				snap->lv_current_pe =
				    old_lv->lv_current_pe;
				snap->lv_allocated_le =
				    old_lv->lv_allocated_le;
				snap->lv_current_le =
				    old_lv->lv_current_le;
				snap->lv_size = old_lv->lv_size;

				lvm_gendisk.part[MINOR(snap->lv_dev)].
				    nr_sects = old_lv->lv_size;
				lvm_size[MINOR(snap->lv_dev)] =
				    old_lv->lv_size >> 1;
				__update_hardsectsize(snap);
				up_write(&snap->lv_lock);
			}
		}
	}

	__update_hardsectsize(old_lv);
	up_write(&old_lv->lv_lock);

	return 0;
}				/* lvm_do_lv_extend_reduce() */
2638
2639
2640 /*
2641 * character device support function logical volume status by name
2642 */
/* Copy the status of the LV named in the user request into the caller's
   lv_t, preserving the caller's own lv_current_pe / lv_block_exception
   pointers; returns 0 or a negative errno. */
static int lvm_do_lv_status_byname(vg_t * vg_ptr, void *arg)
{
	uint l;
	lv_status_byname_req_t lv_status_byname_req;
	void *saved_ptr1;
	void *saved_ptr2;
	lv_t *lv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&lv_status_byname_req, arg,
			   sizeof(lv_status_byname_req_t)) != 0)
		return -EFAULT;

	if (lv_status_byname_req.lv == NULL)
		return -EINVAL;

	for (l = 0; l < vg_ptr->lv_max; l++) {
		if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
		    strcmp(lv_ptr->lv_name,
			   lv_status_byname_req.lv_name) == 0) {
			/* Save usermode pointers */
			if (copy_from_user
			    (&saved_ptr1,
			     &lv_status_byname_req.lv->lv_current_pe,
			     sizeof(void *)) != 0)
				return -EFAULT;
			if (copy_from_user
			    (&saved_ptr2,
			     &lv_status_byname_req.lv->lv_block_exception,
			     sizeof(void *)) != 0)
				return -EFAULT;
			if (copy_to_user(lv_status_byname_req.lv,
					 lv_ptr, sizeof(lv_t)) != 0)
				return -EFAULT;
			if (saved_ptr1 != NULL) {
				if (copy_to_user(saved_ptr1,
						 lv_ptr->lv_current_pe,
						 lv_ptr->lv_allocated_le *
						 sizeof(pe_t)) != 0)
					return -EFAULT;
			}
			/* Restore usermode pointers */
			if (copy_to_user
			    (&lv_status_byname_req.lv->lv_current_pe,
			     &saved_ptr1, sizeof(void *)) != 0)
				return -EFAULT;
			/* restore lv_block_exception too: the whole-struct
			   copy above replaced it with a kernel-space
			   pointer (leaks a kernel address and clobbers the
			   caller's pointer otherwise) */
			if (copy_to_user
			    (&lv_status_byname_req.lv->lv_block_exception,
			     &saved_ptr2, sizeof(void *)) != 0)
				return -EFAULT;
			return 0;
		}
	}
	return -ENXIO;
}				/* lvm_do_lv_status_byname() */
2695
2696
2697 /*
2698 * character device support function logical volume status by index
2699 */
/* Copy the status of the LV at the requested table index into the
   caller's lv_t, preserving the caller's own lv_current_pe /
   lv_block_exception pointers; returns 0 or a negative errno. */
static int lvm_do_lv_status_byindex(vg_t * vg_ptr, void *arg)
{
	lv_status_byindex_req_t lv_status_byindex_req;
	void *saved_ptr1;
	void *saved_ptr2;
	lv_t *lv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&lv_status_byindex_req, arg,
			   sizeof(lv_status_byindex_req)) != 0)
		return -EFAULT;

	if (lv_status_byindex_req.lv == NULL)
		return -EINVAL;
	/* bound the user-supplied index before using it to index lv[] */
	if (lv_status_byindex_req.lv_index >= vg_ptr->lv_max)
		return -ENXIO;
	if ((lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
		return -ENXIO;

	/* Save usermode pointers */
	if (copy_from_user
	    (&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe,
	     sizeof(void *)) != 0)
		return -EFAULT;
	if (copy_from_user
	    (&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception,
	     sizeof(void *)) != 0)
		return -EFAULT;

	if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) !=
	    0)
		return -EFAULT;
	if (saved_ptr1 != NULL) {
		if (copy_to_user(saved_ptr1,
				 lv_ptr->lv_current_pe,
				 lv_ptr->lv_allocated_le *
				 sizeof(pe_t)) != 0)
			return -EFAULT;
	}

	/* Restore usermode pointers */
	if (copy_to_user
	    (&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1,
	     sizeof(void *)) != 0)
		return -EFAULT;
	/* restore lv_block_exception too: the whole-struct copy above
	   replaced it with a kernel-space pointer */
	if (copy_to_user
	    (&lv_status_byindex_req.lv->lv_block_exception, &saved_ptr2,
	     sizeof(void *)) != 0)
		return -EFAULT;

	return 0;
}				/* lvm_do_lv_status_byindex() */
2747
2748
2749 /*
2750 * character device support function logical volume status by device number
2751 */
/* Copy the status of the LV with the requested device number into the
   caller's lv_t, preserving the caller's own lv_current_pe /
   lv_block_exception pointers; returns 0 or a negative errno. */
static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void *arg)
{
	int l;
	lv_status_bydev_req_t lv_status_bydev_req;
	void *saved_ptr1;
	void *saved_ptr2;
	lv_t *lv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&lv_status_bydev_req, arg,
			   sizeof(lv_status_bydev_req)) != 0)
		return -EFAULT;

	for (l = 0; l < vg_ptr->lv_max; l++) {
		if (vg_ptr->lv[l] == NULL)
			continue;
		if (vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev)
			break;
	}

	if (l == vg_ptr->lv_max)
		return -ENXIO;
	lv_ptr = vg_ptr->lv[l];

	/* Save usermode pointers */
	if (copy_from_user
	    (&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe,
	     sizeof(void *)) != 0)
		return -EFAULT;
	if (copy_from_user
	    (&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception,
	     sizeof(void *)) != 0)
		return -EFAULT;

	if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) !=
	    0)
		return -EFAULT;
	if (saved_ptr1 != NULL) {
		if (copy_to_user(saved_ptr1,
				 lv_ptr->lv_current_pe,
				 lv_ptr->lv_allocated_le *
				 sizeof(pe_t)) != 0)
			return -EFAULT;
	}
	/* Restore usermode pointers */
	if (copy_to_user
	    (&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1,
	     sizeof(void *)) != 0)
		return -EFAULT;
	/* restore lv_block_exception too: the whole-struct copy above
	   replaced it with a kernel-space pointer */
	if (copy_to_user
	    (&lv_status_bydev_req.lv->lv_block_exception, &saved_ptr2,
	     sizeof(void *)) != 0)
		return -EFAULT;

	return 0;
}				/* lvm_do_lv_status_bydev() */
2805
2806
2807 /*
2808 * character device support function rename a logical volume
2809 */
/* Rename the LV whose device number matches lv->lv_dev to the name in
   lv_req->lv_name; returns 0, -ENXIO (no VG) or -ENODEV (no such LV). */
static int lvm_do_lv_rename(vg_t * vg_ptr, lv_req_t * lv_req, lv_t * lv)
{
	int l = 0;
	int ret = 0;
	lv_t *lv_ptr = NULL;

	if (!vg_ptr)
		return -ENXIO;

	for (l = 0; l < vg_ptr->lv_max; l++) {
		if ((lv_ptr = vg_ptr->lv[l]) == NULL)
			continue;
		if (lv_ptr->lv_dev == lv->lv_dev) {
			/* drop and re-create the VFS entries so they
			   pick up the new name */
			lvm_fs_remove_lv(vg_ptr, lv_ptr);
			strncpy(lv_ptr->lv_name, lv_req->lv_name,
				NAME_LEN);
			/* strncpy() does not NUL-terminate when the source
			   fills the buffer; force termination so later
			   strcmp()s on lv_name cannot run off the end */
			lv_ptr->lv_name[NAME_LEN - 1] = '\0';
			lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
			    lvm_fs_create_lv(vg_ptr, lv_ptr);
			break;
		}
	}
	if (l == vg_ptr->lv_max)
		ret = -ENODEV;

	return ret;
}				/* lvm_do_lv_rename */
2836
2837
2838 /*
2839 * character device support function physical volume change
2840 */
/* Overwrite the named PV's descriptor with the one supplied from user
   space, keeping the kernel-side block_device; returns 0 or -errno. */
static int lvm_do_pv_change(vg_t * vg_ptr, void *arg)
{
	uint i;
	pv_t *pv;
	struct block_device *saved_bd;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&pv_change_req, arg,
			   sizeof(pv_change_req)) != 0)
		return -EFAULT;

	for (i = 0; i < vg_ptr->pv_max; i++) {
		pv = vg_ptr->pv[i];
		if (pv == NULL ||
		    strcmp(pv->pv_name, pv_change_req.pv_name) != 0)
			continue;

		/* the block_device pointer belongs to the kernel;
		   preserve it across the user-space overwrite */
		saved_bd = pv->bd;
		if (copy_from_user(pv, pv_change_req.pv,
				   sizeof(pv_t)) != 0)
			return -EFAULT;
		pv->bd = saved_bd;

		/* We don't need the PE list
		   in kernel space as with LVs pe_t list */
		pv->pe = NULL;
		return 0;
	}
	return -ENXIO;
}				/* lvm_do_pv_change() */
2873
2874 /*
2875 * character device support function get physical volume status
2876 */
/* Copy the named PV's descriptor out to user space; returns 0 or -errno. */
static int lvm_do_pv_status(vg_t * vg_ptr, void *arg)
{
	uint i;
	pv_t *pv;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&pv_status_req, arg,
			   sizeof(pv_status_req)) != 0)
		return -EFAULT;

	/* find the named PV and hand its descriptor back */
	for (i = 0; i < vg_ptr->pv_max; i++) {
		pv = vg_ptr->pv[i];
		if (pv == NULL ||
		    strcmp(pv->pv_name, pv_status_req.pv_name) != 0)
			continue;
		if (copy_to_user(pv_status_req.pv, pv,
				 sizeof(pv_t)) != 0)
			return -EFAULT;
		return 0;
	}
	return -ENXIO;
}				/* lvm_do_pv_status() */
2900
2901
2902 /*
2903 * character device support function flush and invalidate all buffers of a PV
2904 */
lvm_do_pv_flush(void * arg)2905 static int lvm_do_pv_flush(void *arg)
2906 {
2907 pv_flush_req_t pv_flush_req;
2908
2909 if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
2910 return -EFAULT;
2911
2912 fsync_dev(pv_flush_req.pv_dev);
2913 invalidate_buffers(pv_flush_req.pv_dev);
2914
2915 return 0;
2916 }
2917
2918
2919 /*
2920 * support function initialize gendisk variables
2921 */
lvm_geninit(struct gendisk * lvm_gdisk)2922 static void __init lvm_geninit(struct gendisk *lvm_gdisk)
2923 {
2924 int i = 0;
2925
2926 #ifdef DEBUG_GENDISK
2927 printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name);
2928 #endif
2929
2930 for (i = 0; i < MAX_LV; i++) {
2931 lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */
2932 lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0;
2933 lvm_blocksizes[i] = BLOCK_SIZE;
2934 }
2935
2936 blk_size[MAJOR_NR] = lvm_size;
2937 blksize_size[MAJOR_NR] = lvm_blocksizes;
2938 hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
2939
2940 return;
2941 } /* lvm_gen_init() */
2942
2943
2944
2945 /* Must have down_write(_pe_lock) when we enqueue buffers */
/* Push bh onto the singly-linked deferred-write list headed by
   _pe_requests (linked through b_reqnext).  The 'rw' argument is
   currently unused. */
static void _queue_io(struct buffer_head *bh, int rw)
{
	/* bh must not already be queued elsewhere */
	if (bh->b_reqnext)
		BUG();
	bh->b_reqnext = _pe_requests;
	_pe_requests = bh;
}
2953
2954 /* Must have down_write(_pe_lock) when we dequeue buffers */
_dequeue_io(void)2955 static struct buffer_head *_dequeue_io(void)
2956 {
2957 struct buffer_head *bh = _pe_requests;
2958 _pe_requests = NULL;
2959 return bh;
2960 }
2961
2962 /*
2963 * We do not need to hold _pe_lock to flush buffers. bh should be taken from
2964 * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
2965 * NULL and we drop _pe_lock. Any new buffers defered at this time will be
2966 * added to a new list, and the old buffers can have their I/O restarted
2967 * asynchronously.
2968 *
2969 * If, for some reason, the same PE is locked again before all of these writes
2970 * have finished, then these buffers will just be re-queued (i.e. no danger).
2971 */
_flush_io(struct buffer_head * bh)2972 static void _flush_io(struct buffer_head *bh)
2973 {
2974 while (bh) {
2975 struct buffer_head *next = bh->b_reqnext;
2976 bh->b_reqnext = NULL;
2977 /* resubmit this buffer head */
2978 generic_make_request(WRITE, bh);
2979 bh = next;
2980 }
2981 }
2982
2983
2984 /*
2985 * we must open the pv's before we use them
2986 */
/* Open the PV's underlying block device read/write and remember the
   block_device in pv->bd; returns 0 or a negative errno. */
static int _open_pv(pv_t * pv)
{
	int err;
	struct block_device *bd;

	/* look up / allocate the block_device for the PV's device number */
	if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
		return -ENOMEM;

	err = blkdev_get(bd, FMODE_READ | FMODE_WRITE, 0, BDEV_FILE);
	if (err)
		/* NOTE(review): the reference taken by bdget() is not
		   released here on failure — confirm whether blkdev_get()
		   drops it itself in this kernel before adding a bdput() */
		return err;

	pv->bd = bd;
	return 0;
}
3002
/* Release the PV's block device (if any) and clear pv->bd. */
static void _close_pv(pv_t * pv)
{
	struct block_device *bdev;

	if (!pv)
		return;

	bdev = pv->bd;
	pv->bd = NULL;
	if (bdev)
		blkdev_put(bdev, BDEV_FILE);
}
3012
3013
_sectors_to_k(unsigned long sect)3014 static unsigned long _sectors_to_k(unsigned long sect)
3015 {
3016 if (SECTOR_SIZE > 1024) {
3017 return sect * (SECTOR_SIZE / 1024);
3018 }
3019
3020 return sect / (1024 / SECTOR_SIZE);
3021 }
3022
3023 MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
3024 MODULE_DESCRIPTION("Logical Volume Manager");
3025 #ifdef MODULE_LICENSE
3026 MODULE_LICENSE("GPL");
3027 #endif
3028
3029 module_init(lvm_init);
3030 module_exit(lvm_cleanup);
3031