1 /*
2  * kernel/lvm.c
3  *
4  * Copyright (C) 1997 - 2002  Heinz Mauelshagen, Sistina Software
5  *
6  * February-November 1997
7  * April-May,July-August,November 1998
8  * January-March,May,July,September,October 1999
9  * January,February,July,September-November 2000
10  * January-May,June,October 2001
11  * May-August 2002
12  * February 2003
13  *
14  *
15  * LVM driver is free software; you can redistribute it and/or modify
16  * it under the terms of the GNU General Public License as published by
17  * the Free Software Foundation; either version 2, or (at your option)
18  * any later version.
19  *
20  * LVM driver is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License
26  * along with GNU CC; see the file COPYING.  If not, write to
27  * the Free Software Foundation, 59 Temple Place - Suite 330,
28  * Boston, MA 02111-1307, USA.
29  *
30  */
31 
32 /*
33  * Changelog
34  *
35  *    09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT
36  *                 and VG_STATUS_GET_NAMELIST
37  *    18/01/1998 - change lvm_chr_open/close lock handling
38  *    30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and
39  *               - added   LV_STATUS_BYINDEX ioctl
40  *               - used lvm_status_byname_req_t and
41  *                      lvm_status_byindex_req_t vars
42  *    04/05/1998 - added multiple device support
43  *    08/05/1998 - added support to set/clear extendable flag in volume group
44  *    09/05/1998 - changed output of lvm_proc_get_global_info() because of
45  *                 support for free (eg. longer) logical volume names
46  *    12/05/1998 - added spin_locks (thanks to Pascal van Dam
47  *                 <pascal@ramoth.xs4all.nl>)
48  *    25/05/1998 - fixed handling of locked PEs in lvm_map() and
49  *                 lvm_chr_ioctl()
50  *    26/05/1998 - reactivated verify_area by access_ok
51  *    07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
52  *                 beyond 128/256 KB max allocation limit per call
53  *               - #ifdef blocked spin_lock calls to avoid compile errors
54  *                 with 2.0.x
55  *    11/06/1998 - another enhancement to spinlock code in lvm_chr_open()
56  *                 and use of LVM_VERSION_CODE instead of my own macros
57  *                 (thanks to  Michael Marxmeier <mike@msede.com>)
58  *    07/07/1998 - added statistics in lvm_map()
59  *    08/07/1998 - saved statistics in lvm_do_lv_extend_reduce()
60  *    25/07/1998 - used __initfunc macro
61  *    02/08/1998 - changes for official char/block major numbers
62  *    07/08/1998 - avoided init_module() and cleanup_module() to be static
63  *    30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters
64  *                 to sum of LVs open (no matter how often each is)
65  *    01/09/1998 - fixed lvm_gendisk.part[] index error
66  *    07/09/1998 - added copying of lv_current_pe-array
67  *                 in LV_STATUS_BYINDEX ioctl
68  *    17/11/1998 - added KERN_* levels to printk
69  *    13/01/1999 - fixed LV index bug in lvm_do_lv_create() which hit lvrename
70  *    07/02/1999 - fixed spinlock handling bug in case of LVM_RESET
71  *                 by moving spinlock code from lvm_chr_open()
72  *                 to lvm_chr_ioctl()
73  *               - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl()
74  *               - allowed LVM_RESET and retrieval commands to go ahead;
75  *                 only other update ioctls are blocked now
76  *               - fixed pv->pe to NULL for pv_status
77  *               - using lv_req structure in lvm_chr_ioctl() now
78  *               - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
79  *                 caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
80  *    09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
 *                 handle logical volume private read ahead sector
82  *               - implemented LV read_ahead handling with lvm_blk_read()
83  *                 and lvm_blk_write()
84  *    10/02/1999 - implemented 2.[12].* support function lvm_hd_name()
85  *                 to be used in drivers/block/genhd.c by disk_name()
86  *    12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO
87  *               - enhanced gendisk insert/remove handling
88  *    16/02/1999 - changed to dynamic block minor number allocation to
89  *                 have as much as 99 volume groups with 256 logical volumes
90  *                 as the grand total; this allows having 1 volume group with
91  *                 up to 256 logical volumes in it
92  *    21/02/1999 - added LV open count information to proc filesystem
93  *               - substituted redundant LVM_RESET code by calls
94  *                 to lvm_do_vg_remove()
95  *    22/02/1999 - used schedule_timeout() to be more responsive
96  *                 in case of lvm_do_vg_remove() with lots of logical volumes
97  *    19/03/1999 - fixed NULL pointer bug in module_init/lvm_init
98  *    17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0)
99  *               - enhanced lvm_hd_name support
100  *    03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and
101  *                 memcpy_tofs/memcpy_fromfs macro redefinitions
102  *    06/07/1999 - corrected reads/writes statistic counter copy in case
103  *                 of striped logical volume
104  *    28/07/1999 - implemented snapshot logical volumes
105  *                 - lvm_chr_ioctl
106  *                   - LV_STATUS_BYINDEX
107  *                   - LV_STATUS_BYNAME
108  *                 - lvm_do_lv_create
109  *                 - lvm_do_lv_remove
110  *                 - lvm_map
111  *                 - new lvm_snapshot_remap_block
112  *                 - new lvm_snapshot_remap_new_block
113  *    08/10/1999 - implemented support for multiple snapshots per
114  *                 original logical volume
115  *    12/10/1999 - support for 2.3.19
116  *    11/11/1999 - support for 2.3.28
117  *    21/11/1999 - changed lvm_map() interface to buffer_head based
118  *    19/12/1999 - support for 2.3.33
119  *    01/01/2000 - changed locking concept in lvm_map(),
120  *                 lvm_do_vg_create() and lvm_do_lv_remove()
121  *    15/01/2000 - fixed PV_FLUSH bug in lvm_chr_ioctl()
122  *    24/01/2000 - ported to 2.3.40 including Alan Cox's pointer changes etc.
123  *    29/01/2000 - used kmalloc/kfree again for all small structures
124  *    20/01/2000 - cleaned up lvm_chr_ioctl by moving code
 *                 to separated functions
126  *               - avoided "/dev/" in proc filesystem output
127  *               - avoided inline strings functions lvm_strlen etc.
128  *    14/02/2000 - support for 2.3.43
129  *               - integrated Andrea Arcagneli's snapshot code
130  *    25/06/2000 - james (chip) , IKKHAYD! roffl
131  *    26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
132  *                 support
133  *    06/09/2000 - added devfs support
134  *    07/09/2000 - changed IOP version to 9
135  *               - started to add new char ioctl LV_STATUS_BYDEV_T to support
136  *                 getting an lv_t based on the dev_t of the Logical Volume
137  *    14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions
138  *                 to sync and lock, activate snapshot and unlock the FS
139  *                 (to support journaled filesystems)
140  *    18/09/2000 - hardsector size support
141  *    27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename()
142  *    30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO
143  *    01/11/2000 - added memory information on hash tables to
144  *                 lvm_proc_get_global_info()
145  *    02/11/2000 - implemented /proc/lvm/ hierarchy
146  *    22/11/2000 - changed lvm_do_create_proc_entry_of_pv () to work
147  *                 with devfs
148  *    26/11/2000 - corrected #ifdef locations for PROC_FS
149  *    28/11/2000 - fixed lvm_do_vg_extend() NULL pointer BUG
150  *               - fixed lvm_do_create_proc_entry_of_pv() buffer tampering BUG
151  *    08/01/2001 - Removed conditional compiles related to PROC_FS,
152  *                 procfs is always supported now. (JT)
153  *    12/01/2001 - avoided flushing logical volume in case of shrinking
 *                 because of unnecessary overhead in case of heavy updates
155  *    25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
156  *    31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be
157  *                 handled by the proper devices.
158  *               - If you try and BMAP a snapshot you now get an -EPERM
159  *    01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
160  *               - factored __remap_snapshot out of lvm_map
161  *    12/02/2001 - move devfs code to create VG before LVs
162  *    13/02/2001 - allow VG_CREATE on /dev/lvm
163  *    14/02/2001 - removed modversions.h
164  *               - tidied device defines for blk.h
165  *               - tidied debug statements
166  *               - bug: vg[] member not set back to NULL if activation fails
167  *               - more lvm_map tidying
168  *    15/02/2001 - register /dev/lvm with devfs correctly (major/minor
169  *                 were swapped)
170  *    19/02/2001 - preallocated buffer_heads for rawio when using
171  *                 snapshots [JT]
 *    28/02/2001 - introduced the P_DEV macro and changed some internal
173  *                 functions to be static [AD]
174  *    28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
175  *               - fixed user address accessing bug in lvm_do_lv_create()
176  *                 where the check for an existing LV takes place right at
177  *                 the beginning
178  *    01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
 *    02/03/2001 - Don't destroy usermode pointers in lv_t structures during
180  *                 LV_STATUS_BYxxx
181  *                 and remove redundant lv_t variables from same.
182  *               - avoid compilation of lvm_dummy_device_request in case of
183  *                 Linux >= 2.3.0 to avoid a warning
184  *               - added lvm_name argument to printk in buffer allocation
185  *                 in order to avoid a warning
186  *    04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
187  *                 macros
188  *    05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
189  *                 lvdisplay -v (PC)
190  *               - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
191  *               - added copying pe_t array in lvm_do_lv_status_bydev (HM)
192  *               - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
193  *                 to copy the lv_block_exception_t array to userspace (HM)
194  *    08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
195  *                 removed obsolete lv_ptr->lv_COW_table_page initialization
196  *               - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
197  *    09/03/2001 - Added _lock_open_count to ensure we only drop the lock
198  *                 when the locking process closes.
199  *    05/04/2001 - Defer writes to an extent that is being moved [JT]
200  *    05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
201  *                 lvm_map() in order to make stacking devices more happy (HM)
202  *    11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
203  *                 rw flag, instead WRITEA's are just dropped [JT]
204  *    30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
205  *                 than get_hardblocksize() call
206  *    03/05/2001 - Use copy_to/from_user to preserve pointers in
207  *                 lvm_do_status_by*
208  *    11/05/2001 - avoid accesses to inactive snapshot data in
209  *                 __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
210  *    28/05/2001 - implemented missing BLKSSZGET ioctl
211  *    05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
212  *                 locked.  Make buffer queue flush not need locking.
213  *                 Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
214  *    30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
215  *                 the same hardsectsize (very likely) before scanning all LEs
216  *                 in the LV each time.  [AED]
217  *    12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
218  *    01/11/2001 - Backport read_ahead change from Linus kernel [AED]
219  *    24/05/2002 - fixed locking bug in lvm_do_le_remap() introduced with 1.0.4
220  *    13/06/2002 - use blk_ioctl() to support various standard block ioctls
221  *               - support HDIO_GETGEO_BIG ioctl
222  *    05/07/2002 - fixed OBO error on vg array access [benh@kernel.crashing.org]
223  *    22/07/2002 - streamlined blk_ioctl() call
224  *    14/08/2002 - stored fs handle in lvm_do_lv_rename
225  *                 [kaoru@bsd.tnes.nec.co.jp]
226  *    06/02/2003 - fix persistent snapshot extend/reduce bug in
227  *		   lvm_do_lv_extend_reduce() [dalestephenson@mac.com]
228  *    04/03/2003 - snapshot extend/reduce memory leak
229  *               - VG PE counter wrong [dalestephenson@mac.com]
230  *
231  */
232 
233 #include <linux/version.h>
234 
235 #define MAJOR_NR LVM_BLK_MAJOR
236 #define DEVICE_OFF(device)
237 #define LOCAL_END_REQUEST
238 
239 /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
240 /* #define	LVM_VFS_ENHANCEMENT */
241 
242 #include <linux/config.h>
243 #include <linux/module.h>
244 #include <linux/kernel.h>
245 #include <linux/vmalloc.h>
246 
247 #include <linux/slab.h>
248 #include <linux/init.h>
249 
250 #include <linux/hdreg.h>
251 #include <linux/stat.h>
252 #include <linux/fs.h>
253 #include <linux/proc_fs.h>
254 #include <linux/blkdev.h>
255 #include <linux/genhd.h>
256 #include <linux/locks.h>
257 
258 
259 #include <linux/devfs_fs_kernel.h>
260 #include <linux/smp_lock.h>
261 #include <asm/ioctl.h>
262 #include <asm/segment.h>
263 #include <asm/uaccess.h>
264 
265 #ifdef CONFIG_KERNELD
266 #include <linux/kerneld.h>
267 #endif
268 
269 #include <linux/blk.h>
270 #include <linux/blkpg.h>
271 
272 #include <linux/errno.h>
273 #include <linux/lvm.h>
274 
275 #include "lvm-internal.h"
276 
/*
 * Clamp a read-ahead value into [LVM_MIN_READ_AHEAD, LVM_MAX_READ_AHEAD],
 * substituting LVM_DEFAULT_READ_AHEAD when out of range, and publish the
 * result in the kernel's global read_ahead[] table for our block major.
 * Multi-statement macro wrapped in do { } while(0) so it acts as a
 * single statement after an if/else.  Note: evaluates and assigns (a).
 */
#define	LVM_CORRECT_READ_AHEAD(a)		\
do {						\
	if ((a) < LVM_MIN_READ_AHEAD ||		\
	    (a) > LVM_MAX_READ_AHEAD)		\
		(a) = LVM_DEFAULT_READ_AHEAD;	\
	read_ahead[MAJOR_NR] = (a);		\
} while(0)
284 
285 #ifndef WRITEA
286 #  define WRITEA WRITE
287 #endif
288 
289 
290 /*
291  * External function prototypes
292  */
293 static int lvm_make_request_fn(request_queue_t *, int,
294 			       struct buffer_head *);
295 
296 static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
297 static int lvm_blk_open(struct inode *, struct file *);
298 
299 static int lvm_blk_close(struct inode *, struct file *);
300 static int lvm_get_snapshot_use_rate(lv_t * lv_ptr, void *arg);
301 static int lvm_user_bmap(struct inode *, struct lv_bmap *);
302 
303 static int lvm_chr_open(struct inode *, struct file *);
304 static int lvm_chr_close(struct inode *, struct file *);
305 static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
306 
307 
308 /* End external function prototypes */
309 
310 
311 /*
312  * Internal function prototypes
313  */
314 static void lvm_cleanup(void);
315 static void lvm_init_vars(void);
316 
317 #ifdef LVM_HD_NAME
318 extern void (*lvm_hd_name_ptr) (char *, int);
319 #endif
320 static int lvm_map(struct buffer_head *, int);
321 static int lvm_do_lock_lvm(void);
322 static int lvm_do_le_remap(vg_t *, void *);
323 
324 static int lvm_do_pv_create(pv_t *, vg_t *, ulong);
325 static int lvm_do_pv_remove(vg_t *, ulong);
326 static int lvm_do_lv_create(int, char *, lv_t *);
327 static int lvm_do_lv_extend_reduce(int, char *, lv_t *);
328 static int lvm_do_lv_remove(int, char *, int);
329 static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *);
330 static int lvm_do_lv_status_byname(vg_t * r, void *);
331 static int lvm_do_lv_status_byindex(vg_t *, void *);
332 static int lvm_do_lv_status_bydev(vg_t *, void *);
333 
334 static int lvm_do_pe_lock_unlock(vg_t * r, void *);
335 
336 static int lvm_do_pv_change(vg_t *, void *);
337 static int lvm_do_pv_status(vg_t *, void *);
338 static int lvm_do_pv_flush(void *);
339 
340 static int lvm_do_vg_create(void *, int minor);
341 static int lvm_do_vg_extend(vg_t *, void *);
342 static int lvm_do_vg_reduce(vg_t *, void *);
343 static int lvm_do_vg_rename(vg_t *, void *);
344 static int lvm_do_vg_remove(int);
345 static void lvm_geninit(struct gendisk *);
346 static void __update_hardsectsize(lv_t * lv);
347 
348 
349 static void _queue_io(struct buffer_head *bh, int rw);
350 static struct buffer_head *_dequeue_io(void);
351 static void _flush_io(struct buffer_head *bh);
352 
353 static int _open_pv(pv_t * pv);
354 static void _close_pv(pv_t * pv);
355 
356 static unsigned long _sectors_to_k(unsigned long sect);
357 
358 #ifdef LVM_HD_NAME
359 void lvm_hd_name(char *, int);
360 #endif
361 /* END Internal function prototypes */
362 
363 
/* variables */
char *lvm_version =
    "LVM version " LVM_RELEASE_NAME "(" LVM_RELEASE_DATE ")";
ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
int loadtime = 0;
const char *const lvm_name = LVM_NAME;


/* volume group descriptor area pointers */
vg_t *vg[ABS_MAX_VG + 1];

/* map from block minor number to VG and LV numbers */
static struct {
	int vg_number;
	int lv_number;
} vg_lv_map[ABS_MAX_LV];


/* Request structures (lvm_chr_ioctl()) */
static pv_change_req_t pv_change_req;
static pv_status_req_t pv_status_req;
volatile static pe_lock_req_t pe_lock_req;
static le_remap_req_t le_remap_req;
static lv_req_t lv_req;

#ifdef LVM_TOTAL_RESET
static int lvm_reset_spindown = 0;
#endif

static char pv_name[NAME_LEN];
/* static char rootvg[NAME_LEN] = { 0, }; */
/* pid of the process currently holding the LVM lock, 0 if unlocked
   (compared against current->pid in lvm_chr_open/lvm_chr_close) */
static int lock = 0;
/* extra char-dev opens by the lock-holding process; the lock is only
   dropped when this reaches zero on close (see lvm_chr_close) */
static int _lock_open_count = 0;
uint vg_count = 0;
static long lvm_chr_open_count = 0;
/* processes waiting for the LVM lock to be released */
static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);

/* protects 'lock' and '_lock_open_count' */
static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;

/* queue of buffer_heads deferred while a PE is locked (pvmove) */
static struct buffer_head *_pe_requests;
static DECLARE_RWSEM(_pe_lock);
407 
408 struct file_operations lvm_chr_fops = {
409 	owner:THIS_MODULE,
410 	open:lvm_chr_open,
411 	release:lvm_chr_close,
412 	ioctl:lvm_chr_ioctl,
413 };
414 
/* block device operations structure needed for 2.3.38? and above */
struct block_device_operations lvm_blk_dops = {
	.owner		= THIS_MODULE,
	.open		= lvm_blk_open,
	.release	= lvm_blk_close,
	.ioctl		= lvm_blk_ioctl,
};


/* gendisk structures -- per-minor tables indexed by LV minor number */
static struct hd_struct lvm_hd_struct[MAX_LV];
static int lvm_blocksizes[MAX_LV];
static int lvm_hardsectsizes[MAX_LV];
static int lvm_size[MAX_LV];	/* LV sizes, exported via .sizes below */

static struct gendisk lvm_gendisk = {
	.major		= MAJOR_NR,
	.major_name	= LVM_NAME,
	.minor_shift	= 0,	/* no partitions on LVs */
	.max_p		= 1,
	.part		= lvm_hd_struct,
	.sizes		= lvm_size,
	.nr_real	= MAX_LV,
};
439 
440 
441 /*
442  * Driver initialization...
443  */
lvm_init(void)444 int lvm_init(void)
445 {
446 	if (devfs_register_chrdev(LVM_CHAR_MAJOR,
447 				  lvm_name, &lvm_chr_fops) < 0) {
448 		printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
449 		       lvm_name);
450 		return -EIO;
451 	}
452 	if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
453 	{
454 		printk("%s -- devfs_register_blkdev failed\n", lvm_name);
455 		if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
456 			printk(KERN_ERR
457 			       "%s -- devfs_unregister_chrdev failed\n",
458 			       lvm_name);
459 		return -EIO;
460 	}
461 
462 	lvm_init_fs();
463 	lvm_init_vars();
464 	lvm_geninit(&lvm_gendisk);
465 
466 	/* insert our gendisk at the corresponding major */
467 	add_gendisk(&lvm_gendisk);
468 
469 #ifdef LVM_HD_NAME
470 	/* reference from drivers/block/genhd.c */
471 	lvm_hd_name_ptr = lvm_hd_name;
472 #endif
473 
474 	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR),
475 			       lvm_make_request_fn);
476 
477 
478 	/* initialise the pe lock */
479 	pe_lock_req.lock = UNLOCK_PE;
480 
481 	/* optional read root VGDA */
482 /*
483    if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
484 */
485 
486 #ifdef MODULE
487 	printk(KERN_INFO "%s module loaded\n", lvm_version);
488 #else
489 	printk(KERN_INFO "%s\n", lvm_version);
490 #endif
491 
492 	return 0;
493 }				/* lvm_init() */
494 
495 /*
496  * cleanup...
497  */
498 
/*
 * Module teardown: mirror of lvm_init().
 *
 * Unregisters the char and block devices, removes the gendisk,
 * clears this major's entries in the global block-size tables and
 * unhooks procfs/devfs state.  Teardown order matters: devices are
 * unregistered before the gendisk and size tables are torn down.
 */
static void lvm_cleanup(void)
{
	if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
		       lvm_name);
	if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
		printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
		       lvm_name);



	/* delete our gendisk from chain */
	del_gendisk(&lvm_gendisk);

	/* drop our entries from the global per-major tables */
	blk_size[MAJOR_NR] = NULL;
	blksize_size[MAJOR_NR] = NULL;
	hardsect_size[MAJOR_NR] = NULL;

#ifdef LVM_HD_NAME
	/* reference from linux/drivers/block/genhd.c */
	lvm_hd_name_ptr = NULL;
#endif

	/* unregister with procfs and devfs */
	lvm_fin_fs();

#ifdef MODULE
	printk(KERN_INFO "%s -- Module successfully deactivated\n",
	       lvm_name);
#endif

	return;
}				/* lvm_cleanup() */
532 
533 /*
534  * support function to initialize lvm variables
535  */
lvm_init_vars(void)536 static void __init lvm_init_vars(void)
537 {
538 	int v;
539 
540 	loadtime = CURRENT_TIME;
541 
542 	lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
543 
544 	pe_lock_req.lock = UNLOCK_PE;
545 	pe_lock_req.data.lv_dev = 0;
546 	pe_lock_req.data.pv_dev = 0;
547 	pe_lock_req.data.pv_offset = 0;
548 
549 	/* Initialize VG pointers */
550 	for (v = 0; v < ABS_MAX_VG + 1; v++)
551 		vg[v] = NULL;
552 
553 	/* Initialize LV -> VG association */
554 	for (v = 0; v < ABS_MAX_LV; v++) {
555 		/* index ABS_MAX_VG never used for real VG */
556 		vg_lv_map[v].vg_number = ABS_MAX_VG;
557 		vg_lv_map[v].lv_number = -1;
558 	}
559 
560 	return;
561 }				/* lvm_init_vars() */
562 
563 
564 /********************************************************************
565  *
566  * Character device functions
567  *
568  ********************************************************************/
569 
570 #define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
571 			  (mode) & FMODE_WRITE ? "WRITE" : ""
572 
573 /*
574  * character device open routine
575  */
/*
 * Character device open routine.
 *
 * Requires CAP_SYS_ADMIN and a valid VG minor.  If the opening
 * process already holds the LVM lock, _lock_open_count is bumped so
 * the lock survives until its final close (see lvm_chr_close()).
 */
static int lvm_chr_open(struct inode *inode, struct file *file)
{
	int minor = MINOR(inode->i_rdev);

	P_DEV("chr_open MINOR: %d  VG#: %d  mode: %s%s  lock: %d\n",
	      minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);

	/* super user validation */
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	/* Group special file open */
	if (VG_CHR(minor) > MAX_VG)
		return -ENXIO;

	/* track re-opens by the lock holder under lvm_lock */
	spin_lock(&lvm_lock);
	if (lock == current->pid)
		_lock_open_count++;
	spin_unlock(&lvm_lock);

	lvm_chr_open_count++;

	MOD_INC_USE_COUNT;

	return 0;
}				/* lvm_chr_open() */
602 
603 
604 /*
605  * character device i/o-control routine
606  *
607  * Only one changing process can do changing ioctl at one time,
608  * others will block.
609  *
610  */
/*
 * Character device ioctl dispatcher -- the LVM management interface.
 *
 * 'a' is a user-space pointer whose layout depends on 'command'; it is
 * accessed only via copy_to/from_user.  vg_ptr is looked up from the
 * device minor and may be NULL (commands that need a VG check for it).
 * Commands either act globally (LVM_LOCK_LVM, VG_CREATE*, status
 * queries) or on the minor's VG; unknown commands yield -ENOTTY.
 */
static int lvm_chr_ioctl(struct inode *inode, struct file *file,
			 uint command, ulong a)
{
	int minor = MINOR(inode->i_rdev);
	uint extendable, l, v;
	void *arg = (void *) a;
	lv_t lv;
	vg_t *vg_ptr = vg[VG_CHR(minor)];

	/* otherwise cc will complain about unused variables */
	(void) lvm_lock;

	P_IOCTL
	    ("chr MINOR: %d  command: 0x%X  arg: %p  VG#: %d  mode: %s%s\n",
	     minor, command, arg, VG_CHR(minor),
	     MODE_TO_STR(file->f_mode));

#ifdef LVM_TOTAL_RESET
	/* refuse everything while a total reset is spinning down */
	if (lvm_reset_spindown > 0)
		return -EACCES;
#endif

	/* Main command switch */
	switch (command) {
	case LVM_LOCK_LVM:
		/* lock the LVM */
		return lvm_do_lock_lvm();

	case LVM_GET_IOP_VERSION:
		/* check lvm version to ensure driver/tools+lib
		   interoperability */
		if (copy_to_user(arg, &lvm_iop_version, sizeof(ushort)) !=
		    0)
			return -EFAULT;
		return 0;

#ifdef LVM_TOTAL_RESET
	case LVM_RESET:
		/* lock reset function */
		lvm_reset_spindown = 1;
		for (v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL)
				lvm_do_vg_remove(v);
		}

#ifdef MODULE
		/* force the module use count back to exactly 1 */
		while (GET_USE_COUNT(&__this_module) < 1)
			MOD_INC_USE_COUNT;
		while (GET_USE_COUNT(&__this_module) > 1)
			MOD_DEC_USE_COUNT;
#endif				/* MODULE */
		lock = 0;	/* release lock */
		wake_up_interruptible(&lvm_wait);
		return 0;
#endif				/* LVM_TOTAL_RESET */


	case LE_REMAP:
		/* remap a logical extent (after moving the physical extent) */
		return lvm_do_le_remap(vg_ptr, arg);

	case PE_LOCK_UNLOCK:
		/* lock/unlock i/o to a physical extent to move it to another
		   physical volume (move's done in user space's pvmove) */
		return lvm_do_pe_lock_unlock(vg_ptr, arg);

	case VG_CREATE_OLD:
		/* create a VGDA */
		return lvm_do_vg_create(arg, minor);

	case VG_CREATE:
		/* create a VGDA, assume VG number is filled in */
		return lvm_do_vg_create(arg, -1);

	case VG_EXTEND:
		/* extend a volume group */
		return lvm_do_vg_extend(vg_ptr, arg);

	case VG_REDUCE:
		/* reduce a volume group */
		return lvm_do_vg_reduce(vg_ptr, arg);

	case VG_RENAME:
		/* rename a volume group */
		return lvm_do_vg_rename(vg_ptr, arg);

	case VG_REMOVE:
		/* remove an inactive VGDA */
		return lvm_do_vg_remove(minor);


	case VG_SET_EXTENDABLE:
		/* set/clear extendability flag of volume group */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_from_user(&extendable, arg, sizeof(extendable)) !=
		    0)
			return -EFAULT;

		/* only the exact flag value or its complement is valid */
		if (extendable == VG_EXTENDABLE ||
		    extendable == ~VG_EXTENDABLE) {
			if (extendable == VG_EXTENDABLE)
				vg_ptr->vg_status |= VG_EXTENDABLE;
			else
				vg_ptr->vg_status &= ~VG_EXTENDABLE;
		} else
			return -EINVAL;
		return 0;


	case VG_STATUS:
		/* get volume group data (only the vg_t struct) */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_to_user(arg, vg_ptr, sizeof(vg_t)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_COUNT:
		/* get volume group count */
		if (copy_to_user(arg, &vg_count, sizeof(vg_count)) != 0)
			return -EFAULT;
		return 0;


	case VG_STATUS_GET_NAMELIST:
		/* get volume group names: copy the name of every active
		   VG into consecutive NAME_LEN slots of the user buffer */
		for (l = v = 0; v < ABS_MAX_VG; v++) {
			if (vg[v] != NULL) {
				if (copy_to_user(arg + l * NAME_LEN,
						 vg[v]->vg_name,
						 NAME_LEN) != 0)
					return -EFAULT;
				l++;
			}
		}
		return 0;


	case LV_CREATE:
	case LV_EXTEND:
	case LV_REDUCE:
	case LV_REMOVE:
	case LV_RENAME:
		/* create, extend, reduce, remove or rename a logical volume */
		if (vg_ptr == NULL)
			return -ENXIO;
		if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0)
			return -EFAULT;

		/* LV_REMOVE only needs the name, not the lv_t body */
		if (command != LV_REMOVE) {
			if (copy_from_user(&lv, lv_req.lv, sizeof(lv_t)) !=
			    0)
				return -EFAULT;
		}
		/* every command of this group returns from the inner
		   switch, so control never falls through to the next
		   outer case */
		switch (command) {
		case LV_CREATE:
			return lvm_do_lv_create(minor, lv_req.lv_name,
						&lv);

		case LV_EXTEND:
		case LV_REDUCE:
			return lvm_do_lv_extend_reduce(minor,
						       lv_req.lv_name,
						       &lv);
		case LV_REMOVE:
			return lvm_do_lv_remove(minor, lv_req.lv_name, -1);

		case LV_RENAME:
			return lvm_do_lv_rename(vg_ptr, &lv_req, &lv);
		}




	case LV_STATUS_BYNAME:
		/* get status of a logical volume by name */
		return lvm_do_lv_status_byname(vg_ptr, arg);


	case LV_STATUS_BYINDEX:
		/* get status of a logical volume by index */
		return lvm_do_lv_status_byindex(vg_ptr, arg);


	case LV_STATUS_BYDEV:
		/* get status of a logical volume by device */
		return lvm_do_lv_status_bydev(vg_ptr, arg);


	case PV_CHANGE:
		/* change a physical volume */
		return lvm_do_pv_change(vg_ptr, arg);


	case PV_STATUS:
		/* get physical volume data (pv_t structure only) */
		return lvm_do_pv_status(vg_ptr, arg);


	case PV_FLUSH:
		/* physical volume buffer flush/invalidate */
		return lvm_do_pv_flush(arg);


	default:
		printk(KERN_WARNING
		       "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
		       lvm_name, command);
		return -ENOTTY;
	}

	return 0;
}				/* lvm_chr_ioctl */
826 
827 
828 /*
829  * character device close routine
830  */
/*
 * Character device close routine.
 *
 * Decrements the open count and, when the closing process is the LVM
 * lock holder, either drops one nested-open reference or -- on the
 * final close -- releases the lock and wakes any waiters.
 */
static int lvm_chr_close(struct inode *inode, struct file *file)
{
	P_DEV("chr_close MINOR: %d  VG#: %d\n",
	      MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));

#ifdef LVM_TOTAL_RESET
	/* a pending total reset clears all open bookkeeping */
	if (lvm_reset_spindown > 0) {
		lvm_reset_spindown = 0;
		lvm_chr_open_count = 0;
	}
#endif

	if (lvm_chr_open_count > 0)
		lvm_chr_open_count--;

	/* only release the lock when the locking process closes its
	   last reference (_lock_open_count reaches zero) */
	spin_lock(&lvm_lock);
	if (lock == current->pid) {
		if (!_lock_open_count) {
			P_DEV("chr_close: unlocking LVM for pid %d\n",
			      lock);
			lock = 0;
			wake_up_interruptible(&lvm_wait);
		} else
			_lock_open_count--;
	}
	spin_unlock(&lvm_lock);

	MOD_DEC_USE_COUNT;

	return 0;
}				/* lvm_chr_close() */
862 
863 
864 
865 /********************************************************************
866  *
867  * Block device functions
868  *
869  ********************************************************************/
870 
871 /*
872  * block device open routine
873  */
lvm_blk_open(struct inode * inode,struct file * file)874 static int lvm_blk_open(struct inode *inode, struct file *file)
875 {
876 	int minor = MINOR(inode->i_rdev);
877 	lv_t *lv_ptr;
878 	vg_t *vg_ptr = vg[VG_BLK(minor)];
879 
880 	P_DEV("blk_open MINOR: %d  VG#: %d  LV#: %d  mode: %s%s\n",
881 	      minor, VG_BLK(minor), LV_BLK(minor),
882 	      MODE_TO_STR(file->f_mode));
883 
884 #ifdef LVM_TOTAL_RESET
885 	if (lvm_reset_spindown > 0)
886 		return -EPERM;
887 #endif
888 
889 	if (vg_ptr != NULL &&
890 	    (vg_ptr->vg_status & VG_ACTIVE) &&
891 	    (lv_ptr = vg_ptr->lv[LV_BLK(minor)]) != NULL &&
892 	    LV_BLK(minor) >= 0 && LV_BLK(minor) < vg_ptr->lv_max) {
893 
894 		/* Check parallel LV spindown (LV remove) */
895 		if (lv_ptr->lv_status & LV_SPINDOWN)
896 			return -EPERM;
897 
898 		/* Check inactive LV and open for read/write */
899 		/* We need to be able to "read" an inactive LV
900 		   to re-activate it again */
901 		if ((file->f_mode & FMODE_WRITE) &&
902 		    (!(lv_ptr->lv_status & LV_ACTIVE)))
903 			return -EPERM;
904 
905 		if (!(lv_ptr->lv_access & LV_WRITE) &&
906 		    (file->f_mode & FMODE_WRITE))
907 			return -EACCES;
908 
909 
910 		/* be sure to increment VG counter */
911 		if (lv_ptr->lv_open == 0)
912 			vg_ptr->lv_open++;
913 		lv_ptr->lv_open++;
914 
915 		MOD_INC_USE_COUNT;
916 
917 		P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
918 
919 		return 0;
920 	}
921 	return -ENXIO;
922 }				/* lvm_blk_open() */
923 
924 /* Deliver "hard disk geometry" */
/*
 * Deliver a fabricated "hard disk geometry" for an LV.
 *
 * Starts from 128 heads x 128 sectors and halves both until the
 * geometry fits the LV, then derives the cylinder count.  'what'
 * selects the user-space structure: 0 = struct hd_geometry,
 * 1 = struct hd_big_geometry (when HDIO_GETGEO_BIG exists).
 *
 * Returns 0 on success, -EFAULT if the user copy fails.
 */
static int _hdio_getgeo(ulong a, lv_t * lv_ptr, int what)
{
	int ret = 0;
	uchar heads = 128;
	uchar sectors = 128;
	ulong start = 0;
	uint cylinders;

	/* BUGFIX: never let heads/sectors shrink to 0 — the original
	   loop could shift both down to 0 for a tiny or empty LV and
	   then divide by zero when computing cylinders below. */
	while (heads > 1 && sectors > 1 &&
	       heads * sectors > lv_ptr->lv_size) {
		heads >>= 1;
		sectors >>= 1;
	}
	cylinders = lv_ptr->lv_size / heads / sectors;

	switch (what) {
	case 0:
		{
			struct hd_geometry *hd = (struct hd_geometry *) a;

			if (put_user(heads, &hd->heads) ||
			    put_user(sectors, &hd->sectors) ||
			    put_user((ushort) cylinders, &hd->cylinders) ||
			    put_user(start, &hd->start))
				return -EFAULT;
			break;
		}

#ifdef HDIO_GETGEO_BIG
	case 1:
		{
			struct hd_big_geometry *hd =
			    (struct hd_big_geometry *) a;

			if (put_user(heads, &hd->heads) ||
			    put_user(sectors, &hd->sectors) ||
			    put_user(cylinders, &hd->cylinders) ||
			    put_user(start, &hd->start))
				return -EFAULT;
			break;
		}
#endif

	}

	P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
		lvm_name, cylinders);
	return ret;
}
973 
974 
975 /*
976  * block device i/o-control routine
977  */
/*
 * Block device i/o-control routine.
 *
 * Handles the LVM-specific block ioctls (read-ahead, fake geometry,
 * logical-to-physical bmap, access/allocation/status flags, snapshot
 * use rate) and forwards everything else to the generic blk_ioctl().
 *
 * NOTE(review): vg_ptr is dereferenced without a NULL check here,
 * unlike lvm_blk_open() — presumably an ioctl can only arrive on an
 * already-open (hence valid) device; confirm before relying on it.
 */
static int lvm_blk_ioctl(struct inode *inode, struct file *file,
			 uint cmd, ulong a)
{
	kdev_t dev = inode->i_rdev;
	int minor = MINOR(dev), ret;
	vg_t *vg_ptr = vg[VG_BLK(minor)];
	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
	void *arg = (void *) a;

	P_IOCTL("blk MINOR: %d  cmd: 0x%X  arg: %p  VG#: %d  LV#: %d  "
		"mode: %s%s\n", minor, cmd, arg, VG_BLK(minor),
		LV_BLK(minor), MODE_TO_STR(file->f_mode));

	switch (cmd) {
	case BLKRASET:
		/* set read ahead for block device */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			return ret;
		lv_ptr->lv_read_ahead = (long) a;
		LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
		break;

	case HDIO_GETGEO:
#ifdef HDIO_GETGEO_BIG
	case HDIO_GETGEO_BIG:
#endif
		/* get disk geometry */
		P_IOCTL("%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name);
		if (!a)
			return -EINVAL;

		/* the inner switch covers every cmd that can reach it,
		   so control always returns and never actually falls
		   through into the LV_BMAP case below */
		switch (cmd) {
		case HDIO_GETGEO:
			return _hdio_getgeo(a, lv_ptr, 0);
#ifdef HDIO_GETGEO_BIG
		case HDIO_GETGEO_BIG:
			return _hdio_getgeo(a, lv_ptr, 1);
#endif
		}

	case LV_BMAP:
		/* turn logical block into (dev_t, block). non privileged. */
		/* don't bmap a snapshot, since the mapping can change */
		if (lv_ptr->lv_access & LV_SNAPSHOT)
			return -EPERM;

		return lvm_user_bmap(inode, (struct lv_bmap *) arg);

	case LV_SET_ACCESS:
		/* set access flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;

		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_access = (ulong) arg;
		up_write(&lv_ptr->lv_lock);

		/* mirror the new access mode into the block layer's
		   read-only flag for this device */
		if (lv_ptr->lv_access & LV_WRITE)
			set_device_ro(lv_ptr->lv_dev, 0);
		else
			set_device_ro(lv_ptr->lv_dev, 1);
		break;


	case LV_SET_ALLOCATION:
		/* set allocation flags of a logical volume */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_allocation = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SET_STATUS:
		/* set status flags of a logical volume; refuse to
		   deactivate an LV that other openers still use */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		if (!((ulong) arg & LV_ACTIVE) && lv_ptr->lv_open > 1)
			return -EPERM;
		down_write(&lv_ptr->lv_lock);
		lv_ptr->lv_status = (ulong) arg;
		up_write(&lv_ptr->lv_lock);
		break;

	case LV_SNAPSHOT_USE_RATE:
		return lvm_get_snapshot_use_rate(lv_ptr, arg);

	default:
		/* Handle rest here */
		ret = blk_ioctl(dev, cmd, a);
		if (ret)
			printk(KERN_WARNING
			       "%s -- lvm_blk_ioctl: unknown "
			       "cmd 0x%x\n", lvm_name, cmd);
		return ret;
	}

	return 0;
}				/* lvm_blk_ioctl() */
1078 
1079 
1080 /*
1081  * block device close routine
1082  */
/*
 * Block device close routine.
 *
 * Decrements the LV's open count and, when the last opener goes
 * away, the owning VG's count of open LVs.  Always returns 0.
 */
static int lvm_blk_close(struct inode *inode, struct file *file)
{
	int minor = MINOR(inode->i_rdev);
	vg_t *vg_ptr = vg[VG_BLK(minor)];
	lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];

	P_DEV("blk_close MINOR: %d  VG#: %d  LV#: %d\n",
	      minor, VG_BLK(minor), LV_BLK(minor));

	/* last opener: this LV no longer counts against the VG */
	if (lv_ptr->lv_open == 1)
		vg_ptr->lv_open--;
	lv_ptr->lv_open--;

	MOD_DEC_USE_COUNT;

	return 0;
}				/* lvm_blk_close() */
1100 
/*
 * LV_SNAPSHOT_USE_RATE: read (and optionally wait on) a snapshot's
 * fill level.
 *
 * Copies a lv_snapshot_use_rate_req_t from user space: .rate is a
 * percentage threshold (0..100) and .block selects blocking (0) or
 * non-blocking (O_NONBLOCK) behaviour.  In blocking mode the caller
 * sleeps until woken on lv_snapshot_wait once the snapshot fill
 * percentage has reached the stored threshold.  The current fill
 * percentage is then copied back to user space in .rate.
 *
 * Returns 0 on success, -EPERM for a non-snapshot LV, -EFAULT on
 * user-copy failure, -EINVAL for a bad rate or block mode.
 */
static int lvm_get_snapshot_use_rate(lv_t * lv, void *arg)
{
	lv_snapshot_use_rate_req_t lv_rate_req;

	down_read(&lv->lv_lock);
	if (!(lv->lv_access & LV_SNAPSHOT)) {
		up_read(&lv->lv_lock);
		return -EPERM;
	}
	up_read(&lv->lv_lock);

	if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
		return -EFAULT;

	if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
		return -EINVAL;

	switch (lv_rate_req.block) {
	case 0:
		/* blocking: store the threshold, then sleep while the
		   current fill percentage is still below it.  The lock
		   is dropped around the sleep and re-taken afterwards. */
		down_write(&lv->lv_lock);
		lv->lv_snapshot_use_rate = lv_rate_req.rate;
		up_write(&lv->lv_lock);
		down_read(&lv->lv_lock);
		if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
		    lv->lv_snapshot_use_rate) {
			up_read(&lv->lv_lock);
			interruptible_sleep_on(&lv->lv_snapshot_wait);
			down_read(&lv->lv_lock);
		}
		up_read(&lv->lv_lock);
		break;

	case O_NONBLOCK:
		break;

	default:
		return -EINVAL;
	}
	/* report the current fill percentage back to the caller */
	down_read(&lv->lv_lock);
	lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
	up_read(&lv->lv_lock);

	return copy_to_user(arg, &lv_rate_req,
			    sizeof(lv_rate_req)) ? -EFAULT : 0;
}
1146 
/*
 * LV_BMAP helper: translate a logical block number on the LV into
 * the (physical device, physical block) pair it maps to.
 *
 * Builds a throw-away buffer_head for the requested block and runs
 * it through lvm_map() for READ; lvm_map() rewrites b_rdev/b_rsector
 * to the physical location.  Reads lv_block from user_result and
 * writes lv_dev/lv_block back.
 *
 * Returns 0 on success, -EFAULT on user-copy failure, -EINVAL if the
 * mapping fails.
 */
static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
{
	struct buffer_head bh;
	unsigned long block;
	int err;

	if (get_user(block, &user_result->lv_block))
		return -EFAULT;

	memset(&bh, 0, sizeof bh);
	bh.b_blocknr = block;
	bh.b_dev = bh.b_rdev = inode->i_rdev;
	bh.b_size = lvm_get_blksize(bh.b_dev);
	/* convert the block number to 512-byte sectors */
	bh.b_rsector = block * (bh.b_size >> 9);
	bh.b_end_io = NULL;
	if ((err = lvm_map(&bh, READ)) < 0) {
		printk("lvm map failed: %d\n", err);
		return -EINVAL;
	}

	return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
	    put_user(bh.b_rsector / (bh.b_size >> 9),
		     &user_result->lv_block) ? -EFAULT : 0;
}
1171 
1172 
1173 /*
1174  * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1175  * (see init_module/lvm_init)
1176  */
/*
 * Copy one chunk from the snapshot origin to the snapshot device
 * (copy-on-write) and, if the copy succeeds, persist the COW table.
 *
 * Called with no lock held; takes lv_lock for write and re-runs
 * lvm_snapshot_remap_block() because another writer may have COWed
 * this chunk in the window after the caller's read-locked check.
 */
static void __remap_snapshot(kdev_t rdev, ulong rsector,
			     ulong pe_start, lv_t * lv, vg_t * vg)
{

	/* copy a chunk from the origin to a snapshot device */
	down_write(&lv->lv_lock);

	/* we must redo lvm_snapshot_remap_block in order to avoid a
	   race condition in the gap where no lock was held */
	if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
	    !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
		lvm_write_COW_table_block(vg, lv);

	up_write(&lv->lv_lock);
}
1192 
/*
 * Cheap read-locked check whether this chunk is already in the
 * snapshot; only if it is not do we take the expensive write-locked
 * COW path in __remap_snapshot() (which re-checks under the lock).
 */
static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
				   ulong pe_start, lv_t * lv, vg_t * vg)
{
	int r;

	/* check to see if this chunk is already in the snapshot */
	down_read(&lv->lv_lock);
	r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
	up_read(&lv->lv_lock);

	if (!r)
		/* we haven't yet copied this block to the snapshot */
		__remap_snapshot(rdev, rsector, pe_start, lv, vg);
}
1207 
1208 
1209 /*
1210  * extents destined for a pe that is on the move should be deferred
1211  */
/*
 * Decide whether an extent write must be deferred: true only when a
 * PE lock is active, targets the same physical volume, and the
 * sector falls inside the locked extent [pv_offset, pv_offset+pe_size).
 */
static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size)
{
	if (pe_lock_req.lock != LOCK_PE)
		return 0;
	if (pv != pe_lock_req.data.pv_dev)
		return 0;
	if (sector < pe_lock_req.data.pv_offset)
		return 0;
	return sector < (pe_lock_req.data.pv_offset + pe_size);
}
1219 
/*
 * If a PE move (pvmove) has locked the extent this I/O targets,
 * queue the buffer head for later replay and return 1; otherwise
 * return 0 so the caller submits it normally.
 *
 * Takes _pe_lock for read first and only upgrades to the write lock
 * when the extent looks locked; the check is repeated under the
 * write lock because the state may change in the unlocked gap
 * between up_read() and down_write().  Keeps the common fast path
 * free of the write lock.
 */
static inline int _defer_extent(struct buffer_head *bh, int rw,
				kdev_t pv, ulong sector, uint32_t pe_size)
{
	if (pe_lock_req.lock == LOCK_PE) {
		down_read(&_pe_lock);
		if (_should_defer(pv, sector, pe_size)) {
			up_read(&_pe_lock);
			down_write(&_pe_lock);
			/* re-check: the lock state may have changed
			   while no lock was held */
			if (_should_defer(pv, sector, pe_size))
				_queue_io(bh, rw);
			up_write(&_pe_lock);
			return 1;
		}
		up_read(&_pe_lock);
	}
	return 0;
}
1237 
1238 
/*
 * Core remapping function: translate a buffer head's logical
 * (LV-relative) sector into the physical device and sector it lives
 * on, handling linear and striped layouts, deferred writes to
 * extents under pvmove, and snapshot COW.
 *
 * On success b_rdev/b_rsector are rewritten in place and 1 is
 * returned; 0 means the I/O was queued (deferred) and -1 means
 * failure (the buffer gets buffer_IO_error() if it has b_end_io).
 */
static int lvm_map(struct buffer_head *bh, int rw)
{
	int minor = MINOR(bh->b_rdev);
	ulong index;
	ulong pe_start;
	ulong size = bh->b_size >> 9;	/* request size in 512B sectors */
	ulong rsector_org = bh->b_rsector;
	ulong rsector_map;
	kdev_t rdev_map;
	vg_t *vg_this = vg[VG_BLK(minor)];
	lv_t *lv = vg_this->lv[LV_BLK(minor)];


	down_read(&lv->lv_lock);
	if (!(lv->lv_status & LV_ACTIVE)) {
		printk(KERN_ALERT
		       "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	if ((rw == WRITE || rw == WRITEA) && !(lv->lv_access & LV_WRITE)) {
		printk(KERN_CRIT
		       "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
		       lvm_name, lv->lv_name);
		goto bad;
	}

	P_MAP
	    ("%s - lvm_map minor: %d  *rdev: %s  *rsector: %lu  size:%lu\n",
	     lvm_name, minor, kdevname(bh->b_rdev), rsector_org, size);

	/* reject requests reaching past the end of the LV */
	if (rsector_org + size > lv->lv_size) {
		printk(KERN_ALERT
		       "%s - lvm_map access beyond end of device; *rsector: "
		       "%lu or size: %lu wrong for minor: %2d\n",
		       lvm_name, rsector_org, size, minor);
		goto bad;
	}


	if (lv->lv_stripes < 2) {	/* linear mapping */
		/* get the index */
		index = rsector_org / vg_this->pe_size;
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % vg_this->pe_size);
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map);

	} else {		/* striped mapping */
		ulong stripe_index;
		ulong stripe_length;

		/* locate the stripe column and the PE within it, then
		   back out the offsets contributed by the other
		   stripes to get the on-PE sector */
		stripe_length = vg_this->pe_size * lv->lv_stripes;
		stripe_index = (rsector_org % stripe_length) /
		    lv->lv_stripesize;
		index = rsector_org / stripe_length +
		    (stripe_index % lv->lv_stripes) *
		    (lv->lv_allocated_le / lv->lv_stripes);
		pe_start = lv->lv_current_pe[index].pe;
		rsector_map = lv->lv_current_pe[index].pe +
		    (rsector_org % stripe_length) -
		    (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
		    stripe_index / lv->lv_stripes *
		    (lv->lv_stripes - 1) * lv->lv_stripesize;
		rdev_map = lv->lv_current_pe[index].dev;

		P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n"
		      "stripe_length: %ld  stripe_index: %ld\n",
		      index, lv->lv_current_pe[index].pe,
		      kdevname(rdev_map), rsector_map, stripe_length,
		      stripe_index);
	}

	/*
	 * Queue writes to physical extents on the move until move completes.
	 * Don't get _pe_lock until there is a reasonable expectation that
	 * we need to queue this request, because this is in the fast path.
	 */
	if (rw == WRITE || rw == WRITEA) {
		if (_defer_extent(bh, rw, rdev_map,
				  rsector_map, vg_this->pe_size)) {

			up_read(&lv->lv_lock);
			return 0;
		}

		lv->lv_current_pe[index].writes++;	/* statistic */
	} else
		lv->lv_current_pe[index].reads++;	/* statistic */

	/* snapshot volume exception handling on physical device address base */
	if (!(lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)))
		goto out;

	if (lv->lv_access & LV_SNAPSHOT) {	/* remap snapshot */
		if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
					     pe_start, lv) < 0)
			goto bad;

	} else if (rw == WRITE || rw == WRITEA) {	/* snapshot origin */
		lv_t *snap;

		/* start with first snapshot and loop through all of
		   them */
		for (snap = lv->lv_snapshot_next; snap;
		     snap = snap->lv_snapshot_next) {
			/* Check for inactive snapshot */
			if (!(snap->lv_status & LV_ACTIVE))
				continue;

			/* Serializes the COW with the accesses to the
			   snapshot device */
			_remap_snapshot(rdev_map, rsector_map,
					pe_start, snap, vg_this);
		}
	}

      out:
	bh->b_rdev = rdev_map;
	bh->b_rsector = rsector_map;
	up_read(&lv->lv_lock);
	return 1;

      bad:
	if (bh->b_end_io)
		buffer_IO_error(bh);
	up_read(&lv->lv_lock);
	return -1;
}				/* lvm_map() */
1373 
1374 
1375 /*
1376  * internal support functions
1377  */
1378 
1379 #ifdef LVM_HD_NAME
1380 /*
1381  * generate "hard disk" name
1382  */
lvm_hd_name(char * buf,int minor)1383 void lvm_hd_name(char *buf, int minor)
1384 {
1385 	int len = 0;
1386 	lv_t *lv_ptr;
1387 
1388 	if (vg[VG_BLK(minor)] == NULL ||
1389 	    (lv_ptr = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]) == NULL)
1390 		return;
1391 	len = strlen(lv_ptr->lv_name) - 5;
1392 	memcpy(buf, &lv_ptr->lv_name[5], len);
1393 	buf[len] = 0;
1394 	return;
1395 }
1396 #endif
1397 
1398 
1399 
1400 
1401 /*
1402  * make request function
1403  */
/*
 * Make-request function hooked into the block layer: run the buffer
 * through lvm_map() and report whether the block layer should
 * proceed with the (now remapped) request.
 */
static int lvm_make_request_fn(request_queue_t * q,
			       int rw, struct buffer_head *bh)
{
	if (lvm_map(bh, rw) <= 0)
		return 0;	/* deferred or failed: don't submit */
	return 1;		/* remapped: submit to the new device */
}
1409 
1410 
1411 /********************************************************************
1412  *
1413  * Character device support functions
1414  *
1415  ********************************************************************/
1416 /*
1417  * character device support function logical volume manager lock
1418  */
/*
 * Acquire the global LVM ioctl lock for the current process.
 *
 * If another pid holds the lock, sleeps interruptibly on lvm_wait
 * and retries once woken.  A process that already holds the lock
 * passes straight through (lock == current->pid).
 *
 * Returns 0 with the lock held, -EINTR if interrupted by a signal,
 * or -EACCES when a total reset is pending (LVM_TOTAL_RESET).
 */
static int lvm_do_lock_lvm(void)
{
      lock_try_again:
	spin_lock(&lvm_lock);
	if (lock != 0 && lock != current->pid) {
		P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
		spin_unlock(&lvm_lock);
		interruptible_sleep_on(&lvm_wait);
		if (current->sigpending != 0)
			return -EINTR;
#ifdef LVM_TOTAL_RESET
		if (lvm_reset_spindown > 0)
			return -EACCES;
#endif
		goto lock_try_again;
	}
	lock = current->pid;
	P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
	spin_unlock(&lvm_lock);
	return 0;
}				/* lvm_do_lock_lvm */
1440 
1441 
1442 /*
1443  * character device support function lock/unlock physical extend
1444  */
/*
 * PE_LOCK_UNLOCK ioctl: lock or unlock a physical extent for pvmove.
 *
 * LOCK_PE validates that the target PV belongs to this VG, syncs the
 * LV, installs the lock in the global pe_lock_req, then syncs the PV
 * again to flush requests that slipped in before the lock.  UNLOCK_PE
 * clears the lock and replays all I/O that was deferred while it was
 * held.
 *
 * Returns 0 on success, -ENXIO for a missing VG/PV, -EFAULT on copy
 * failure, -EBUSY if a PE lock is already held, -EINVAL otherwise.
 */
static int lvm_do_pe_lock_unlock(vg_t * vg_ptr, void *arg)
{
	pe_lock_req_t new_lock;
	struct buffer_head *bh;
	uint p;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
		return -EFAULT;

	switch (new_lock.lock) {
	case LOCK_PE:
		/* the locked PV must be part of this VG */
		for (p = 0; p < vg_ptr->pv_max; p++) {
			if (vg_ptr->pv[p] != NULL &&
			    new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
				break;
		}
		if (p == vg_ptr->pv_max)
			return -ENXIO;

		/*
		 * this sync releaves memory pressure to lessen the
		 * likelyhood of pvmove being paged out - resulting in
		 * deadlock.
		 *
		 * This method of doing a pvmove is broken
		 */
		fsync_dev(pe_lock_req.data.lv_dev);

		down_write(&_pe_lock);
		if (pe_lock_req.lock == LOCK_PE) {
			up_write(&_pe_lock);
			return -EBUSY;
		}

		/* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
		pe_lock_req.lock = LOCK_PE;
		pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
		pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
		pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
		up_write(&_pe_lock);

		/* some requests may have got through since the fsync */
		fsync_dev(pe_lock_req.data.pv_dev);
		break;

	case UNLOCK_PE:
		down_write(&_pe_lock);
		pe_lock_req.lock = UNLOCK_PE;
		pe_lock_req.data.lv_dev = 0;
		pe_lock_req.data.pv_dev = 0;
		pe_lock_req.data.pv_offset = 0;
		/* grab the deferred-I/O list while still holding the
		   lock, flush it afterwards without it */
		bh = _dequeue_io();
		up_write(&_pe_lock);

		/* handle all deferred io for this PE */
		_flush_io(bh);
		break;

	default:
		return -EINVAL;
	}
	return 0;
}
1510 
1511 
1512 /*
1513  * character device support function logical extend remap
1514  */
/*
 * LE_REMAP ioctl: repoint one logical extent of a named LV from
 * (old_dev, old_pe) to (new_dev, new_pe) — used by pvmove after the
 * data has been copied.  Refreshes the LV's hardsect size since the
 * new device may differ.
 *
 * Returns 0 on success, -ENXIO when the VG or LV doesn't exist,
 * -EFAULT on copy failure, -EINVAL when no LE matches.
 */
static int lvm_do_le_remap(vg_t * vg_ptr, void *arg)
{
	uint l, le;
	lv_t *lv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	/* le_remap_req is a file-scope scratch structure */
	if (copy_from_user(&le_remap_req, arg,
			   sizeof(le_remap_req_t)) != 0)
		return -EFAULT;

	for (l = 0; l < vg_ptr->lv_max; l++) {
		lv_ptr = vg_ptr->lv[l];

		if (!lv_ptr)
			continue;

		if (strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) {
			down_write(&lv_ptr->lv_lock);
			for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
				if (lv_ptr->lv_current_pe[le].dev ==
				    le_remap_req.old_dev &&
				    lv_ptr->lv_current_pe[le].pe ==
				    le_remap_req.old_pe) {
					lv_ptr->lv_current_pe[le].dev =
					    le_remap_req.new_dev;
					lv_ptr->lv_current_pe[le].pe =
					    le_remap_req.new_pe;
					__update_hardsectsize(lv_ptr);
					up_write(&lv_ptr->lv_lock);
					return 0;
				}
			}
			up_write(&lv_ptr->lv_lock);
			return -EINVAL;
		}
	}
	return -ENXIO;
}				/* lvm_do_le_remap() */
1554 
1555 
1556 /*
1557  * character device support function VGDA create
1558  */
lvm_do_vg_create(void * arg,int minor)1559 static int lvm_do_vg_create(void *arg, int minor)
1560 {
1561 	int ret = 0;
1562 	ulong l, ls = 0, p, size;
1563 	lv_t lv;
1564 	vg_t *vg_ptr;
1565 	lv_t **snap_lv_ptr;
1566 
1567 	if ((vg_ptr = kmalloc(sizeof(vg_t), GFP_KERNEL)) == NULL) {
1568 		printk(KERN_CRIT
1569 		       "%s -- VG_CREATE: kmalloc error VG at line %d\n",
1570 		       lvm_name, __LINE__);
1571 		return -ENOMEM;
1572 	}
1573 	/* get the volume group structure */
1574 	if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1575 		P_IOCTL
1576 		    ("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1577 		     arg, sizeof(vg_t));
1578 		kfree(vg_ptr);
1579 		return -EFAULT;
1580 	}
1581 
1582 	/* VG_CREATE now uses minor number in VG structure */
1583 	if (minor == -1)
1584 		minor = vg_ptr->vg_number;
1585 
1586 	/* check limits */
1587 	if (minor >= ABS_MAX_VG)
1588 		return -EFAULT;
1589 
1590 	/* Validate it */
1591 	if (vg[VG_CHR(minor)] != NULL) {
1592 		P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1593 		kfree(vg_ptr);
1594 		return -EPERM;
1595 	}
1596 
1597 	/* we are not that active so far... */
1598 	vg_ptr->vg_status &= ~VG_ACTIVE;
1599 	vg_ptr->pe_allocated = 0;
1600 
1601 	if (vg_ptr->pv_max > ABS_MAX_PV) {
1602 		printk(KERN_WARNING
1603 		       "%s -- Can't activate VG: ABS_MAX_PV too small\n",
1604 		       lvm_name);
1605 		kfree(vg_ptr);
1606 		return -EPERM;
1607 	}
1608 
1609 	if (vg_ptr->lv_max > ABS_MAX_LV) {
1610 		printk(KERN_WARNING
1611 		       "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
1612 		       lvm_name, vg_ptr->lv_max);
1613 		kfree(vg_ptr);
1614 		return -EPERM;
1615 	}
1616 
1617 	/* create devfs and procfs entries */
1618 	lvm_fs_create_vg(vg_ptr);
1619 
1620 	vg[VG_CHR(minor)] = vg_ptr;
1621 
1622 	/* get the physical volume structures */
1623 	vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1624 	for (p = 0; p < vg_ptr->pv_max; p++) {
1625 		pv_t *pvp;
1626 		/* user space address */
1627 		if ((pvp = vg_ptr->pv[p]) != NULL) {
1628 			ret = lvm_do_pv_create(pvp, vg_ptr, p);
1629 			if (ret != 0) {
1630 				lvm_do_vg_remove(minor);
1631 				return ret;
1632 			}
1633 		}
1634 	}
1635 
1636 	size = vg_ptr->lv_max * sizeof(lv_t *);
1637 	if ((snap_lv_ptr = vmalloc(size)) == NULL) {
1638 		printk(KERN_CRIT
1639 		       "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n",
1640 		       lvm_name, __LINE__);
1641 		lvm_do_vg_remove(minor);
1642 		return -EFAULT;
1643 	}
1644 	memset(snap_lv_ptr, 0, size);
1645 
1646 	/* get the logical volume structures */
1647 	vg_ptr->lv_cur = 0;
1648 	for (l = 0; l < vg_ptr->lv_max; l++) {
1649 		lv_t *lvp;
1650 		/* user space address */
1651 		if ((lvp = vg_ptr->lv[l]) != NULL) {
1652 			if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1653 				P_IOCTL
1654 				    ("ERROR: copying LV ptr %p (%d bytes)\n",
1655 				     lvp, sizeof(lv_t));
1656 				lvm_do_vg_remove(minor);
1657 				return -EFAULT;
1658 			}
1659 			if (lv.lv_access & LV_SNAPSHOT) {
1660 				snap_lv_ptr[ls] = lvp;
1661 				vg_ptr->lv[l] = NULL;
1662 				ls++;
1663 				continue;
1664 			}
1665 			vg_ptr->lv[l] = NULL;
1666 			/* only create original logical volumes for now */
1667 			if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1668 				lvm_do_vg_remove(minor);
1669 				return -EFAULT;
1670 			}
1671 		}
1672 	}
1673 
1674 	/* Second path to correct snapshot logical volumes which are not
1675 	   in place during first path above */
1676 	for (l = 0; l < ls; l++) {
1677 		lv_t *lvp = snap_lv_ptr[l];
1678 		if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1679 			lvm_do_vg_remove(minor);
1680 			return -EFAULT;
1681 		}
1682 		if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
1683 			lvm_do_vg_remove(minor);
1684 			return -EFAULT;
1685 		}
1686 	}
1687 
1688 	vfree(snap_lv_ptr);
1689 
1690 	vg_count++;
1691 
1692 
1693 	MOD_INC_USE_COUNT;
1694 
1695 	/* let's go active */
1696 	vg_ptr->vg_status |= VG_ACTIVE;
1697 
1698 	return 0;
1699 }				/* lvm_do_vg_create() */
1700 
1701 
1702 /*
1703  * character device support function VGDA extend
1704  */
/*
 * VG_EXTEND ioctl: copy one new PV from user space into the first
 * free slot of an existing VG and grow the VG's PE total.
 *
 * Returns 0 on success, -ENXIO without a VG, -EPERM when the VG is
 * already at pv_max, or the error from lvm_do_pv_create().
 */
static int lvm_do_vg_extend(vg_t * vg_ptr, void *arg)
{
	int ret = 0;
	uint p;
	pv_t *pv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (vg_ptr->pv_cur < vg_ptr->pv_max) {
		for (p = 0; p < vg_ptr->pv_max; p++) {
			/* first free slot takes the new PV */
			if ((pv_ptr = vg_ptr->pv[p]) == NULL) {
				ret = lvm_do_pv_create(arg, vg_ptr, p);
				if (ret != 0)
					return ret;
				pv_ptr = vg_ptr->pv[p];
				vg_ptr->pe_total += pv_ptr->pe_total;
				return 0;
			}
		}
	}
	return -EPERM;
}				/* lvm_do_vg_extend() */
1727 
1728 
1729 /*
1730  * character device support function VGDA reduce
1731  */
/*
 * VG_REDUCE ioctl: remove the PV named in the user-space argument
 * from the VG (only if no LVs still live on it) and compact the PV
 * pointer array.
 *
 * Returns 0 on success, -ENXIO when the VG or PV doesn't exist,
 * -EFAULT on copy failure, -EPERM if the PV still hosts LVs.
 */
static int lvm_do_vg_reduce(vg_t * vg_ptr, void *arg)
{
	uint p;
	pv_t *pv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;
	if (copy_from_user(pv_name, arg, sizeof(pv_name)) != 0)
		return -EFAULT;

	for (p = 0; p < vg_ptr->pv_max; p++) {
		pv_ptr = vg_ptr->pv[p];
		if (pv_ptr != NULL &&
		    strcmp(pv_ptr->pv_name, pv_name) == 0) {
			if (pv_ptr->lv_cur > 0)
				return -EPERM;
			lvm_do_pv_remove(vg_ptr, p);
			/* Make PV pointer array contiguous */
			for (; p < vg_ptr->pv_max - 1; p++)
				vg_ptr->pv[p] = vg_ptr->pv[p + 1];
			/* BUGFIX: clear the now-vacant last slot; the
			   original wrote pv[p + 1] here, i.e.
			   pv[pv_max], one element past the end of the
			   array (out-of-bounds write) */
			vg_ptr->pv[p] = NULL;
			return 0;
		}
	}
	return -ENXIO;
}				/* lvm_do_vg_reduce */
1758 
1759 
1760 /*
1761  * character device support function VG rename
1762  */
/*
 * VG_RENAME ioctl: rename a VG, rewrite the derived vg_name/lv_name
 * of all its LVs and the vg_name of all its PVs, then re-register
 * the devfs/procfs entries under the new name.
 *
 * Returns 0 on success (also when the VG doesn't exist in-kernel),
 * -EFAULT if the new name cannot be copied from user space.
 */
static int lvm_do_vg_rename(vg_t * vg_ptr, void *arg)
{
	int l = 0, p = 0, len = 0;
	char vg_name[NAME_LEN] = { 0, };
	char lv_name[NAME_LEN] = { 0, };
	char *ptr = NULL;
	lv_t *lv_ptr = NULL;
	pv_t *pv_ptr = NULL;

	/* If the VG doesn't exist in the kernel then just exit */
	if (!vg_ptr)
		return 0;

	if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
		return -EFAULT;

	/* robustness: guarantee NUL termination even if user space
	   filled all NAME_LEN bytes */
	vg_name[sizeof(vg_name) - 1] = 0;

	lvm_fs_remove_vg(vg_ptr);

	strncpy(vg_ptr->vg_name, vg_name, sizeof(vg_name) - 1);
	for (l = 0; l < vg_ptr->lv_max; l++) {
		if ((lv_ptr = vg_ptr->lv[l]) == NULL)
			continue;
		/* BUGFIX: the original memset used sizeof(*vg_name),
		   i.e. sizeof(char) == 1, clearing only the first
		   byte; clear the whole name buffer */
		memset(lv_ptr->vg_name, 0, sizeof(vg_name));
		strncpy(lv_ptr->vg_name, vg_name, sizeof(vg_name));
		/* rebuild "<prefix><vg>/<lv>" from the old basename */
		ptr = strrchr(lv_ptr->lv_name, '/');
		ptr = ptr ? ptr + 1 : lv_ptr->lv_name;
		strncpy(lv_name, ptr, sizeof(lv_name));
		len = sizeof(LVM_DIR_PREFIX);
		strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX);
		strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len);
		strcat(lv_ptr->lv_name, "/");
		len += strlen(vg_name) + 1;
		strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len);
	}
	for (p = 0; p < vg_ptr->pv_max; p++) {
		if ((pv_ptr = vg_ptr->pv[p]) == NULL)
			continue;
		strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
	}

	lvm_fs_create_vg(vg_ptr);

	/* Need to add PV entries */
	for (p = 0; p < vg_ptr->pv_act; p++) {
		pv_t *pv_ptr = vg_ptr->pv[p];

		if (pv_ptr)
			lvm_fs_create_pv(vg_ptr, pv_ptr);
	}

	/* Need to add LV entries */
	for (l = 0; l < vg_ptr->lv_max; l++) {
		lv_t *lv_ptr = vg_ptr->lv[l];

		if (!lv_ptr)
			continue;

		lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
		    lvm_fs_create_lv(vg_ptr, lv_ptr);
	}

	return 0;
}				/* lvm_do_vg_rename */
1826 
1827 
1828 /*
1829  * character device support function VGDA remove
1830  */
/*
 * VG_REMOVE ioctl: deactivate and free an entire volume group —
 * snapshot LVs first (so origins outlive their snapshots), then the
 * remaining LVs, then the PVs, and finally the VG structure itself.
 *
 * Returns 0 on success, -ENXIO when the VG doesn't exist, -EPERM
 * while any of its LVs are still open (unless a total reset is
 * pending).
 */
static int lvm_do_vg_remove(int minor)
{
	int i;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	pv_t *pv_ptr;

	if (vg_ptr == NULL)
		return -ENXIO;

#ifdef LVM_TOTAL_RESET
	if (vg_ptr->lv_open > 0 && lvm_reset_spindown == 0)
#else
	if (vg_ptr->lv_open > 0)
#endif
		return -EPERM;

	/* let's go inactive */
	vg_ptr->vg_status &= ~VG_ACTIVE;

	/* remove from procfs and devfs */
	lvm_fs_remove_vg(vg_ptr);

	/* free LVs */
	/* first free snapshot logical volumes */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL &&
		    vg_ptr->lv[i]->lv_access & LV_SNAPSHOT) {
			lvm_do_lv_remove(minor, NULL, i);
			/* brief uninterruptible pause between removals */
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}
	/* then free the rest of the LVs */
	for (i = 0; i < vg_ptr->lv_max; i++) {
		if (vg_ptr->lv[i] != NULL) {
			lvm_do_lv_remove(minor, NULL, i);
			current->state = TASK_UNINTERRUPTIBLE;
			schedule_timeout(1);
		}
	}

	/* free PVs */
	for (i = 0; i < vg_ptr->pv_max; i++) {
		if ((pv_ptr = vg_ptr->pv[i]) != NULL) {
			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
			lvm_do_pv_remove(vg_ptr, i);
		}
	}

	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
	kfree(vg_ptr);
	vg[VG_CHR(minor)] = NULL;

	vg_count--;

	MOD_DEC_USE_COUNT;

	return 0;
}				/* lvm_do_vg_remove() */
1890 
1891 
1892 /*
1893  * character device support function physical volume create
1894  */
/*
 * Create one in-kernel PV: allocate a pv_t, copy the descriptor from
 * the user-space pointer pvp, open the underlying device and install
 * the PV at slot p of the VG.
 *
 * The user-space PE list is deliberately dropped (pv->pe = NULL) —
 * it is not needed in kernel space.
 *
 * Returns 0 on success, -ENXIO without a VG, -ENOMEM/-EFAULT on
 * allocation/copy failure, or the error from _open_pv().
 */
static int lvm_do_pv_create(pv_t * pvp, vg_t * vg_ptr, ulong p)
{
	pv_t *pv;
	int err;

	if (!vg_ptr)
		return -ENXIO;

	pv = kmalloc(sizeof(pv_t), GFP_KERNEL);
	if (pv == NULL) {
		printk(KERN_CRIT
		       "%s -- PV_CREATE: kmalloc error PV at line %d\n",
		       lvm_name, __LINE__);
		return -ENOMEM;
	}

	memset(pv, 0, sizeof(*pv));

	if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
		P_IOCTL
		    ("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
		     pvp, sizeof(pv_t));
		kfree(pv);
		return -EFAULT;
	}

	if ((err = _open_pv(pv))) {
		kfree(pv);
		return err;
	}

	/* We don't need the PE list
	   in kernel space as with LVs pe_t list (see below) */
	pv->pe = NULL;
	pv->pe_allocated = 0;
	pv->pv_status = PV_ACTIVE;
	vg_ptr->pv_act++;
	vg_ptr->pv_cur++;
	lvm_fs_create_pv(vg_ptr, pv);

	vg_ptr->pv[p] = pv;
	return 0;
}				/* lvm_do_pv_create() */
1938 
1939 
1940 /*
1941  * character device support function physical volume remove
1942  */
/*
 * Tear down the PV at slot p of the VG: unregister its fs entries,
 * subtract its extents from the VG totals, close the underlying
 * device, free the pv_t and clear the slot.  Always returns 0.
 */
static int lvm_do_pv_remove(vg_t * vg_ptr, ulong p)
{
	pv_t *pv = vg_ptr->pv[p];

	lvm_fs_remove_pv(vg_ptr, pv);

	vg_ptr->pe_total -= pv->pe_total;
	vg_ptr->pv_cur--;
	vg_ptr->pv_act--;

	_close_pv(pv);
	kfree(pv);

	vg_ptr->pv[p] = NULL;

	return 0;
}
1960 
1961 
/*
 * Recompute the hardware sector size recorded for an LV's device.
 *
 * Fast path: if all the VG's PVs report the same sector size, that
 * is the answer.  Otherwise every LE of the LV (and, for an active
 * snapshot, every COW exception device) is inspected and the maximum
 * sector size wins.  Falls back to SECTOR_SIZE when nothing reports
 * a size.  The result is stored in lvm_hardsectsizes[] keyed by the
 * LV's minor number.
 */
static void __update_hardsectsize(lv_t * lv)
{
	int max_hardsectsize = 0, hardsectsize = 0;
	int p;

	/* Check PVs first to see if they all have same sector size */
	for (p = 0; p < lv->vg->pv_cur; p++) {
		pv_t *pv = lv->vg->pv[p];
		if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
			if (max_hardsectsize == 0)
				max_hardsectsize = hardsectsize;
			else if (hardsectsize != max_hardsectsize) {
				/* mismatch found: fall through to the
				   per-LE scan below */
				P_DEV
				    ("%s PV[%d] (%s) sector size %d, not %d\n",
				     lv->lv_name, p, kdevname(pv->pv_dev),
				     hardsectsize, max_hardsectsize);
				break;
			}
		}
	}

	/* PVs have different block size, need to check each LE sector size */
	if (hardsectsize != max_hardsectsize) {
		int le;
		for (le = 0; le < lv->lv_allocated_le; le++) {
			hardsectsize =
			    lvm_sectsize(lv->lv_current_pe[le].dev);
			if (hardsectsize > max_hardsectsize) {
				P_DEV
				    ("%s LE[%d] (%s) blocksize %d not %d\n",
				     lv->lv_name, le,
				     kdevname(lv->lv_current_pe[le].dev),
				     hardsectsize, max_hardsectsize);
				max_hardsectsize = hardsectsize;
			}
		}

		/* only perform this operation on active snapshots */
		if ((lv->lv_access & LV_SNAPSHOT) &&
		    (lv->lv_status & LV_ACTIVE)) {
			int e;
			for (e = 0; e < lv->lv_remap_end; e++) {
				hardsectsize =
				    lvm_sectsize(lv->lv_block_exception[e].
						 rdev_new);
				if (hardsectsize > max_hardsectsize)
					max_hardsectsize = hardsectsize;
			}
		}
	}

	if (max_hardsectsize == 0)
		max_hardsectsize = SECTOR_SIZE;
	P_DEV("hardblocksize for LV %s is %d\n",
	      kdevname(lv->lv_dev), max_hardsectsize);
	lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
}
2019 
2020 /*
2021  * character device support function logical volume create
2022  */
lvm_do_lv_create(int minor,char * lv_name,lv_t * lv)2023 static int lvm_do_lv_create(int minor, char *lv_name, lv_t * lv)
2024 {
2025 	int e, ret, l, le, l_new, p, size, activate = 1;
2026 	ulong lv_status_save;
2027 	lv_block_exception_t *lvbe = lv->lv_block_exception;
2028 	vg_t *vg_ptr = vg[VG_CHR(minor)];
2029 	lv_t *lv_ptr = NULL;
2030 	pe_t *pep;
2031 
2032 	if (!(pep = lv->lv_current_pe))
2033 		return -EINVAL;
2034 
2035 	if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
2036 		return -EINVAL;
2037 
2038 	for (l = 0; l < vg_ptr->lv_cur; l++) {
2039 		if (vg_ptr->lv[l] != NULL &&
2040 		    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2041 			return -EEXIST;
2042 	}
2043 
2044 	/* in case of lv_remove(), lv_create() pair */
2045 	l_new = -1;
2046 	if (vg_ptr->lv[lv->lv_number] == NULL)
2047 		l_new = lv->lv_number;
2048 	else {
2049 		for (l = 0; l < vg_ptr->lv_max; l++) {
2050 			if (vg_ptr->lv[l] == NULL)
2051 				if (l_new == -1)
2052 					l_new = l;
2053 		}
2054 	}
2055 	if (l_new == -1)
2056 		return -EPERM;
2057 	else
2058 		l = l_new;
2059 
2060 	if ((lv_ptr = kmalloc(sizeof(lv_t), GFP_KERNEL)) == NULL) {;
2061 		printk(KERN_CRIT
2062 		       "%s -- LV_CREATE: kmalloc error LV at line %d\n",
2063 		       lvm_name, __LINE__);
2064 		return -ENOMEM;
2065 	}
2066 	/* copy preloaded LV */
2067 	memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2068 
2069 	lv_status_save = lv_ptr->lv_status;
2070 	lv_ptr->lv_status &= ~LV_ACTIVE;
2071 	lv_ptr->lv_snapshot_org = NULL;
2072 	lv_ptr->lv_snapshot_prev = NULL;
2073 	lv_ptr->lv_snapshot_next = NULL;
2074 	lv_ptr->lv_block_exception = NULL;
2075 	lv_ptr->lv_iobuf = NULL;
2076 	lv_ptr->lv_COW_table_iobuf = NULL;
2077 	lv_ptr->lv_snapshot_hash_table = NULL;
2078 	lv_ptr->lv_snapshot_hash_table_size = 0;
2079 	lv_ptr->lv_snapshot_hash_mask = 0;
2080 	init_rwsem(&lv_ptr->lv_lock);
2081 
2082 	lv_ptr->lv_snapshot_use_rate = 0;
2083 
2084 	vg_ptr->lv[l] = lv_ptr;
2085 
2086 	/* get the PE structures from user space if this
2087 	   is not a snapshot logical volume */
2088 	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2089 		size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2090 
2091 		if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2092 			printk(KERN_CRIT
2093 			       "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2094 			       "at line %d\n", lvm_name, size, __LINE__);
2095 			P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2096 			kfree(lv_ptr);
2097 			vg_ptr->lv[l] = NULL;
2098 			return -ENOMEM;
2099 		}
2100 		if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2101 			P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2102 				pep, sizeof(size));
2103 			vfree(lv_ptr->lv_current_pe);
2104 			kfree(lv_ptr);
2105 			vg_ptr->lv[l] = NULL;
2106 			return -EFAULT;
2107 		}
2108 		/* correct the PE count in PVs */
2109 		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2110 			vg_ptr->pe_allocated++;
2111 			for (p = 0; p < vg_ptr->pv_cur; p++) {
2112 				if (vg_ptr->pv[p]->pv_dev ==
2113 				    lv_ptr->lv_current_pe[le].dev)
2114 					vg_ptr->pv[p]->pe_allocated++;
2115 			}
2116 		}
2117 	} else {
2118 		/* Get snapshot exception data and block list */
2119 		if (lvbe != NULL) {
2120 			lv_ptr->lv_snapshot_org =
2121 			    vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2122 			if (lv_ptr->lv_snapshot_org != NULL) {
2123 				size =
2124 				    lv_ptr->lv_remap_end *
2125 				    sizeof(lv_block_exception_t);
2126 
2127 				if (!size) {
2128 					printk(KERN_WARNING
2129 					       "%s -- zero length exception table requested\n",
2130 					       lvm_name);
2131 					kfree(lv_ptr);
2132 					return -EINVAL;
2133 				}
2134 
2135 				if ((lv_ptr->lv_block_exception =
2136 				     vmalloc(size)) == NULL) {
2137 					printk(KERN_CRIT
2138 					       "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2139 					       "of %d byte at line %d\n",
2140 					       lvm_name, size, __LINE__);
2141 					P_KFREE("%s -- kfree %d\n",
2142 						lvm_name, __LINE__);
2143 					kfree(lv_ptr);
2144 					vg_ptr->lv[l] = NULL;
2145 					return -ENOMEM;
2146 				}
2147 				if (copy_from_user
2148 				    (lv_ptr->lv_block_exception, lvbe,
2149 				     size)) {
2150 					vfree(lv_ptr->lv_block_exception);
2151 					kfree(lv_ptr);
2152 					vg_ptr->lv[l] = NULL;
2153 					return -EFAULT;
2154 				}
2155 
2156 				if (lv_ptr->lv_block_exception[0].
2157 				    rsector_org ==
2158 				    LVM_SNAPSHOT_DROPPED_SECTOR) {
2159 					printk(KERN_WARNING
2160 					       "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2161 					       lvm_name);
2162 					activate = 0;
2163 				}
2164 
2165 				/* point to the original logical volume */
2166 				lv_ptr = lv_ptr->lv_snapshot_org;
2167 
2168 				lv_ptr->lv_snapshot_minor = 0;
2169 				lv_ptr->lv_snapshot_org = lv_ptr;
2170 				/* our new one now back points to the previous last in the chain
2171 				   which can be the original logical volume */
2172 				lv_ptr = vg_ptr->lv[l];
2173 				/* now lv_ptr points to our new last snapshot logical volume */
2174 				lv_ptr->lv_current_pe =
2175 				    lv_ptr->lv_snapshot_org->lv_current_pe;
2176 				lv_ptr->lv_allocated_snapshot_le =
2177 				    lv_ptr->lv_allocated_le;
2178 				lv_ptr->lv_allocated_le =
2179 				    lv_ptr->lv_snapshot_org->
2180 				    lv_allocated_le;
2181 				lv_ptr->lv_current_le =
2182 				    lv_ptr->lv_snapshot_org->lv_current_le;
2183 				lv_ptr->lv_size =
2184 				    lv_ptr->lv_snapshot_org->lv_size;
2185 				lv_ptr->lv_stripes =
2186 				    lv_ptr->lv_snapshot_org->lv_stripes;
2187 				lv_ptr->lv_stripesize =
2188 				    lv_ptr->lv_snapshot_org->lv_stripesize;
2189 
2190 				if ((ret =
2191 				     lvm_snapshot_alloc(lv_ptr)) != 0) {
2192 					vfree(lv_ptr->lv_block_exception);
2193 					kfree(lv_ptr);
2194 					vg_ptr->lv[l] = NULL;
2195 					return ret;
2196 				}
2197 				/* Update the VG PE(s) used by snapshot reserve space. */
2198 				vg_ptr->pe_allocated +=
2199 				    lv_ptr->lv_allocated_snapshot_le;
2200 
2201 				for (e = 0; e < lv_ptr->lv_remap_ptr; e++)
2202 					lvm_hash_link(lv_ptr->
2203 						      lv_block_exception +
2204 						      e,
2205 						      lv_ptr->
2206 						      lv_block_exception
2207 						      [e].rdev_org,
2208 						      lv_ptr->
2209 						      lv_block_exception
2210 						      [e].rsector_org,
2211 						      lv_ptr);
2212 				/* need to fill the COW exception table data
2213 				   into the page for disk i/o */
2214 				if (lvm_snapshot_fill_COW_page
2215 				    (vg_ptr, lv_ptr)) {
2216 					kfree(lv_ptr);
2217 					vg_ptr->lv[l] = NULL;
2218 					return -EINVAL;
2219 				}
2220 				init_waitqueue_head(&lv_ptr->
2221 						    lv_snapshot_wait);
2222 			} else {
2223 				kfree(lv_ptr);
2224 				vg_ptr->lv[l] = NULL;
2225 				return -EFAULT;
2226 			}
2227 		} else {
2228 			kfree(vg_ptr->lv[l]);
2229 			vg_ptr->lv[l] = NULL;
2230 			return -EINVAL;
2231 		}
2232 	}			/* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */
2233 
2234 	lv_ptr = vg_ptr->lv[l];
2235 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2236 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
2237 	lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
2238 	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number;
2239 	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number;
2240 	LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2241 	vg_ptr->lv_cur++;
2242 	lv_ptr->lv_status = lv_status_save;
2243 	lv_ptr->vg = vg_ptr;
2244 
2245 	__update_hardsectsize(lv_ptr);
2246 
2247 	/* optionally add our new snapshot LV */
2248 	if (lv_ptr->lv_access & LV_SNAPSHOT) {
2249 		lv_t *org = lv_ptr->lv_snapshot_org, *last;
2250 
2251 		/* sync the original logical volume */
2252 		fsync_dev(org->lv_dev);
2253 #ifdef	LVM_VFS_ENHANCEMENT
2254 		/* VFS function call to sync and lock the filesystem */
2255 		fsync_dev_lockfs(org->lv_dev);
2256 #endif
2257 
2258 		down_write(&org->lv_lock);
2259 		org->lv_access |= LV_SNAPSHOT_ORG;
2260 		lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG;	/* this can only hide an userspace bug */
2261 
2262 
2263 		/* Link in the list of snapshot volumes */
2264 		for (last = org; last->lv_snapshot_next;
2265 		     last = last->lv_snapshot_next);
2266 		lv_ptr->lv_snapshot_prev = last;
2267 		last->lv_snapshot_next = lv_ptr;
2268 		up_write(&org->lv_lock);
2269 	}
2270 
2271 	/* activate the logical volume */
2272 	if (activate)
2273 		lv_ptr->lv_status |= LV_ACTIVE;
2274 	else
2275 		lv_ptr->lv_status &= ~LV_ACTIVE;
2276 
2277 	if (lv_ptr->lv_access & LV_WRITE)
2278 		set_device_ro(lv_ptr->lv_dev, 0);
2279 	else
2280 		set_device_ro(lv_ptr->lv_dev, 1);
2281 
2282 #ifdef	LVM_VFS_ENHANCEMENT
2283 /* VFS function call to unlock the filesystem */
2284 	if (lv_ptr->lv_access & LV_SNAPSHOT)
2285 		unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2286 #endif
2287 
2288 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2289 	    lvm_fs_create_lv(vg_ptr, lv_ptr);
2290 	return 0;
2291 }				/* lvm_do_lv_create() */
2292 
2293 
2294 /*
2295  * character device support function logical volume remove
2296  */
lvm_do_lv_remove(int minor,char * lv_name,int l)2297 static int lvm_do_lv_remove(int minor, char *lv_name, int l)
2298 {
2299 	uint le, p;
2300 	vg_t *vg_ptr = vg[VG_CHR(minor)];
2301 	lv_t *lv_ptr;
2302 
2303 	if (!vg_ptr)
2304 		return -ENXIO;
2305 
2306 	if (l == -1) {
2307 		for (l = 0; l < vg_ptr->lv_max; l++) {
2308 			if (vg_ptr->lv[l] != NULL &&
2309 			    strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) {
2310 				break;
2311 			}
2312 		}
2313 	}
2314 	if (l == vg_ptr->lv_max)
2315 		return -ENXIO;
2316 
2317 	lv_ptr = vg_ptr->lv[l];
2318 #ifdef LVM_TOTAL_RESET
2319 	if (lv_ptr->lv_open > 0 && lvm_reset_spindown == 0)
2320 #else
2321 	if (lv_ptr->lv_open > 0)
2322 #endif
2323 		return -EBUSY;
2324 
2325 	/* check for deletion of snapshot source while
2326 	   snapshot volume still exists */
2327 	if ((lv_ptr->lv_access & LV_SNAPSHOT_ORG) &&
2328 	    lv_ptr->lv_snapshot_next != NULL)
2329 		return -EPERM;
2330 
2331 	lvm_fs_remove_lv(vg_ptr, lv_ptr);
2332 
2333 	if (lv_ptr->lv_access & LV_SNAPSHOT) {
2334 		/*
2335 		 * Atomically make the the snapshot invisible
2336 		 * to the original lv before playing with it.
2337 		 */
2338 		lv_t *org = lv_ptr->lv_snapshot_org;
2339 		down_write(&org->lv_lock);
2340 
2341 		/* remove this snapshot logical volume from the chain */
2342 		lv_ptr->lv_snapshot_prev->lv_snapshot_next =
2343 		    lv_ptr->lv_snapshot_next;
2344 		if (lv_ptr->lv_snapshot_next != NULL) {
2345 			lv_ptr->lv_snapshot_next->lv_snapshot_prev =
2346 			    lv_ptr->lv_snapshot_prev;
2347 		}
2348 
2349 		/* no more snapshots? */
2350 		if (!org->lv_snapshot_next) {
2351 			org->lv_access &= ~LV_SNAPSHOT_ORG;
2352 		}
2353 		up_write(&org->lv_lock);
2354 
2355 		lvm_snapshot_release(lv_ptr);
2356 
2357 		/* Update the VG PE(s) used by snapshot reserve space. */
2358 		vg_ptr->pe_allocated -= lv_ptr->lv_allocated_snapshot_le;
2359 	}
2360 
2361 	lv_ptr->lv_status |= LV_SPINDOWN;
2362 
2363 	/* sync the buffers */
2364 	fsync_dev(lv_ptr->lv_dev);
2365 
2366 	lv_ptr->lv_status &= ~LV_ACTIVE;
2367 
2368 	/* invalidate the buffers */
2369 	invalidate_buffers(lv_ptr->lv_dev);
2370 
2371 	/* reset generic hd */
2372 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
2373 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
2374 	lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
2375 	lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
2376 
2377 	/* reset VG/LV mapping */
2378 	vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG;
2379 	vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1;
2380 
2381 	/* correct the PE count in PVs if this is not a snapshot
2382 	   logical volume */
2383 	if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2384 		/* only if this is no snapshot logical volume because
2385 		   we share the lv_current_pe[] structs with the
2386 		   original logical volume */
2387 		for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2388 			vg_ptr->pe_allocated--;
2389 			for (p = 0; p < vg_ptr->pv_cur; p++) {
2390 				if (vg_ptr->pv[p]->pv_dev ==
2391 				    lv_ptr->lv_current_pe[le].dev)
2392 					vg_ptr->pv[p]->pe_allocated--;
2393 			}
2394 		}
2395 		vfree(lv_ptr->lv_current_pe);
2396 	}
2397 
2398 	P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2399 	kfree(lv_ptr);
2400 	vg_ptr->lv[l] = NULL;
2401 	vg_ptr->lv_cur--;
2402 	return 0;
2403 }				/* lvm_do_lv_remove() */
2404 
2405 
2406 /*
2407  * logical volume extend / reduce
2408  */
__extend_reduce_snapshot(vg_t * vg_ptr,lv_t * old_lv,lv_t * new_lv)2409 static int __extend_reduce_snapshot(vg_t * vg_ptr, lv_t * old_lv,
2410 				    lv_t * new_lv)
2411 {
2412 	ulong size;
2413 	lv_block_exception_t *lvbe;
2414 
2415 	if (!new_lv->lv_block_exception)
2416 		return -ENXIO;
2417 
2418 	size = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
2419 	if ((lvbe = vmalloc(size)) == NULL) {
2420 		printk(KERN_CRIT
2421 		       "%s -- lvm_do_lv_extend_reduce: vmalloc "
2422 		       "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
2423 		       lvm_name, size, __LINE__);
2424 		return -ENOMEM;
2425 	}
2426 
2427 	if ((new_lv->lv_remap_end > old_lv->lv_remap_end) &&
2428 	    (copy_from_user(lvbe, new_lv->lv_block_exception, size))) {
2429 		vfree(lvbe);
2430 		return -EFAULT;
2431 	}
2432 	new_lv->lv_block_exception = lvbe;
2433 
2434 	if (lvm_snapshot_alloc_hash_table(new_lv)) {
2435 		vfree(new_lv->lv_block_exception);
2436 		return -ENOMEM;
2437 	}
2438 
2439 	return 0;
2440 }
2441 
/*
 * Extend/reduce a plain (non-snapshot) LV: swap in a fresh kernel copy
 * of the user-supplied PE map and rebalance the per-PV extent counters
 * from the old map to the new one.  I/O statistics of extents that
 * survive the resize are carried over.  Returns 0 or a negative errno.
 */
static int __extend_reduce(vg_t * vg_ptr, lv_t * old_lv, lv_t * new_lv)
{
	ulong size, l, p, end;
	pe_t *pe;

	/* allocate space for new pe structures */
	size = new_lv->lv_current_le * sizeof(pe_t);
	if ((pe = vmalloc(size)) == NULL) {
		printk(KERN_CRIT
		       "%s -- lvm_do_lv_extend_reduce: "
		       "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
		       lvm_name, size, __LINE__);
		return -ENOMEM;
	}

	/* get the PE structures from user space */
	if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
		/* NOTE(review): freeing new_lv->lv_snapshot_hash_table here
		   looks asymmetric -- that table is set up by
		   __extend_reduce_snapshot(), which is called instead of
		   (not before) this function; verify this path */
		if (old_lv->lv_access & LV_SNAPSHOT)
			vfree(new_lv->lv_snapshot_hash_table);
		vfree(pe);
		return -EFAULT;
	}

	new_lv->lv_current_pe = pe;

	/* reduce allocation counters on PV(s) */
	for (l = 0; l < old_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated--;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
			    old_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated--;
				break;
			}
		}
	}

	/* extend the PE count in PVs */
	for (l = 0; l < new_lv->lv_allocated_le; l++) {
		vg_ptr->pe_allocated++;
		for (p = 0; p < vg_ptr->pv_cur; p++) {
			if (vg_ptr->pv[p]->pv_dev ==
			    new_lv->lv_current_pe[l].dev) {
				vg_ptr->pv[p]->pe_allocated++;
				break;
			}
		}
	}

	/* save available i/o statistic data */
	if (old_lv->lv_stripes < 2) {	/* linear logical volume */
		/* extents map 1:1; copy stats for the overlapping range */
		end = min(old_lv->lv_current_le, new_lv->lv_current_le);
		for (l = 0; l < end; l++) {
			new_lv->lv_current_pe[l].reads +=
			    old_lv->lv_current_pe[l].reads;

			new_lv->lv_current_pe[l].writes +=
			    old_lv->lv_current_pe[l].writes;
		}

	} else {		/* striped logical volume */
		/* note: this 'end' intentionally shadows the outer one */
		uint i, j, source, dest, end, old_stripe_size,
		    new_stripe_size;

		old_stripe_size =
		    old_lv->lv_allocated_le / old_lv->lv_stripes;
		new_stripe_size =
		    new_lv->lv_allocated_le / new_lv->lv_stripes;
		end = min(old_stripe_size, new_stripe_size);

		/* walk matching stripe columns in old and new layouts */
		for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
			for (j = 0; j < end; j++) {
				new_lv->lv_current_pe[dest + j].reads +=
				    old_lv->lv_current_pe[source +
							  j].reads;
				new_lv->lv_current_pe[dest + j].writes +=
				    old_lv->lv_current_pe[source +
							  j].writes;
			}
			source += old_stripe_size;
			dest += new_stripe_size;
		}
	}

	return 0;
}
2528 
/*
 * Extend or reduce the LV named @lv_name in vg[VG_CHR(minor)] to match
 * the user-supplied @new_lv.  Builds the new tables outside the lock
 * via __extend_reduce()/__extend_reduce_snapshot(), then commits them
 * into the existing lv_t under its lv_lock; snapshots of a resized
 * origin are updated in the same pass.  Returns 0 or a negative errno.
 */
static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t * new_lv)
{
	int r;
	ulong l, e, size;
	vg_t *vg_ptr = vg[VG_CHR(minor)];
	lv_t *old_lv;
	pe_t *pe;

	if (!vg_ptr)
		return -ENXIO;

	if ((pe = new_lv->lv_current_pe) == NULL)
		return -EINVAL;

	/* find the LV by name */
	for (l = 0; l < vg_ptr->lv_max; l++)
		if (vg_ptr->lv[l]
		    && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
			break;

	if (l == vg_ptr->lv_max)
		return -ENXIO;

	old_lv = vg_ptr->lv[l];

	if (old_lv->lv_access & LV_SNAPSHOT) {
		/* only perform this operation on active snapshots */
		if (old_lv->lv_status & LV_ACTIVE)
			r = __extend_reduce_snapshot(vg_ptr, old_lv,
						     new_lv);
		else
			r = -EPERM;

	} else
		r = __extend_reduce(vg_ptr, old_lv, new_lv);

	if (r)
		return r;

	/* copy relevant fields */
	down_write(&old_lv->lv_lock);

	if (new_lv->lv_access & LV_SNAPSHOT) {
		/* on extension only the used (lv_remap_ptr) entries are
		   carried over; on reduction copy up to the new end */
		size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
		    old_lv->lv_remap_ptr : new_lv->lv_remap_end;
		size *= sizeof(lv_block_exception_t);
		memcpy(new_lv->lv_block_exception,
		       old_lv->lv_block_exception, size);
		vfree(old_lv->lv_block_exception);
		vfree(old_lv->lv_snapshot_hash_table);

		old_lv->lv_remap_end = new_lv->lv_remap_end;
		old_lv->lv_block_exception = new_lv->lv_block_exception;
		old_lv->lv_snapshot_hash_table =
		    new_lv->lv_snapshot_hash_table;
		old_lv->lv_snapshot_hash_table_size =
		    new_lv->lv_snapshot_hash_table_size;
		old_lv->lv_snapshot_hash_mask =
		    new_lv->lv_snapshot_hash_mask;

		/* re-hash the carried-over exception entries */
		for (e = 0; e < old_lv->lv_remap_ptr; e++)
			lvm_hash_link(new_lv->lv_block_exception + e,
				      new_lv->lv_block_exception[e].
				      rdev_org,
				      new_lv->lv_block_exception[e].
				      rsector_org, new_lv);

		vg_ptr->pe_allocated -= old_lv->lv_allocated_snapshot_le;
		vg_ptr->pe_allocated += new_lv->lv_allocated_le;
		old_lv->lv_allocated_snapshot_le = new_lv->lv_allocated_le;
	} else {
		vfree(old_lv->lv_current_pe);
		vfree(old_lv->lv_snapshot_hash_table);

		old_lv->lv_size = new_lv->lv_size;
		old_lv->lv_allocated_le = new_lv->lv_allocated_le;
		old_lv->lv_current_le = new_lv->lv_current_le;
		old_lv->lv_current_pe = new_lv->lv_current_pe;
		lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
		    old_lv->lv_size;
		lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;

		if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
			lv_t *snap;
			/* snapshots share the origin's PE map; propagate
			   the new geometry to each of them */
			for (snap = old_lv->lv_snapshot_next; snap;
			     snap = snap->lv_snapshot_next) {
				down_write(&snap->lv_lock);
				snap->lv_current_pe =
				    old_lv->lv_current_pe;
				snap->lv_allocated_le =
				    old_lv->lv_allocated_le;
				snap->lv_current_le =
				    old_lv->lv_current_le;
				snap->lv_size = old_lv->lv_size;

				lvm_gendisk.part[MINOR(snap->lv_dev)].
				    nr_sects = old_lv->lv_size;
				lvm_size[MINOR(snap->lv_dev)] =
				    old_lv->lv_size >> 1;
				__update_hardsectsize(snap);
				up_write(&snap->lv_lock);
			}
		}
	}

	__update_hardsectsize(old_lv);
	up_write(&old_lv->lv_lock);

	return 0;
}				/* lvm_do_lv_extend_reduce() */
2638 
2639 
2640 /*
2641  * character device support function logical volume status by name
2642  */
lvm_do_lv_status_byname(vg_t * vg_ptr,void * arg)2643 static int lvm_do_lv_status_byname(vg_t * vg_ptr, void *arg)
2644 {
2645 	uint l;
2646 	lv_status_byname_req_t lv_status_byname_req;
2647 	void *saved_ptr1;
2648 	void *saved_ptr2;
2649 	lv_t *lv_ptr;
2650 
2651 	if (vg_ptr == NULL)
2652 		return -ENXIO;
2653 	if (copy_from_user(&lv_status_byname_req, arg,
2654 			   sizeof(lv_status_byname_req_t)) != 0)
2655 		return -EFAULT;
2656 
2657 	if (lv_status_byname_req.lv == NULL)
2658 		return -EINVAL;
2659 
2660 	for (l = 0; l < vg_ptr->lv_max; l++) {
2661 		if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
2662 		    strcmp(lv_ptr->lv_name,
2663 			   lv_status_byname_req.lv_name) == 0) {
2664 			/* Save usermode pointers */
2665 			if (copy_from_user
2666 			    (&saved_ptr1,
2667 			     &lv_status_byname_req.lv->lv_current_pe,
2668 			     sizeof(void *)) != 0)
2669 				return -EFAULT;
2670 			if (copy_from_user
2671 			    (&saved_ptr2,
2672 			     &lv_status_byname_req.lv->lv_block_exception,
2673 			     sizeof(void *)) != 0)
2674 				return -EFAULT;
2675 			if (copy_to_user(lv_status_byname_req.lv,
2676 					 lv_ptr, sizeof(lv_t)) != 0)
2677 				return -EFAULT;
2678 			if (saved_ptr1 != NULL) {
2679 				if (copy_to_user(saved_ptr1,
2680 						 lv_ptr->lv_current_pe,
2681 						 lv_ptr->lv_allocated_le *
2682 						 sizeof(pe_t)) != 0)
2683 					return -EFAULT;
2684 			}
2685 			/* Restore usermode pointers */
2686 			if (copy_to_user
2687 			    (&lv_status_byname_req.lv->lv_current_pe,
2688 			     &saved_ptr1, sizeof(void *)) != 0)
2689 				return -EFAULT;
2690 			return 0;
2691 		}
2692 	}
2693 	return -ENXIO;
2694 }				/* lvm_do_lv_status_byname() */
2695 
2696 
2697 /*
2698  * character device support function logical volume status by index
2699  */
lvm_do_lv_status_byindex(vg_t * vg_ptr,void * arg)2700 static int lvm_do_lv_status_byindex(vg_t * vg_ptr, void *arg)
2701 {
2702 	lv_status_byindex_req_t lv_status_byindex_req;
2703 	void *saved_ptr1;
2704 	void *saved_ptr2;
2705 	lv_t *lv_ptr;
2706 
2707 	if (vg_ptr == NULL)
2708 		return -ENXIO;
2709 	if (copy_from_user(&lv_status_byindex_req, arg,
2710 			   sizeof(lv_status_byindex_req)) != 0)
2711 		return -EFAULT;
2712 
2713 	if (lv_status_byindex_req.lv == NULL)
2714 		return -EINVAL;
2715 	if ((lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
2716 		return -ENXIO;
2717 
2718 	/* Save usermode pointers */
2719 	if (copy_from_user
2720 	    (&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe,
2721 	     sizeof(void *)) != 0)
2722 		return -EFAULT;
2723 	if (copy_from_user
2724 	    (&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception,
2725 	     sizeof(void *)) != 0)
2726 		return -EFAULT;
2727 
2728 	if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) !=
2729 	    0)
2730 		return -EFAULT;
2731 	if (saved_ptr1 != NULL) {
2732 		if (copy_to_user(saved_ptr1,
2733 				 lv_ptr->lv_current_pe,
2734 				 lv_ptr->lv_allocated_le *
2735 				 sizeof(pe_t)) != 0)
2736 			return -EFAULT;
2737 	}
2738 
2739 	/* Restore usermode pointers */
2740 	if (copy_to_user
2741 	    (&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1,
2742 	     sizeof(void *)) != 0)
2743 		return -EFAULT;
2744 
2745 	return 0;
2746 }				/* lvm_do_lv_status_byindex() */
2747 
2748 
2749 /*
2750  * character device support function logical volume status by device number
2751  */
lvm_do_lv_status_bydev(vg_t * vg_ptr,void * arg)2752 static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void *arg)
2753 {
2754 	int l;
2755 	lv_status_bydev_req_t lv_status_bydev_req;
2756 	void *saved_ptr1;
2757 	void *saved_ptr2;
2758 	lv_t *lv_ptr;
2759 
2760 	if (vg_ptr == NULL)
2761 		return -ENXIO;
2762 	if (copy_from_user(&lv_status_bydev_req, arg,
2763 			   sizeof(lv_status_bydev_req)) != 0)
2764 		return -EFAULT;
2765 
2766 	for (l = 0; l < vg_ptr->lv_max; l++) {
2767 		if (vg_ptr->lv[l] == NULL)
2768 			continue;
2769 		if (vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev)
2770 			break;
2771 	}
2772 
2773 	if (l == vg_ptr->lv_max)
2774 		return -ENXIO;
2775 	lv_ptr = vg_ptr->lv[l];
2776 
2777 	/* Save usermode pointers */
2778 	if (copy_from_user
2779 	    (&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe,
2780 	     sizeof(void *)) != 0)
2781 		return -EFAULT;
2782 	if (copy_from_user
2783 	    (&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception,
2784 	     sizeof(void *)) != 0)
2785 		return -EFAULT;
2786 
2787 	if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) !=
2788 	    0)
2789 		return -EFAULT;
2790 	if (saved_ptr1 != NULL) {
2791 		if (copy_to_user(saved_ptr1,
2792 				 lv_ptr->lv_current_pe,
2793 				 lv_ptr->lv_allocated_le *
2794 				 sizeof(pe_t)) != 0)
2795 			return -EFAULT;
2796 	}
2797 	/* Restore usermode pointers */
2798 	if (copy_to_user
2799 	    (&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1,
2800 	     sizeof(void *)) != 0)
2801 		return -EFAULT;
2802 
2803 	return 0;
2804 }				/* lvm_do_lv_status_bydev() */
2805 
2806 
2807 /*
2808  * character device support function rename a logical volume
2809  */
lvm_do_lv_rename(vg_t * vg_ptr,lv_req_t * lv_req,lv_t * lv)2810 static int lvm_do_lv_rename(vg_t * vg_ptr, lv_req_t * lv_req, lv_t * lv)
2811 {
2812 	int l = 0;
2813 	int ret = 0;
2814 	lv_t *lv_ptr = NULL;
2815 
2816 	if (!vg_ptr)
2817 		return -ENXIO;
2818 
2819 	for (l = 0; l < vg_ptr->lv_max; l++) {
2820 		if ((lv_ptr = vg_ptr->lv[l]) == NULL)
2821 			continue;
2822 		if (lv_ptr->lv_dev == lv->lv_dev) {
2823 			lvm_fs_remove_lv(vg_ptr, lv_ptr);
2824 			strncpy(lv_ptr->lv_name, lv_req->lv_name,
2825 				NAME_LEN);
2826 			lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2827 				lvm_fs_create_lv(vg_ptr, lv_ptr);
2828 			break;
2829 		}
2830 	}
2831 	if (l == vg_ptr->lv_max)
2832 		ret = -ENODEV;
2833 
2834 	return ret;
2835 }				/* lvm_do_lv_rename */
2836 
2837 
2838 /*
2839  * character device support function physical volume change
2840  */
lvm_do_pv_change(vg_t * vg_ptr,void * arg)2841 static int lvm_do_pv_change(vg_t * vg_ptr, void *arg)
2842 {
2843 	uint p;
2844 	pv_t *pv_ptr;
2845 	struct block_device *bd;
2846 
2847 	if (vg_ptr == NULL)
2848 		return -ENXIO;
2849 	if (copy_from_user(&pv_change_req, arg,
2850 			   sizeof(pv_change_req)) != 0)
2851 		return -EFAULT;
2852 
2853 	for (p = 0; p < vg_ptr->pv_max; p++) {
2854 		pv_ptr = vg_ptr->pv[p];
2855 		if (pv_ptr != NULL &&
2856 		    strcmp(pv_ptr->pv_name, pv_change_req.pv_name) == 0) {
2857 
2858 			bd = pv_ptr->bd;
2859 			if (copy_from_user(pv_ptr,
2860 					   pv_change_req.pv,
2861 					   sizeof(pv_t)) != 0)
2862 				return -EFAULT;
2863 			pv_ptr->bd = bd;
2864 
2865 			/* We don't need the PE list
2866 			   in kernel space as with LVs pe_t list */
2867 			pv_ptr->pe = NULL;
2868 			return 0;
2869 		}
2870 	}
2871 	return -ENXIO;
2872 }				/* lvm_do_pv_change() */
2873 
2874 /*
2875  * character device support function get physical volume status
2876  */
lvm_do_pv_status(vg_t * vg_ptr,void * arg)2877 static int lvm_do_pv_status(vg_t * vg_ptr, void *arg)
2878 {
2879 	uint p;
2880 	pv_t *pv_ptr;
2881 
2882 	if (vg_ptr == NULL)
2883 		return -ENXIO;
2884 	if (copy_from_user(&pv_status_req, arg,
2885 			   sizeof(pv_status_req)) != 0)
2886 		return -EFAULT;
2887 
2888 	for (p = 0; p < vg_ptr->pv_max; p++) {
2889 		pv_ptr = vg_ptr->pv[p];
2890 		if (pv_ptr != NULL &&
2891 		    strcmp(pv_ptr->pv_name, pv_status_req.pv_name) == 0) {
2892 			if (copy_to_user(pv_status_req.pv,
2893 					 pv_ptr, sizeof(pv_t)) != 0)
2894 				return -EFAULT;
2895 			return 0;
2896 		}
2897 	}
2898 	return -ENXIO;
2899 }				/* lvm_do_pv_status() */
2900 
2901 
2902 /*
2903  * character device support function flush and invalidate all buffers of a PV
2904  */
lvm_do_pv_flush(void * arg)2905 static int lvm_do_pv_flush(void *arg)
2906 {
2907 	pv_flush_req_t pv_flush_req;
2908 
2909 	if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
2910 		return -EFAULT;
2911 
2912 	fsync_dev(pv_flush_req.pv_dev);
2913 	invalidate_buffers(pv_flush_req.pv_dev);
2914 
2915 	return 0;
2916 }
2917 
2918 
2919 /*
2920  * support function initialize gendisk variables
2921  */
lvm_geninit(struct gendisk * lvm_gdisk)2922 static void __init lvm_geninit(struct gendisk *lvm_gdisk)
2923 {
2924 	int i = 0;
2925 
2926 #ifdef DEBUG_GENDISK
2927 	printk(KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name);
2928 #endif
2929 
2930 	for (i = 0; i < MAX_LV; i++) {
2931 		lvm_gendisk.part[i].start_sect = -1;	/* avoid partition check */
2932 		lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0;
2933 		lvm_blocksizes[i] = BLOCK_SIZE;
2934 	}
2935 
2936 	blk_size[MAJOR_NR] = lvm_size;
2937 	blksize_size[MAJOR_NR] = lvm_blocksizes;
2938 	hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
2939 
2940 	return;
2941 }				/* lvm_gen_init() */
2942 
2943 
2944 
/* Must have down_write(_pe_lock) when we enqueue buffers */
static void _queue_io(struct buffer_head *bh, int rw)
{
	/* a buffer may sit on only one deferred chain at a time */
	if (bh->b_reqnext)
		BUG();
	/* push onto the head of the deferred list; 'rw' is unused here --
	   _flush_io() resubmits everything as WRITE */
	bh->b_reqnext = _pe_requests;
	_pe_requests = bh;
}
2953 
/* Must have down_write(_pe_lock) when we dequeue buffers */
static struct buffer_head *_dequeue_io(void)
{
	/* hand the whole deferred chain to the caller and reset the list */
	struct buffer_head *bh = _pe_requests;
	_pe_requests = NULL;
	return bh;
}
2961 
2962 /*
2963  * We do not need to hold _pe_lock to flush buffers.  bh should be taken from
2964  * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
2965  * NULL and we drop _pe_lock.  Any new buffers defered at this time will be
2966  * added to a new list, and the old buffers can have their I/O restarted
2967  * asynchronously.
2968  *
2969  * If, for some reason, the same PE is locked again before all of these writes
2970  * have finished, then these buffers will just be re-queued (i.e. no danger).
2971  */
_flush_io(struct buffer_head * bh)2972 static void _flush_io(struct buffer_head *bh)
2973 {
2974 	while (bh) {
2975 		struct buffer_head *next = bh->b_reqnext;
2976 		bh->b_reqnext = NULL;
2977 		/* resubmit this buffer head */
2978 		generic_make_request(WRITE, bh);
2979 		bh = next;
2980 	}
2981 }
2982 
2983 
2984 /*
2985  * we must open the pv's before we use them
2986  */
static int _open_pv(pv_t * pv)
{
	int err;
	struct block_device *bd;

	/* look up / create the block_device for the PV's device number */
	if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
		return -ENOMEM;

	/* open it read/write on behalf of the kernel;
	   NOTE(review): on failure the reference taken by bdget() is not
	   released here -- verify whether blkdev_get() drops it itself in
	   this kernel version */
	err = blkdev_get(bd, FMODE_READ | FMODE_WRITE, 0, BDEV_FILE);
	if (err)
		return err;

	/* remember the open device; released later by _close_pv() */
	pv->bd = bd;
	return 0;
}
3002 
/* Release the block device a PV was opened on by _open_pv(), if any. */
static void _close_pv(pv_t * pv)
{
	struct block_device *bdev;

	if (!pv)
		return;

	/* clear the field before dropping the reference */
	bdev = pv->bd;
	pv->bd = NULL;
	if (bdev)
		blkdev_put(bdev, BDEV_FILE);
}
3012 
3013 
_sectors_to_k(unsigned long sect)3014 static unsigned long _sectors_to_k(unsigned long sect)
3015 {
3016 	if (SECTOR_SIZE > 1024) {
3017 		return sect * (SECTOR_SIZE / 1024);
3018 	}
3019 
3020 	return sect / (1024 / SECTOR_SIZE);
3021 }
3022 
/* module metadata and init/exit entry points */
MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
MODULE_DESCRIPTION("Logical Volume Manager");
#ifdef MODULE_LICENSE
MODULE_LICENSE("GPL");
#endif

module_init(lvm_init);
module_exit(lvm_cleanup);
3031