1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 #ifndef __XFS_LINUX__
33 #define __XFS_LINUX__
34 
35 #include <linux/types.h>
36 #include <linux/config.h>
37 
38 #undef __user
39 #define __user
40 
41 /*
42  * Some types are conditional depending on the target system.
43  * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
44  * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
45  * as requiring XFS_BIG_BLKNOS to be set.
46  */
47 #define XFS_BIG_BLKNOS	0
48 #define XFS_BIG_INUMS	0
49 
50 #include <xfs_types.h>
51 #include <xfs_arch.h>
52 
53 #include <kmem.h>
54 #include <mrlock.h>
55 #include <spin.h>
56 #include <sv.h>
57 #include <mutex.h>
58 #include <sema.h>
59 #include <time.h>
60 
61 #include <support/qsort.h>
62 #include <support/ktrace.h>
63 #include <support/debug.h>
64 #include <support/move.h>
65 #include <support/uuid.h>
66 
67 #include <linux/mm.h>
68 #include <linux/kernel.h>
69 #include <linux/blkdev.h>
70 #include <linux/slab.h>
71 #include <linux/module.h>
72 #include <linux/file.h>
73 #include <linux/swap.h>
74 #include <linux/errno.h>
75 #include <linux/locks.h>
76 #include <linux/sched.h>
77 #include <linux/bitops.h>
78 #include <linux/major.h>
79 #include <linux/pagemap.h>
80 #include <linux/seq_file.h>
81 #include <linux/init.h>
82 #include <linux/list.h>
83 #include <linux/proc_fs.h>
84 
85 #include <asm/page.h>
86 #include <asm/div64.h>
87 #include <asm/param.h>
88 #include <asm/uaccess.h>
89 #include <asm/byteorder.h>
90 #include <asm/unaligned.h>
91 
92 #include <xfs_behavior.h>
93 #include <xfs_vfs.h>
94 #include <xfs_cred.h>
95 #include <xfs_vnode.h>
96 #include <xfs_stats.h>
97 #include <xfs_sysctl.h>
98 #include <xfs_iops.h>
99 #include <xfs_super.h>
100 #include <xfs_globals.h>
101 #include <xfs_fs_subr.h>
102 #include <xfs_lrw.h>
103 #include <xfs_buf.h>
104 
105 /*
106  * Feature macros (disable/enable)
107  */
108 #define HAVE_REFCACHE	/* 2.4 uses a NFS reference cache; 2.6+ does not */
109 #undef  HAVE_SENDFILE	/* sendfile(2) is a 2.6+ system call, not in 2.4 */
110 
111 #ifndef EVMS_MAJOR
112 #define EVMS_MAJOR 117
113 #endif
114 
115 /*
116  * State flag for unwritten extent buffers.
117  *
118  * We need to be able to distinguish between these and delayed
119  * allocate buffers within XFS.  The generic IO path code does
120  * not need to distinguish - we use the BH_Delay flag for both
121  * delalloc and these ondisk-uninitialised buffers.
122  */
123 #define BH_Unwritten		BH_PrivateStart
124 #define buffer_unwritten(bh)	__buffer_state(bh, Unwritten)
set_buffer_unwritten_io(struct buffer_head * bh)125 static inline void set_buffer_unwritten_io(struct buffer_head *bh)
126 {
127 	bh->b_end_io = linvfs_unwritten_done;
128 }
BUFFER_FNS(Unwritten,unwritten)129 BUFFER_FNS(Unwritten, unwritten)
130 
131 #define xfs_refcache_size	xfs_params.refcache_size.val
132 #define xfs_refcache_purge_count xfs_params.refcache_purge.val
133 #define restricted_chown	xfs_params.restrict_chown.val
134 #define irix_sgid_inherit	xfs_params.sgid_inherit.val
135 #define irix_symlink_mode	xfs_params.symlink_mode.val
136 #define xfs_panic_mask		xfs_params.panic_mask.val
137 #define xfs_error_level		xfs_params.error_level.val
138 #define xfs_syncd_centisecs	xfs_params.syncd_timer.val
139 #define xfs_stats_clear		xfs_params.stats_clear.val
140 #define xfs_inherit_sync	xfs_params.inherit_sync.val
141 #define xfs_inherit_nodump	xfs_params.inherit_nodump.val
142 #define xfs_inherit_noatime	xfs_params.inherit_noatim.val
143 #define xfs_buf_timer_centisecs	xfs_params.xfs_buf_timer.val
144 #define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
145 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
146 #define xfs_rotorstep		xfs_params.rotorstep.val
147 
148 #define current_cpu()		smp_processor_id()
149 #define current_pid()		(current->pid)
150 #define current_fsuid(cred)	(current->fsuid)
151 #define current_fsgid(cred)	(current->fsgid)
152 
/*
 * Page / disk-block helpers.  The "- 9" in DPPSHFT means a disk block
 * here is 2^9 = 512 bytes.
 */
#define NBPP		PAGE_SIZE		/* bytes per page */
#define DPPSHFT		(PAGE_SHIFT - 9)	/* log2(disk blocks per page) */
#define NDPP		(1 << DPPSHFT)		/* disk blocks per page */
/* disk blocks to pages, rounding up */
#define dtop(DD)	(((DD) + NDPP - 1) >> DPPSHFT)
/* disk blocks to pages, truncating */
#define dtopt(DD)	((DD) >> DPPSHFT)
/* offset of a disk block within its page, in disk blocks */
#define dpoff(DD)	((DD) & (NDPP - 1))
159 
160 #define NBBY		8		/* number of bits per byte */
161 #define	NBPC		PAGE_SIZE	/* Number of bytes per click */
162 #define	BPCSHIFT	PAGE_SHIFT	/* LOG2(NBPC) if exact */
163 
164 /*
165  * Size of block device i/o is parameterized here.
166  * Currently the system supports page-sized i/o.
167  */
168 #define	BLKDEV_IOSHIFT		BPCSHIFT
169 #define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
170 /* number of BB's per block device block */
171 #define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)
172 
/*
 * Byte <-> click conversions.  A click is NBPC bytes and BPCSHIFT is its
 * log2.  The "btoc"/"offtoc" forms round up to a whole click, the
 * "btoct"/"offtoct" forms truncate.  The io_* variants use IO_NBPC and
 * IO_BPCSHIFT in place of NBPC and BPCSHIFT.
 *
 * Each macro is defined exactly once in this group.
 */

/* bytes to clicks */
#define	btoc(x)		(((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
#define	btoct(x)	((__psunsigned_t)(x)>>BPCSHIFT)
#define	btoc64(x)	(((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
#define	btoct64(x)	((__uint64_t)(x)>>BPCSHIFT)
#define	io_btoc(x)	(((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
#define	io_btoct(x)	((__psunsigned_t)(x)>>IO_BPCSHIFT)

/* off_t bytes to clicks */
#define offtoc(x)       (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
#define offtoct(x)      ((xfs_off_t)(x)>>BPCSHIFT)

/* clicks to off_t bytes */
#define	ctooff(x)	((xfs_off_t)(x)<<BPCSHIFT)

/* clicks to bytes */
#define	ctob(x)		((__psunsigned_t)(x)<<BPCSHIFT)
#define	ctob64(x)	((__uint64_t)(x)<<BPCSHIFT)
#define	io_ctob(x)	((__psunsigned_t)(x)<<IO_BPCSHIFT)
196 
197 #ifndef CELL_CAPABLE
198 #define FSC_NOTIFY_NAME_CHANGED(vp)
199 #endif
200 
201 #ifndef ENOATTR
202 #define ENOATTR		ENODATA		/* Attribute not found */
203 #endif
204 
205 /* Note: EWRONGFS never visible outside the kernel */
206 #define	EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
207 
208 /*
209  * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
210  *     return codes out of its known range in errno.
211  * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
212  *     conflict with any code we use already or any code a driver may use)
213  * XXX Some options (currently we do #2):
214  *	1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
215  *	2/ 990 ["Unknown error 990"]
216  *	3/ EUCLEAN ["Structure needs cleaning"]
217  *	4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
218  */
219 #define EFSCORRUPTED    990		/* Filesystem is corrupted */
220 
221 #define SYNCHRONIZE()	barrier()
222 #define __return_address __builtin_return_address(0)
223 
224 /*
225  * IRIX (BSD) quotactl makes use of separate commands for user/group,
226  * whereas on Linux the syscall encodes this information into the cmd
227  * field (see the QCMD macro in quota.h).  These macros help keep the
228  * code portable - they are not visible from the syscall interface.
229  */
230 #define Q_XSETGQLIM	XQM_CMD(0x8)	/* set groups disk limits */
231 #define Q_XGETGQUOTA	XQM_CMD(0x9)	/* get groups disk limits */
232 
233 /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
234 /* we may well need to fine-tune this if it ever becomes an issue.  */
235 #define DQUOT_MAX_HEURISTIC	1024	/* NR_DQUOTS */
236 #define ndquot			DQUOT_MAX_HEURISTIC
237 
238 /* IRIX uses the current size of the name cache to guess a good value */
239 /* - this isn't the same but is a good enough starting point for now. */
240 #define DQUOT_HASH_HEURISTIC	files_stat.nr_files
241 
242 /* IRIX inodes maintain the project ID also, zero this field on Linux */
243 #define DEFAULT_PROJID	0
244 #define dfltprid	DEFAULT_PROJID
245 
246 #ifndef pgoff_t		/* 2.6 compat */
247 #define pgoff_t		unsigned long
248 #endif
249 
250 #define MAXPATHLEN	1024
251 
252 #define MIN(a,b)	(min(a,b))
253 #define MAX(a,b)	(max(a,b))
254 #define howmany(x, y)	(((x)+((y)-1))/(y))
255 #define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
256 
257 /*
258  * Juggle IRIX device numbers - still used in ondisk structures
259  */
#define XFS_DEV_BITSMAJOR	14
#define XFS_DEV_BITSMINOR	18
#define XFS_DEV_MAXMAJ		0x1ff
#define XFS_DEV_MAXMIN		0x3ffff
/* Major number: the bits above the low XFS_DEV_BITSMINOR bits, masked. */
#define XFS_DEV_MAJOR(dev)	((int)(((unsigned)(dev)>>XFS_DEV_BITSMINOR) \
				    & XFS_DEV_MAXMAJ))
/* Minor number: the low bits selected by XFS_DEV_MAXMIN. */
#define XFS_DEV_MINOR(dev)	((int)((dev)&XFS_DEV_MAXMIN))
/*
 * Pack a major/minor pair into an xfs_dev_t: major goes above the low
 * XFS_DEV_BITSMINOR bits, minor is masked with XFS_DEV_MAXMIN.  Both
 * arguments are parenthesised so that an expression argument such as
 * "a | b" is shifted or masked as a whole.
 */
#define XFS_MKDEV(major,minor) ((xfs_dev_t)(((major)<<XFS_DEV_BITSMINOR) \
				    | ((minor)&XFS_DEV_MAXMIN)))
269 
270 #define XFS_DEV_TO_KDEVT(dev)	mk_kdev(XFS_DEV_MAJOR(dev),XFS_DEV_MINOR(dev))
271 
272 #define xfs_stack_trace()	dump_stack()
273 
274 #define xfs_itruncate_data(ip, off)	\
275 	(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
276 
277 
278 /* Move the kernel do_div definition off to one side */
279 
280 #if defined __i386__
281 /* For ia32 we need to pull some tricks to get past various versions
282  * of the compiler which do not like us using do_div in the middle
283  * of large functions.
284  */
285 static inline __u32 xfs_do_div(void *a, __u32 b, int n)
286 {
287 	__u32	mod;
288 
289 	switch (n) {
290 		case 4:
291 			mod = *(__u32 *)a % b;
292 			*(__u32 *)a = *(__u32 *)a / b;
293 			return mod;
294 		case 8:
295 			{
296 			unsigned long __upper, __low, __high, __mod;
297 			__u64	c = *(__u64 *)a;
298 			__upper = __high = c >> 32;
299 			__low = c;
300 			if (__high) {
301 				__upper = __high % (b);
302 				__high = __high / (b);
303 			}
304 			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
305 			asm("":"=A" (c):"a" (__low),"d" (__high));
306 			*(__u64 *)a = c;
307 			return __mod;
308 			}
309 	}
310 
311 	/* NOTREACHED */
312 	return 0;
313 }
314 
315 /* Side effect free 64 bit mod operation */
xfs_do_mod(void * a,__u32 b,int n)316 static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
317 {
318 	switch (n) {
319 		case 4:
320 			return *(__u32 *)a % b;
321 		case 8:
322 			{
323 			unsigned long __upper, __low, __high, __mod;
324 			__u64	c = *(__u64 *)a;
325 			__upper = __high = c >> 32;
326 			__low = c;
327 			if (__high) {
328 				__upper = __high % (b);
329 				__high = __high / (b);
330 			}
331 			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
332 			asm("":"=A" (c):"a" (__low),"d" (__high));
333 			return __mod;
334 			}
335 	}
336 
337 	/* NOTREACHED */
338 	return 0;
339 }
340 #else
341 static inline __u32 xfs_do_div(void *a, __u32 b, int n)
342 {
343 	__u32	mod;
344 
345 	switch (n) {
346 		case 4:
347 			mod = *(__u32 *)a % b;
348 			*(__u32 *)a = *(__u32 *)a / b;
349 			return mod;
350 		case 8:
351 			mod = do_div(*(__u64 *)a, b);
352 			return mod;
353 	}
354 
355 	/* NOTREACHED */
356 	return 0;
357 }
358 
359 /* Side effect free 64 bit mod operation */
360 static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
361 {
362 	switch (n) {
363 		case 4:
364 			return *(__u32 *)a % b;
365 		case 8:
366 			{
367 			__u64	c = *(__u64 *)a;
368 			return do_div(c, b);
369 			}
370 	}
371 
372 	/* NOTREACHED */
373 	return 0;
374 }
375 #endif
376 
377 #undef do_div
378 #define do_div(a, b)	xfs_do_div(&(a), (b), sizeof(a))
379 #define do_mod(a, b)	xfs_do_mod(&(a), (b), sizeof(a))
380 
roundup_64(__uint64_t x,__uint32_t y)381 static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
382 {
383 	x += y - 1;
384 	do_div(x, y);
385 	return(x * y);
386 }
387 
388 #endif /* __XFS_LINUX__ */
389