1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
5  *  Copyright (C) 2000 Red Hat, Inc.
6  *  Copyright (C) 2000 Los Alamos National Laboratory
7  *  Copyright (C) 2000 TurboLinux, Inc.
8  *  Copyright (C) 2001 Mountain View Data, Inc.
9  *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
10  *
11  *   This file is part of InterMezzo, http://www.inter-mezzo.org.
12  *
13  *   InterMezzo is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   InterMezzo is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with InterMezzo; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26 
27 #include <linux/types.h>
28 #include <linux/param.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/fs.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stat.h>
35 #include <linux/errno.h>
36 #include <linux/locks.h>
37 #include <asm/segment.h>
38 #include <asm/uaccess.h>
39 #include <linux/string.h>
40 #include <linux/smp_lock.h>
41 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
42 #include <linux/jbd.h>
43 #include <linux/ext3_fs.h>
44 #include <linux/ext3_jbd.h>
45 #endif
46 
47 #include <linux/intermezzo_fs.h>
48 #include <linux/intermezzo_psdev.h>
49 
50 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
51 
/* Number of filesystem blocks spanned by PATH_MAX bytes on the inode's
 * superblock.  The right shift truncates, so a trailing partial block is
 * not counted. */
#define MAX_PATH_BLOCKS(ino) \
        (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((ino)->i_sb))

/* Same computation for NAME_MAX bytes (a single name component). */
#define MAX_NAME_BLOCKS(ino) \
        (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((ino)->i_sb))
54 
/* Space requirements:
 *   presto_do_truncate:
 *        used to truncate the KML forward to the next fset->chunksize
 *        boundary
 *          - zero the partial block
 *          - update the inode
 *   presto_write_record:
 *        write the header (< one block)
 *        write one path (< MAX_PATHLEN)
 *        possibly write another path (< MAX_PATHLEN)
 *        write the suffix (< one block)
 *   presto_update_last_rcvd:
 *        write one block
 */
68 
presto_e3_freespace(struct presto_cache * cache,struct super_block * sb)69 static loff_t presto_e3_freespace(struct presto_cache *cache,
70                                          struct super_block *sb)
71 {
72         loff_t freebl = le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
73         loff_t avail =   freebl -
74                 le32_to_cpu(sb->u.ext3_sb.s_es->s_r_blocks_count);
75         return (avail <<  EXT3_BLOCK_SIZE_BITS(sb));
76 }
77 
78 /* start the filesystem journal operations */
presto_e3_trans_start(struct presto_file_set * fset,struct inode * inode,int op)79 static void *presto_e3_trans_start(struct presto_file_set *fset,
80                                    struct inode *inode,
81                                    int op)
82 {
83         int jblocks;
84         int trunc_blks, one_path_blks, extra_path_blks,
85                 extra_name_blks, lml_blks;
86         __u32 avail_kmlblocks;
87         handle_t *handle;
88 
89         if ( presto_no_journal(fset) ||
90              strcmp(fset->fset_cache->cache_type, "ext3"))
91           {
92             CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
93                    fset->fset_cache->cache_type);
94             return NULL;
95           }
96 
97         avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count;
98 
99         if ( avail_kmlblocks < 3 ) {
100                 return ERR_PTR(-ENOSPC);
101         }
102 
103         if (  (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
104               && avail_kmlblocks < 6 ) {
105                 return ERR_PTR(-ENOSPC);
106         }
107 
108         /* Need journal space for:
109              at least three writes to KML (two one block writes, one a path)
110              possibly a second name (unlink, rmdir)
111              possibly a second path (symlink, rename)
112              a one block write to the last rcvd file
113         */
114 
115         trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1;
116         one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
117         lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
118         extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode);
119         extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode);
120 
121         /* additional blocks appear for "two pathname" operations
122            and operations involving the LML records
123         */
124         switch (op) {
125         case KML_OPCODE_TRUNC:
126                 jblocks = one_path_blks + extra_name_blks + trunc_blks
127                         + EXT3_DELETE_TRANS_BLOCKS;
128                 break;
129         case KML_OPCODE_KML_TRUNC:
130                 /* Hopefully this is a little better, but I'm still mostly
131                  * guessing here. */
132                 /* unlink 1 */
133                 jblocks = extra_name_blks + trunc_blks +
134                         EXT3_DELETE_TRANS_BLOCKS + 2;
135 
136                 /* unlink 2 */
137                 jblocks += extra_name_blks + trunc_blks +
138                         EXT3_DELETE_TRANS_BLOCKS + 2;
139 
140                 /* rename 1 */
141                 jblocks += 2 * extra_path_blks + trunc_blks +
142                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
143 
144                 /* rename 2 */
145                 jblocks += 2 * extra_path_blks + trunc_blks +
146                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
147                 break;
148         case KML_OPCODE_RELEASE:
149                 /*
150                 jblocks = one_path_blks + lml_blks + 2*trunc_blks;
151                 */
152                 jblocks = one_path_blks;
153                 break;
154         case KML_OPCODE_SETATTR:
155                 jblocks = one_path_blks + trunc_blks + 1 ;
156                 break;
157         case KML_OPCODE_CREATE:
158                 jblocks = one_path_blks + trunc_blks
159                         + EXT3_DATA_TRANS_BLOCKS + 3 + 2;
160                 break;
161         case KML_OPCODE_LINK:
162                 jblocks = one_path_blks + trunc_blks
163                         + EXT3_DATA_TRANS_BLOCKS + 2;
164                 break;
165         case KML_OPCODE_UNLINK:
166                 jblocks = one_path_blks + extra_name_blks + trunc_blks
167                         + EXT3_DELETE_TRANS_BLOCKS + 2;
168                 break;
169         case KML_OPCODE_SYMLINK:
170                 jblocks = one_path_blks + extra_path_blks + trunc_blks
171                         + EXT3_DATA_TRANS_BLOCKS + 5;
172                 break;
173         case KML_OPCODE_MKDIR:
174                 jblocks = one_path_blks + trunc_blks
175                         + EXT3_DATA_TRANS_BLOCKS + 4 + 2;
176                 break;
177         case KML_OPCODE_RMDIR:
178                 jblocks = one_path_blks + extra_name_blks + trunc_blks
179                         + EXT3_DELETE_TRANS_BLOCKS + 1;
180                 break;
181         case KML_OPCODE_MKNOD:
182                 jblocks = one_path_blks + trunc_blks +
183                         EXT3_DATA_TRANS_BLOCKS + 3 + 2;
184                 break;
185         case KML_OPCODE_RENAME:
186                 jblocks = one_path_blks + extra_path_blks + trunc_blks +
187                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
188                 break;
189         case KML_OPCODE_WRITE:
190                 jblocks = one_path_blks;
191                 /*  add this when we can wrap our transaction with
192                     that of ext3_file_write (ordered writes)
193                     +  EXT3_DATA_TRANS_BLOCKS;
194                 */
195                 break;
196         default:
197                 CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
198                 return NULL;
199         }
200 
201         CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n",
202                jblocks, op);
203         /* journal_start/stop does not do its own locking while updating
204          * the handle/transaction information. Hence we create our own
205          * critical section to protect these calls. -SHP
206          */
207         lock_kernel();
208         handle = journal_start(EXT3_JOURNAL(inode), jblocks);
209         unlock_kernel();
210         return handle;
211 }
212 
/*
 * Finish the journal transaction identified by @handle.
 *
 * Does nothing when presto_no_journal(fset) holds or when @handle is NULL.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if (!presto_no_journal(fset) && handle) {
                /* journal_start/stop do not do their own locking while
                 * updating the handle/transaction information, so the call
                 * is wrapped in the big kernel lock. -SHP */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
223 
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3-private flags of @inode.
 *
 * When the headers do not define EXT3_JOURNAL_DATA_FL the function body is
 * empty and the build emits a warning instead.
 */
static void presto_e3_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
232 
233 /* The logic here is a slightly modified version of ext3/inode.c:block_to_path
234  */
presto_e3_has_all_data(struct inode * inode)235 static int presto_e3_has_all_data(struct inode *inode)
236 {
237         int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
238         int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
239         const long direct_blocks = EXT3_NDIR_BLOCKS,
240                 indirect_blocks = ptrs,
241                 double_blocks = (1 << (ptrs_bits * 2));
242         long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
243                 inode->i_sb->s_blocksize_bits;
244 
245         ENTRY;
246 
247         if (inode->i_size == 0) {
248                 EXIT;
249                 return 1;
250         }
251 
252         if (block < direct_blocks) {
253                 /* No indirect blocks, no problem. */
254         } else if (block < indirect_blocks + direct_blocks) {
255                 block++;
256         } else if (block < double_blocks + indirect_blocks + direct_blocks) {
257                 block += 2;
258         } else if (((block - double_blocks - indirect_blocks - direct_blocks)
259                     >> (ptrs_bits * 2)) < ptrs) {
260                 block += 3;
261         }
262 
263         block *= (inode->i_sb->s_blocksize / 512);
264 
265         CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks);
266 
267         if (block > inode->i_blocks) {
268                 EXIT;
269                 return 0;
270         }
271 
272         EXIT;
273         return 1;
274 }
275 
276 struct journal_ops presto_ext3_journal_ops = {
277         .tr_all_data     = presto_e3_has_all_data,
278         .tr_avail        = presto_e3_freespace,
279         .tr_start        =  presto_e3_trans_start,
280         .tr_commit       = presto_e3_trans_commit,
281         .tr_journal_data = presto_e3_journal_file_data,
282         .tr_ilookup      = presto_iget_ilookup
283 };
284 
285 #endif /* CONFIG_EXT3_FS */
286