1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
3 *
4 * Copyright (C) 2000 Stelias Computing, Inc.
5 * Copyright (C) 2000 Red Hat, Inc.
6 * Copyright (C) 2000 TurboLinux, Inc.
7 * Copyright (C) 2000 Los Alamos National Laboratory.
8 * Copyright (C) 2000, 2001 Tacit Networks, Inc.
9 * Copyright (C) 2000 Peter J. Braam
10 * Copyright (C) 2001 Mountain View Data, Inc.
11 * Copyright (C) 2001 Cluster File Systems, Inc.
12 *
13 * This file is part of InterMezzo, http://www.inter-mezzo.org.
14 *
15 * InterMezzo is free software; you can redistribute it and/or
16 * modify it under the terms of version 2 of the GNU General Public
17 * License as published by the Free Software Foundation.
18 *
19 * InterMezzo is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with InterMezzo; if not, write to the Free Software
26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 *
28 * This file manages file I/O
29 *
30 */
31
32 #include <stdarg.h>
33
34 #include <asm/bitops.h>
35 #include <asm/uaccess.h>
36 #include <asm/system.h>
37
38 #include <linux/errno.h>
39 #include <linux/fs.h>
40 #include <linux/ext2_fs.h>
41 #include <linux/slab.h>
42 #include <linux/vmalloc.h>
43 #include <linux/sched.h>
44 #include <linux/stat.h>
45 #include <linux/string.h>
46 #include <linux/locks.h>
47 #include <linux/blkdev.h>
48 #include <linux/init.h>
49 #include <linux/smp_lock.h>
50 #define __NO_VERSION__
51 #include <linux/module.h>
52
53 #include <linux/intermezzo_fs.h>
54 #include <linux/intermezzo_psdev.h>
55 #include <linux/fsfilter.h>
56 /*
57 * these are initialized in super.c
58 */
59 extern int presto_permission(struct inode *inode, int mask);
60
61
presto_open_upcall(int minor,struct dentry * de)62 static int presto_open_upcall(int minor, struct dentry *de)
63 {
64 int rc = 0;
65 char *path, *buffer;
66 struct presto_file_set *fset;
67 int pathlen;
68 struct lento_vfs_context info;
69 struct presto_dentry_data *dd = presto_d2d(de);
70
71 PRESTO_ALLOC(buffer, PAGE_SIZE);
72 if ( !buffer ) {
73 CERROR("PRESTO: out of memory!\n");
74 return -ENOMEM;
75 }
76 fset = presto_fset(de);
77 path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
78 pathlen = MYPATHLEN(buffer, path);
79
80 CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
81 if (dd->remote_ino == 0) {
82 rc = presto_get_fileid(minor, fset, de);
83 }
84 memset (&info, 0, sizeof(info));
85 if (dd->remote_ino > 0) {
86 info.remote_ino = dd->remote_ino;
87 info.remote_generation = dd->remote_generation;
88 } else
89 CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
90 dd->remote_ino);
91
92 rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
93 PRESTO_FREE(buffer, PAGE_SIZE);
94 return rc;
95 }
96
open_check_dod(struct file * file,struct presto_file_set * fset)97 static inline int open_check_dod(struct file *file,
98 struct presto_file_set *fset)
99 {
100 int gen, is_iopen = 0, minor;
101 struct presto_cache *cache = fset->fset_cache;
102 ino_t inum;
103
104 minor = presto_c2m(cache);
105
106 if ( ISLENTO(minor) ) {
107 CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
108 return 0;
109 }
110
111 /* Files are only ever opened by inode during backfetches, when by
112 * definition we have the authoritative copy of the data. No DOD. */
113 is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
114
115 if (is_iopen) {
116 CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
117 return 0;
118 }
119
120 if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
121 CDEBUG(D_CACHE, "fileset not on demand.\n");
122 return 0;
123 }
124
125 if (file->f_flags & O_TRUNC) {
126 CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
127 return 0;
128 }
129
130 if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
131 CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
132 return 0;
133 }
134
135 if (presto_chk(file->f_dentry, PRESTO_DATA)) {
136 CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
137 return 0;
138 }
139
140 if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
141 CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
142 return 0;
143 }
144
145 return 1;
146 }
147
presto_file_open(struct inode * inode,struct file * file)148 static int presto_file_open(struct inode *inode, struct file *file)
149 {
150 int rc = 0;
151 struct file_operations *fops;
152 struct presto_cache *cache;
153 struct presto_file_set *fset;
154 struct presto_file_data *fdata;
155 int writable = (file->f_flags & (O_RDWR | O_WRONLY));
156 int minor, i;
157
158 ENTRY;
159
160 if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
161 EXIT;
162 return -EBADF;
163 }
164
165 minor = presto_c2m(cache);
166
167 CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
168 presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
169 ISLENTO(minor));
170
171 if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
172 file->f_flags & O_WRONLY)) {
173 CDEBUG(D_CACHE, "calling presto_get_permit\n");
174 if ( presto_get_permit(inode) < 0 ) {
175 EXIT;
176 return -EROFS;
177 }
178 presto_put_permit(inode);
179 }
180
181 if (open_check_dod(file, fset)) {
182 CDEBUG(D_CACHE, "presto_open_upcall\n");
183 CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
184 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
185 rc = presto_open_upcall(minor, file->f_dentry);
186 if (rc) {
187 EXIT;
188 CERROR("%s: returning error %d\n", __FUNCTION__, rc);
189 return rc;
190 }
191
192 }
193
194 /* file was truncated upon open: do not refetch */
195 if (file->f_flags & O_TRUNC) {
196 CDEBUG(D_CACHE, "setting DATA, ATTR\n");
197 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
198 }
199
200 fops = filter_c2cffops(cache->cache_filter);
201 if ( fops->open ) {
202 CDEBUG(D_CACHE, "calling fs open\n");
203 rc = fops->open(inode, file);
204
205 if (rc) {
206 EXIT;
207 return rc;
208 }
209 }
210
211 if (writable) {
212 PRESTO_ALLOC(fdata, sizeof(*fdata));
213 if (!fdata) {
214 EXIT;
215 return -ENOMEM;
216 }
217 /* LOCK: XXX check that the kernel lock protects this alloc */
218 fdata->fd_do_lml = 0;
219 fdata->fd_bytes_written = 0;
220 fdata->fd_fsuid = current->fsuid;
221 fdata->fd_fsgid = current->fsgid;
222 fdata->fd_mode = file->f_dentry->d_inode->i_mode;
223 fdata->fd_uid = file->f_dentry->d_inode->i_uid;
224 fdata->fd_gid = file->f_dentry->d_inode->i_gid;
225 fdata->fd_ngroups = current->ngroups;
226 for (i=0 ; i < current->ngroups ; i++)
227 fdata->fd_groups[i] = current->groups[i];
228 if (!ISLENTO(minor))
229 fdata->fd_info.flags = LENTO_FL_KML;
230 else {
231 /* this is for the case of DOD,
232 reint_close will adjust flags if needed */
233 fdata->fd_info.flags = 0;
234 }
235
236 presto_getversion(&fdata->fd_version, inode);
237 file->private_data = fdata;
238 } else {
239 file->private_data = NULL;
240 }
241
242 EXIT;
243 return 0;
244 }
245
presto_adjust_lml(struct file * file,struct lento_vfs_context * info)246 int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
247 {
248 struct presto_file_data *fdata =
249 (struct presto_file_data *) file->private_data;
250
251 if (!fdata) {
252 EXIT;
253 return -EINVAL;
254 }
255
256 memcpy(&fdata->fd_info, info, sizeof(*info));
257 EXIT;
258 return 0;
259 }
260
261
presto_file_release(struct inode * inode,struct file * file)262 static int presto_file_release(struct inode *inode, struct file *file)
263 {
264 int rc;
265 struct file_operations *fops;
266 struct presto_cache *cache;
267 struct presto_file_set *fset;
268 struct presto_file_data *fdata =
269 (struct presto_file_data *)file->private_data;
270 ENTRY;
271
272 rc = presto_prep(file->f_dentry, &cache, &fset);
273 if ( rc ) {
274 EXIT;
275 return rc;
276 }
277
278 fops = filter_c2cffops(cache->cache_filter);
279 if (fops && fops->release)
280 rc = fops->release(inode, file);
281
282 CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
283 ISLENTO(cache->cache_psdev->uc_minor),
284 cache->cache_psdev->uc_minor, rc, fdata);
285
286 /* this file was modified: ignore close errors, write KML */
287 if (fdata && fdata->fd_do_lml) {
288 /* XXX: remove when lento gets file granularity cd */
289 if ( presto_get_permit(inode) < 0 ) {
290 EXIT;
291 return -EROFS;
292 }
293
294 fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
295 rc = presto_do_close(fset, file);
296 presto_put_permit(inode);
297 }
298
299 if (!rc && fdata) {
300 PRESTO_FREE(fdata, sizeof(*fdata));
301 file->private_data = NULL;
302 }
303
304 EXIT;
305 return rc;
306 }
307
/*
 * Account `res` freshly written bytes against the file and, once the
 * fileset's fset_file_maxio threshold is reached, journal a close record
 * for the still-open file.
 *
 * Only active when the fileset has FSET_JCLOSE_ON_WRITE set and the cache's
 * minor is not Lento.  If files are open for a long time, this gives added
 * protection.  (XXX todo: per cache, add ioctl, handle journaling in a
 * thread, add more options etc.)
 *
 * The caller must make sure file->private_data is non-NULL.
 */
static void presto_apply_write_policy(struct file *file,
                                      struct presto_file_set *fset, loff_t res)
{
        struct presto_file_data *fdata = file->private_data;
        struct presto_cache *cache = fset->fset_cache;
        struct presto_version new_file_ver;
        struct rec_info rec;
        struct inode *inode;
        int error;

        if (!(fset->fset_flags & FSET_JCLOSE_ON_WRITE))
                return;
        if (ISLENTO(cache->cache_psdev->uc_minor))
                return;

        fdata->fd_bytes_written += res;
        if (fdata->fd_bytes_written < fset->fset_file_maxio)
                return;

        inode = file->f_dentry->d_inode;
        presto_getversion(&new_file_ver, inode);

        /* This is really heavy weight and should be fixed ASAP.  At most
         * we should be recording the number of bytes written and not
         * locking the kernel, wait for permits, etc, on the write path.
         * SHP */
        lock_kernel();
        if (presto_get_permit(inode) < 0) {
                EXIT;
                /* we must be disconnected, not to worry */
                unlock_kernel();
                return;
        }
        error = presto_journal_close(&rec, fset, file, file->f_dentry,
                                     &fdata->fd_version, &new_file_ver);
        presto_put_permit(inode);
        unlock_kernel();

        if (error) {
                CERROR("presto_close: cannot journal close\n");
                /* XXX these errors are really bad */
                /* panic(); */
                return;
        }

        /* Start counting towards the next threshold. */
        fdata->fd_bytes_written = 0;
}
360
presto_file_write(struct file * file,const char * buf,size_t size,loff_t * off)361 static ssize_t presto_file_write(struct file *file, const char *buf,
362 size_t size, loff_t *off)
363 {
364 struct rec_info rec;
365 int error;
366 struct presto_cache *cache;
367 struct presto_file_set *fset;
368 struct file_operations *fops;
369 ssize_t res;
370 int do_lml_here;
371 void *handle = NULL;
372 unsigned long blocks;
373 struct presto_file_data *fdata;
374 loff_t res_size;
375
376 error = presto_prep(file->f_dentry, &cache, &fset);
377 if ( error ) {
378 EXIT;
379 return error;
380 }
381
382 blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
383 /* XXX 3 is for ext2 indirect blocks ... */
384 res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
385 << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
386
387 error = presto_reserve_space(fset->fset_cache, res_size);
388 CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size);
389 if ( error ) {
390 EXIT;
391 return -ENOSPC;
392 }
393
394 CDEBUG(D_INODE, "islento %d, minor: %d\n",
395 ISLENTO(cache->cache_psdev->uc_minor),
396 cache->cache_psdev->uc_minor);
397
398 /*
399 * XXX this lock should become a per inode lock when
400 * Vinny's changes are in; we could just use i_sem.
401 */
402 read_lock(&fset->fset_lml.fd_lock);
403 fdata = (struct presto_file_data *)file->private_data;
404 do_lml_here = size && (fdata->fd_do_lml == 0) &&
405 !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
406
407 if (do_lml_here)
408 fdata->fd_do_lml = 1;
409 read_unlock(&fset->fset_lml.fd_lock);
410
411 /* XXX
412 There might be a bug here. We need to make
413 absolutely sure that the ext3_file_write commits
414 after our transaction that writes the LML record.
415 Nesting the file write helps if new blocks are allocated.
416 */
417 res = 0;
418 if (do_lml_here) {
419 struct presto_version file_version;
420 /* handle different space reqs from file system below! */
421 handle = presto_trans_start(fset, file->f_dentry->d_inode,
422 KML_OPCODE_WRITE);
423 if ( IS_ERR(handle) ) {
424 presto_release_space(fset->fset_cache, res_size);
425 CERROR("presto_write: no space for transaction\n");
426 return -ENOSPC;
427 }
428
429 presto_getversion(&file_version, file->f_dentry->d_inode);
430 res = presto_write_lml_close(&rec, fset, file,
431 fdata->fd_info.remote_ino,
432 fdata->fd_info.remote_generation,
433 &fdata->fd_info.remote_version,
434 &file_version);
435 fdata->fd_lml_offset = rec.offset;
436 if ( res ) {
437 CERROR("intermezzo: PANIC failed to write LML\n");
438 *(int *)0 = 1;
439 EXIT;
440 goto exit_write;
441 }
442 presto_trans_commit(fset, handle);
443 }
444
445 fops = filter_c2cffops(cache->cache_filter);
446 res = fops->write(file, buf, size, off);
447 if ( res != size ) {
448 CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res);
449 }
450
451 if ( (res > 0) && fdata )
452 presto_apply_write_policy(file, fset, res);
453
454 exit_write:
455 presto_release_space(fset->fset_cache, res_size);
456 return res;
457 }
458
459 struct file_operations presto_file_fops = {
460 .write = presto_file_write,
461 .open = presto_file_open,
462 .release = presto_file_release,
463 .ioctl = presto_ioctl
464 };
465
466 struct inode_operations presto_file_iops = {
467 .permission = presto_permission,
468 .setattr = presto_setattr,
469 #ifdef CONFIG_FS_EXT_ATTR
470 .set_ext_attr = presto_set_ext_attr,
471 #endif
472 };
473
/* FIXME: I bet we want to add a lock here and in presto_file_open. */
/*
 * Purge the cached data of `file` (a path below the cache mount point)
 * from fileset `fset`.
 *
 * Currently a stub: the draft implementation is compiled out and the
 * function always returns 0 without touching anything.
 *
 * The disabled draft looks the file up, gives up with -EBUSY if it was
 * accessed within the last five seconds, and otherwise calls
 * izo_do_truncate() twice inside a KML_OPCODE_TRUNC transaction (first with
 * 0 and the old size, then with the old size and 0).  Locking, the
 * open-file check and the LML record are all still marked FIXME.
 */
int izo_purge_file(struct presto_file_set *fset, char *file)
{
#if 0
        void *handle = NULL;
        char *path = NULL;
        struct nameidata nd;
        struct dentry *dentry;
        int rc = 0, len;
        loff_t oldsize;

        /* FIXME: not mtpt it's gone */
        len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
        PRESTO_ALLOC(path, len + 1);
        if (path == NULL)
                return -1;

        sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
        rc = izo_lookup_file(fset, path, &nd);
        if (rc)
                goto error;
        dentry = nd.dentry;

        /* FIXME: take a lock here */

        if (dentry->d_inode->i_atime > CURRENT_TIME - 5) {
                /* We lost the race; this file was accessed while we were
                 * doing ioctls and lookups and whatnot. */
                rc = -EBUSY;
                goto error_unlock;
        }

        /* FIXME: Check if this file is open. */

        handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
        if (IS_ERR(handle)) {
                rc = -ENOMEM;
                goto error_unlock;
        }

        /* FIXME: Write LML record */

        oldsize = dentry->d_inode->i_size;
        rc = izo_do_truncate(fset, dentry, 0, oldsize);
        if (rc != 0)
                goto error_clear;
        rc = izo_do_truncate(fset, dentry, oldsize, 0);
        if (rc != 0)
                goto error_clear;

 error_clear:
        /* FIXME: clear LML record */

 error_unlock:
        /* FIXME: release the lock here */

 error:
        if (handle != NULL && !IS_ERR(handle))
                presto_trans_commit(fset, handle);
        if (path != NULL)
                PRESTO_FREE(path, len + 1);
        return rc;
#else
        return 0;
#endif
}
540