1 /*
2  * blkfront.c
3  *
4  * XenLinux virtual block device driver.
5  *
6  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8  * Copyright (c) 2004, Christian Limpach
9  * Copyright (c) 2004, Andrew Warfield
10  * Copyright (c) 2005, Christopher Clark
11  * Copyright (c) 2005, XenSource Ltd
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version 2
15  * as published by the Free Software Foundation; or, when distributed
16  * separately from the Linux kernel or incorporated into other
17  * software packages, subject to the following license:
18  *
19  * Permission is hereby granted, free of charge, to any person obtaining a copy
20  * of this source file (the "Software"), to deal in the Software without
21  * restriction, including without limitation the rights to use, copy, modify,
22  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23  * and to permit persons to whom the Software is furnished to do so, subject to
24  * the following conditions:
25  *
26  * The above copyright notice and this permission notice shall be included in
27  * all copies or substantial portions of the Software.
28  *
29  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35  * IN THE SOFTWARE.
36  */
37 
38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h>
40 #include <linux/hdreg.h>
41 #include <linux/cdrom.h>
42 #include <linux/module.h>
43 #include <linux/slab.h>
44 #include <linux/mutex.h>
45 #include <linux/scatterlist.h>
46 
47 #include <xen/xen.h>
48 #include <xen/xenbus.h>
49 #include <xen/grant_table.h>
50 #include <xen/events.h>
51 #include <xen/page.h>
52 #include <xen/platform_pci.h>
53 
54 #include <xen/interface/grant_table.h>
55 #include <xen/interface/io/blkif.h>
56 #include <xen/interface/io/protocols.h>
57 
58 #include <asm/xen/hypervisor.h>
59 
/*
 * Connection state of one frontend device, kept in
 * blkfront_info.connected.  Requests are only placed on the ring while
 * the state is BLKIF_STATE_CONNECTED.
 */
enum blkif_state {
	BLKIF_STATE_DISCONNECTED,	/* initial state; also set by blkif_free(info, 0) */
	BLKIF_STATE_CONNECTED,		/* set by blkif_recover() once the ring is usable */
	BLKIF_STATE_SUSPENDED,		/* set by blkif_free(info, 1) */
};
65 
/*
 * Frontend-private record of one ring request: a copy of the request as
 * it was written to the ring, the block-layer request it was built
 * from, and the frame number recorded for each granted segment.
 * Unused entries are chained into a free list through req.id.
 */
struct blk_shadow {
	struct blkif_request req;
	struct request *request;	/* NULL while the entry is on the free list */
	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
71 
/* NOTE(review): no user of blkfront_mutex is visible in this part of the file. */
static DEFINE_MUTEX(blkfront_mutex);
/* Forward declaration; referenced when the gendisk is set up. */
static const struct block_device_operations xlvbd_block_fops;

/* Number of request slots in a shared ring that occupies one page. */
#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
76 
/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
	struct mutex mutex;
	/* xenbus device this vbd was probed from */
	struct xenbus_device *xbdev;
	/* NULL until xlvbd_alloc_gendisk() has succeeded */
	struct gendisk *gd;
	/* value of the "virtual-device" (or "virtual-device-ext") xenstore key */
	int vdevice;
	/* parsed from the last path component of the xenbus node name */
	blkif_vdev_t handle;
	enum blkif_state connected;
	/* grant reference of the shared ring page, or GRANT_INVALID_REF */
	int ring_ref;
	struct blkif_front_ring ring;
	/* scratch scatterlist filled by blk_rq_map_sg() for each request */
	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	/* event channel and the irq bound to it; both 0 when not set up */
	unsigned int evtchn, irq;
	struct request_queue *rq;
	/* runs blkif_restart_queue() */
	struct work_struct work;
	/* registered when grant references run out; schedules 'work' */
	struct gnttab_free_callback callback;
	struct blk_shadow shadow[BLK_RING_SIZE];
	/* index of the first entry on the shadow free list */
	unsigned long shadow_free;
	/* flags handed to blk_queue_flush(); cleared when a barrier fails */
	unsigned int feature_flush;
	int is_ready;
};
102 
/* Queue lock handed to blk_init_queue(); one lock for all blkfront devices. */
static DEFINE_SPINLOCK(blkif_io_lock);

/*
 * Bitmap of reserved minor numbers (minors), its size in bits
 * (nr_minors), and the lock that guards both.
 */
static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
/* Value stored in ring_ref while no ring page is granted. */
#define GRANT_INVALID_REF	0

/* Minors per disk: traditional numbering vs. extended numbering. */
#define PARTS_PER_DISK		16
#define PARTS_PER_EXT_DISK      256

/* Split a non-extended vdevice number into major and minor parts. */
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

/* Bit EXT_SHIFT of a vdevice number marks the extended numbering scheme. */
#define EXT_SHIFT 28
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
/*
 * Where emulated IDE (hd) and SCSI (sd) disks land in the xvd minor
 * and name spaces; used by xen_translate_vdev().
 */
#define EMULATED_HD_DISK_MINOR_OFFSET (0)
#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)

#define DEV_NAME	"xvd"	/* name in /dev */
129 
get_id_from_freelist(struct blkfront_info * info)130 static int get_id_from_freelist(struct blkfront_info *info)
131 {
132 	unsigned long free = info->shadow_free;
133 	BUG_ON(free >= BLK_RING_SIZE);
134 	info->shadow_free = info->shadow[free].req.id;
135 	info->shadow[free].req.id = 0x0fffffee; /* debug */
136 	return free;
137 }
138 
add_id_to_freelist(struct blkfront_info * info,unsigned long id)139 static void add_id_to_freelist(struct blkfront_info *info,
140 			       unsigned long id)
141 {
142 	info->shadow[id].req.id  = info->shadow_free;
143 	info->shadow[id].request = NULL;
144 	info->shadow_free = id;
145 }
146 
xlbd_reserve_minors(unsigned int minor,unsigned int nr)147 static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
148 {
149 	unsigned int end = minor + nr;
150 	int rc;
151 
152 	if (end > nr_minors) {
153 		unsigned long *bitmap, *old;
154 
155 		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
156 				 GFP_KERNEL);
157 		if (bitmap == NULL)
158 			return -ENOMEM;
159 
160 		spin_lock(&minor_lock);
161 		if (end > nr_minors) {
162 			old = minors;
163 			memcpy(bitmap, minors,
164 			       BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
165 			minors = bitmap;
166 			nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
167 		} else
168 			old = bitmap;
169 		spin_unlock(&minor_lock);
170 		kfree(old);
171 	}
172 
173 	spin_lock(&minor_lock);
174 	if (find_next_bit(minors, end, minor) >= end) {
175 		for (; minor < end; ++minor)
176 			__set_bit(minor, minors);
177 		rc = 0;
178 	} else
179 		rc = -EBUSY;
180 	spin_unlock(&minor_lock);
181 
182 	return rc;
183 }
184 
xlbd_release_minors(unsigned int minor,unsigned int nr)185 static void xlbd_release_minors(unsigned int minor, unsigned int nr)
186 {
187 	unsigned int end = minor + nr;
188 
189 	BUG_ON(end > nr_minors);
190 	spin_lock(&minor_lock);
191 	for (; minor < end; ++minor)
192 		__clear_bit(minor, minors);
193 	spin_unlock(&minor_lock);
194 }
195 
blkif_restart_queue_callback(void * arg)196 static void blkif_restart_queue_callback(void *arg)
197 {
198 	struct blkfront_info *info = (struct blkfront_info *)arg;
199 	schedule_work(&info->work);
200 }
201 
blkif_getgeo(struct block_device * bd,struct hd_geometry * hg)202 static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
203 {
204 	/* We don't have real geometry info, but let's at least return
205 	   values consistent with the size of the device */
206 	sector_t nsect = get_capacity(bd->bd_disk);
207 	sector_t cylinders = nsect;
208 
209 	hg->heads = 0xff;
210 	hg->sectors = 0x3f;
211 	sector_div(cylinders, hg->heads * hg->sectors);
212 	hg->cylinders = cylinders;
213 	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
214 		hg->cylinders = 0xffff;
215 	return 0;
216 }
217 
/*
 * ioctl handler for the virtual block device.
 *
 * CDROMMULTISESSION zero-fills the caller's struct cdrom_multisession
 * (multisession is not supported yet).  CDROM_GET_CAPABILITY succeeds
 * only when the disk is flagged as a CD.  Everything else is rejected.
 *
 * Returns 0 on success, -EFAULT if the user buffer cannot be written,
 * or -EINVAL for unsupported commands.
 */
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
		       unsigned command, unsigned long argument)
{
	struct blkfront_info *info = bdev->bd_disk->private_data;
	size_t i;	/* unsigned, to match the sizeof() bound below */

	dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
		command, argument);

	switch (command) {
	case CDROMMULTISESSION:
		dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case CDROM_GET_CAPABILITY:
		return (info->gd->flags & GENHD_FL_CD) ? 0 : -EINVAL;

	default:
		return -EINVAL; /* same return as native Linux */
	}
}
250 
/*
 * Generate a Xen blkfront IO request from a blk layer request.  Reads
 * and writes are handled as expected.  Since we lack a loose flush
 * request, we map flushes into a full ordered barrier.
 *
 * @req: a request struct
 *
 * Returns 0 once the request has been written into the next private
 * ring slot (it is not pushed to the backend here), or 1 if it could
 * not be queued: either the device is not connected, or no grant
 * references were available, in which case a free callback is
 * registered that schedules the device's work item.
 */
static int blkif_queue_request(struct request *req)
{
	struct blkfront_info *info = req->rq_disk->private_data;
	unsigned long buffer_mfn;
	struct blkif_request *ring_req;
	unsigned long id;
	unsigned int fsect, lsect;
	int i, ref;
	grant_ref_t gref_head;
	struct scatterlist *sg;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	/* Reserve enough grant references for a maximally-sized request. */
	if (gnttab_alloc_grant_references(
		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		gnttab_request_free_callback(
			&info->callback,
			blkif_restart_queue_callback,
			info,
			BLKIF_MAX_SEGMENTS_PER_REQUEST);
		return 1;
	}

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
	id = get_id_from_freelist(info);
	info->shadow[id].request = req;

	ring_req->id = id;
	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
	ring_req->handle = info->handle;

	ring_req->operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;

	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
		/*
		 * Ideally we could just do an unordered
		 * flush-to-disk, but all we have is a full write
		 * barrier at the moment.  However, a barrier write is
		 * a superset of FUA, so we can implement it the same
		 * way.  (It's also a FLUSH+FUA, since it is
		 * guaranteed ordered WRT previous writes.)
		 */
		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
	}

	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);

	for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
		buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
		/* First and last 512-byte sector of the segment within its page. */
		fsect = sg->offset >> 9;
		lsect = fsect + (sg->length >> 9) - 1;
		/* install a grant reference. */
		ref = gnttab_claim_grant_reference(&gref_head);
		BUG_ON(ref == -ENOSPC);

		/*
		 * NOTE(review): the last argument is presumably the
		 * read-only flag, so pages of a write request are granted
		 * read-only -- confirm against the grant-table API.
		 */
		gnttab_grant_foreign_access_ref(
				ref,
				info->xbdev->otherend_id,
				buffer_mfn,
				rq_data_dir(req) );

		/* Remember the frame so blkif_recover() can re-grant it. */
		info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
		ring_req->u.rw.seg[i] =
				(struct blkif_request_segment) {
					.gref       = ref,
					.first_sect = fsect,
					.last_sect  = lsect };
	}

	info->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	/* Presumably hands back the references reserved above but not claimed. */
	gnttab_free_grant_references(gref_head);

	return 0;
}
340 
341 
flush_requests(struct blkfront_info * info)342 static inline void flush_requests(struct blkfront_info *info)
343 {
344 	int notify;
345 
346 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
347 
348 	if (notify)
349 		notify_remote_via_irq(info->irq);
350 }
351 
352 /*
353  * do_blkif_request
354  *  read a block; request is in a request queue
355  */
do_blkif_request(struct request_queue * rq)356 static void do_blkif_request(struct request_queue *rq)
357 {
358 	struct blkfront_info *info = NULL;
359 	struct request *req;
360 	int queued;
361 
362 	pr_debug("Entered do_blkif_request\n");
363 
364 	queued = 0;
365 
366 	while ((req = blk_peek_request(rq)) != NULL) {
367 		info = req->rq_disk->private_data;
368 
369 		if (RING_FULL(&info->ring))
370 			goto wait;
371 
372 		blk_start_request(req);
373 
374 		if (req->cmd_type != REQ_TYPE_FS) {
375 			__blk_end_request_all(req, -EIO);
376 			continue;
377 		}
378 
379 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
380 			 "(%u/%u) buffer:%p [%s]\n",
381 			 req, req->cmd, (unsigned long)blk_rq_pos(req),
382 			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
383 			 req->buffer, rq_data_dir(req) ? "write" : "read");
384 
385 		if (blkif_queue_request(req)) {
386 			blk_requeue_request(rq, req);
387 wait:
388 			/* Avoid pointless unplugs. */
389 			blk_stop_queue(rq);
390 			break;
391 		}
392 
393 		queued++;
394 	}
395 
396 	if (queued != 0)
397 		flush_requests(info);
398 }
399 
/*
 * Create the request queue for @gd and apply the limits the ring
 * protocol imposes.  Returns 0 on success, -1 if the queue could not be
 * allocated.
 */
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
	struct request_queue *q;

	q = blk_init_queue(do_blkif_request, &blkif_io_lock);
	if (!q)
		return -1;

	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, q);

	/* Sector size and maximum transfer mimic the equivalent hardware. */
	blk_queue_logical_block_size(q, sector_size);
	blk_queue_max_hw_sectors(q, 512);

	/* A segment never crosses a page boundary nor exceeds one page. */
	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
	blk_queue_max_segment_size(q, PAGE_SIZE);

	/* A merged request has to fit into one ring slot. */
	blk_queue_max_segments(q, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Buffers must start on a sector boundary. */
	blk_queue_dma_alignment(q, 511);

	/* No bounce buffers, please. */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	gd->queue = q;

	return 0;
}
431 
432 
xlvbd_flush(struct blkfront_info * info)433 static void xlvbd_flush(struct blkfront_info *info)
434 {
435 	blk_queue_flush(info->rq, info->feature_flush);
436 	printk(KERN_INFO "blkfront: %s: barriers %s\n",
437 	       info->gd->disk_name,
438 	       info->feature_flush ? "enabled" : "disabled");
439 }
440 
xen_translate_vdev(int vdevice,int * minor,unsigned int * offset)441 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
442 {
443 	int major;
444 	major = BLKIF_MAJOR(vdevice);
445 	*minor = BLKIF_MINOR(vdevice);
446 	switch (major) {
447 		case XEN_IDE0_MAJOR:
448 			*offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
449 			*minor = ((*minor / 64) * PARTS_PER_DISK) +
450 				EMULATED_HD_DISK_MINOR_OFFSET;
451 			break;
452 		case XEN_IDE1_MAJOR:
453 			*offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
454 			*minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
455 				EMULATED_HD_DISK_MINOR_OFFSET;
456 			break;
457 		case XEN_SCSI_DISK0_MAJOR:
458 			*offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
459 			*minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
460 			break;
461 		case XEN_SCSI_DISK1_MAJOR:
462 		case XEN_SCSI_DISK2_MAJOR:
463 		case XEN_SCSI_DISK3_MAJOR:
464 		case XEN_SCSI_DISK4_MAJOR:
465 		case XEN_SCSI_DISK5_MAJOR:
466 		case XEN_SCSI_DISK6_MAJOR:
467 		case XEN_SCSI_DISK7_MAJOR:
468 			*offset = (*minor / PARTS_PER_DISK) +
469 				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
470 				EMULATED_SD_DISK_NAME_OFFSET;
471 			*minor = *minor +
472 				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
473 				EMULATED_SD_DISK_MINOR_OFFSET;
474 			break;
475 		case XEN_SCSI_DISK8_MAJOR:
476 		case XEN_SCSI_DISK9_MAJOR:
477 		case XEN_SCSI_DISK10_MAJOR:
478 		case XEN_SCSI_DISK11_MAJOR:
479 		case XEN_SCSI_DISK12_MAJOR:
480 		case XEN_SCSI_DISK13_MAJOR:
481 		case XEN_SCSI_DISK14_MAJOR:
482 		case XEN_SCSI_DISK15_MAJOR:
483 			*offset = (*minor / PARTS_PER_DISK) +
484 				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
485 				EMULATED_SD_DISK_NAME_OFFSET;
486 			*minor = *minor +
487 				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
488 				EMULATED_SD_DISK_MINOR_OFFSET;
489 			break;
490 		case XENVBD_MAJOR:
491 			*offset = *minor / PARTS_PER_DISK;
492 			break;
493 		default:
494 			printk(KERN_WARNING "blkfront: your disk configuration is "
495 					"incorrect, please use an xvd device instead\n");
496 			return -ENODEV;
497 	}
498 	return 0;
499 }
500 
/*
 * Allocate and initialise the gendisk and request queue for a vbd.
 *
 * @capacity:    device size, passed to set_capacity()
 * @info:        frontend state; info->gd and info->rq must still be NULL
 * @vdisk_info:  VDISK_* flags (read-only, removable, cdrom)
 * @sector_size: logical block size for the queue
 *
 * The minor (and therefore the xvd name) is derived from info->vdevice.
 * A vdevice whose minor is a multiple of the per-disk partition count
 * is a whole disk and gets all partition minors; anything else gets a
 * single minor and a name with a partition number suffix.
 *
 * Returns 0 on success.  On failure returns a negative errno, with the
 * reserved minors released and the gendisk reference dropped.  The disk
 * is not added to the system here.
 */
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
			       struct blkfront_info *info,
			       u16 vdisk_info, u16 sector_size)
{
	struct gendisk *gd;
	int disk_minors = 1;
	int err;
	unsigned int offset;
	int minor;
	int nr_parts;

	BUG_ON(info->gd != NULL);
	BUG_ON(info->rq != NULL);

	if ((info->vdevice>>EXT_SHIFT) > 1) {
		/* this is above the extended range; something is wrong */
		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
		return -ENODEV;
	}

	if (!VDEV_IS_EXTENDED(info->vdevice)) {
		err = xen_translate_vdev(info->vdevice, &minor, &offset);
		if (err)
			return err;
		nr_parts = PARTS_PER_DISK;
	} else {
		minor = BLKIF_MINOR_EXT(info->vdevice);
		nr_parts = PARTS_PER_EXT_DISK;
		offset = minor / nr_parts;
		/*
		 * Offsets 0-3 name xvda-xvdd, which is where emulated IDE
		 * disks are mapped.  xvde (offset 4) is the first name the
		 * warning itself recommends, so it must not trigger it.
		 */
		if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
			printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
					"emulated IDE disks,\n\t choose an xvd device name "
					"from xvde on\n", info->vdevice);
	}

	if ((minor % nr_parts) == 0)
		disk_minors = nr_parts;

	err = xlbd_reserve_minors(minor, disk_minors);
	if (err)
		goto out;
	err = -ENODEV;

	gd = alloc_disk(disk_minors);
	if (gd == NULL)
		goto release;

	if (disk_minors > 1) {
		/* Whole disk: xvd<letter> or xvd<letter><letter>. */
		if (offset < 26)
			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
		else
			sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
				'a' + ((offset / 26)-1), 'a' + (offset % 26));
	} else {
		/* Single partition: same name with the partition number appended. */
		if (offset < 26)
			sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
				'a' + offset,
				minor & (nr_parts - 1));
		else
			sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
				'a' + ((offset / 26) - 1),
				'a' + (offset % 26),
				minor & (nr_parts - 1));
	}

	gd->major = XENVBD_MAJOR;
	gd->first_minor = minor;
	gd->fops = &xlvbd_block_fops;
	gd->private_data = info;
	gd->driverfs_dev = &(info->xbdev->dev);
	set_capacity(gd, capacity);

	if (xlvbd_init_blk_queue(gd, sector_size)) {
		/*
		 * The disk was never added and has no queue, so there is
		 * nothing for del_gendisk() to undo; just drop the
		 * reference taken by alloc_disk().
		 */
		put_disk(gd);
		goto release;
	}

	info->rq = gd->queue;
	info->gd = gd;

	xlvbd_flush(info);

	if (vdisk_info & VDISK_READONLY)
		set_disk_ro(gd, 1);

	if (vdisk_info & VDISK_REMOVABLE)
		gd->flags |= GENHD_FL_REMOVABLE;

	if (vdisk_info & VDISK_CDROM)
		gd->flags |= GENHD_FL_CD;

	return 0;

 release:
	xlbd_release_minors(minor, disk_minors);
 out:
	return err;
}
600 
xlvbd_release_gendisk(struct blkfront_info * info)601 static void xlvbd_release_gendisk(struct blkfront_info *info)
602 {
603 	unsigned int minor, nr_minors;
604 	unsigned long flags;
605 
606 	if (info->rq == NULL)
607 		return;
608 
609 	spin_lock_irqsave(&blkif_io_lock, flags);
610 
611 	/* No more blkif_request(). */
612 	blk_stop_queue(info->rq);
613 
614 	/* No more gnttab callback work. */
615 	gnttab_cancel_free_callback(&info->callback);
616 	spin_unlock_irqrestore(&blkif_io_lock, flags);
617 
618 	/* Flush gnttab callback work. Must be done with no locks held. */
619 	flush_work_sync(&info->work);
620 
621 	del_gendisk(info->gd);
622 
623 	minor = info->gd->first_minor;
624 	nr_minors = info->gd->minors;
625 	xlbd_release_minors(minor, nr_minors);
626 
627 	blk_cleanup_queue(info->rq);
628 	info->rq = NULL;
629 
630 	put_disk(info->gd);
631 	info->gd = NULL;
632 }
633 
kick_pending_request_queues(struct blkfront_info * info)634 static void kick_pending_request_queues(struct blkfront_info *info)
635 {
636 	if (!RING_FULL(&info->ring)) {
637 		/* Re-enable calldowns. */
638 		blk_start_queue(info->rq);
639 		/* Kick things off immediately. */
640 		do_blkif_request(info->rq);
641 	}
642 }
643 
blkif_restart_queue(struct work_struct * work)644 static void blkif_restart_queue(struct work_struct *work)
645 {
646 	struct blkfront_info *info = container_of(work, struct blkfront_info, work);
647 
648 	spin_lock_irq(&blkif_io_lock);
649 	if (info->connected == BLKIF_STATE_CONNECTED)
650 		kick_pending_request_queues(info);
651 	spin_unlock_irq(&blkif_io_lock);
652 }
653 
blkif_free(struct blkfront_info * info,int suspend)654 static void blkif_free(struct blkfront_info *info, int suspend)
655 {
656 	/* Prevent new requests being issued until we fix things up. */
657 	spin_lock_irq(&blkif_io_lock);
658 	info->connected = suspend ?
659 		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
660 	/* No more blkif_request(). */
661 	if (info->rq)
662 		blk_stop_queue(info->rq);
663 	/* No more gnttab callback work. */
664 	gnttab_cancel_free_callback(&info->callback);
665 	spin_unlock_irq(&blkif_io_lock);
666 
667 	/* Flush gnttab callback work. Must be done with no locks held. */
668 	flush_work_sync(&info->work);
669 
670 	/* Free resources associated with old device channel. */
671 	if (info->ring_ref != GRANT_INVALID_REF) {
672 		gnttab_end_foreign_access(info->ring_ref, 0,
673 					  (unsigned long)info->ring.sring);
674 		info->ring_ref = GRANT_INVALID_REF;
675 		info->ring.sring = NULL;
676 	}
677 	if (info->irq)
678 		unbind_from_irqhandler(info->irq, info);
679 	info->evtchn = info->irq = 0;
680 
681 }
682 
blkif_completion(struct blk_shadow * s)683 static void blkif_completion(struct blk_shadow *s)
684 {
685 	int i;
686 	for (i = 0; i < s->req.nr_segments; i++)
687 		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
688 }
689 
/*
 * Interrupt handler for the device's event channel; @dev_id is the
 * blkfront_info that was passed when the handler was bound.
 *
 * Under the I/O lock, consumes every response on the ring: ends the
 * grants of the matching shadow entry, returns the entry to the free
 * list and completes the block-layer request.  Afterwards the request
 * queue is kicked.  Always returns IRQ_HANDLED.
 */
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
	struct request *req;
	struct blkif_response *bret;
	RING_IDX i, rp;
	unsigned long flags;
	struct blkfront_info *info = (struct blkfront_info *)dev_id;
	int error;

	spin_lock_irqsave(&blkif_io_lock, flags);

	/* Ignore events while the ring is not in a usable state. */
	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
		spin_unlock_irqrestore(&blkif_io_lock, flags);
		return IRQ_HANDLED;
	}

 again:
	rp = info->ring.sring->rsp_prod;
	rmb(); /* Ensure we see queued responses up to 'rp'. */

	for (i = info->ring.rsp_cons; i != rp; i++) {
		unsigned long id;

		bret = RING_GET_RESPONSE(&info->ring, i);
		/*
		 * NOTE(review): id comes from the shared ring and is used
		 * as an index into shadow[] without a range check.
		 */
		id   = bret->id;
		req  = info->shadow[id].request;

		blkif_completion(&info->shadow[id]);

		/*
		 * Only req.id and request are overwritten here, so
		 * shadow[id].req.nr_segments is still valid below.
		 */
		add_id_to_freelist(info, id);

		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
		switch (bret->operation) {
		case BLKIF_OP_WRITE_BARRIER:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
				       info->gd->disk_name);
				error = -EOPNOTSUPP;
			}
			/* A failed barrier with no data is treated as "not supported" too. */
			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
				     info->shadow[id].req.nr_segments == 0)) {
				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
				       info->gd->disk_name);
				error = -EOPNOTSUPP;
			}
			/*
			 * Any barrier failure turns flush support off; an
			 * unsupported barrier is completed as success.
			 */
			if (unlikely(error)) {
				if (error == -EOPNOTSUPP)
					error = 0;
				info->feature_flush = 0;
				xlvbd_flush(info);
			}
			/* fall through */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			if (unlikely(bret->status != BLKIF_RSP_OKAY))
				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
					"request: %x\n", bret->status);

			__blk_end_request_all(req, error);
			break;
		default:
			BUG();
		}
	}

	info->ring.rsp_cons = i;

	/*
	 * Requests still outstanding: re-check for responses that raced
	 * with us.  Otherwise just ask for an event on the next response.
	 */
	if (i != info->ring.req_prod_pvt) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
		if (more_to_do)
			goto again;
	} else
		info->ring.sring->rsp_event = i + 1;

	/* Completions may have freed ring slots; restart the queue. */
	kick_pending_request_queues(info);

	spin_unlock_irqrestore(&blkif_io_lock, flags);

	return IRQ_HANDLED;
}
771 
772 
setup_blkring(struct xenbus_device * dev,struct blkfront_info * info)773 static int setup_blkring(struct xenbus_device *dev,
774 			 struct blkfront_info *info)
775 {
776 	struct blkif_sring *sring;
777 	int err;
778 
779 	info->ring_ref = GRANT_INVALID_REF;
780 
781 	sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
782 	if (!sring) {
783 		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
784 		return -ENOMEM;
785 	}
786 	SHARED_RING_INIT(sring);
787 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
788 
789 	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
790 
791 	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
792 	if (err < 0) {
793 		free_page((unsigned long)sring);
794 		info->ring.sring = NULL;
795 		goto fail;
796 	}
797 	info->ring_ref = err;
798 
799 	err = xenbus_alloc_evtchn(dev, &info->evtchn);
800 	if (err)
801 		goto fail;
802 
803 	err = bind_evtchn_to_irqhandler(info->evtchn,
804 					blkif_interrupt,
805 					IRQF_SAMPLE_RANDOM, "blkif", info);
806 	if (err <= 0) {
807 		xenbus_dev_fatal(dev, err,
808 				 "bind_evtchn_to_irqhandler failed");
809 		goto fail;
810 	}
811 	info->irq = err;
812 
813 	return 0;
814 fail:
815 	blkif_free(info, 0);
816 	return err;
817 }
818 
819 
820 /* Common code used when first setting up, and when resuming. */
talk_to_blkback(struct xenbus_device * dev,struct blkfront_info * info)821 static int talk_to_blkback(struct xenbus_device *dev,
822 			   struct blkfront_info *info)
823 {
824 	const char *message = NULL;
825 	struct xenbus_transaction xbt;
826 	int err;
827 
828 	/* Create shared ring, alloc event channel. */
829 	err = setup_blkring(dev, info);
830 	if (err)
831 		goto out;
832 
833 again:
834 	err = xenbus_transaction_start(&xbt);
835 	if (err) {
836 		xenbus_dev_fatal(dev, err, "starting transaction");
837 		goto destroy_blkring;
838 	}
839 
840 	err = xenbus_printf(xbt, dev->nodename,
841 			    "ring-ref", "%u", info->ring_ref);
842 	if (err) {
843 		message = "writing ring-ref";
844 		goto abort_transaction;
845 	}
846 	err = xenbus_printf(xbt, dev->nodename,
847 			    "event-channel", "%u", info->evtchn);
848 	if (err) {
849 		message = "writing event-channel";
850 		goto abort_transaction;
851 	}
852 	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
853 			    XEN_IO_PROTO_ABI_NATIVE);
854 	if (err) {
855 		message = "writing protocol";
856 		goto abort_transaction;
857 	}
858 
859 	err = xenbus_transaction_end(xbt, 0);
860 	if (err) {
861 		if (err == -EAGAIN)
862 			goto again;
863 		xenbus_dev_fatal(dev, err, "completing transaction");
864 		goto destroy_blkring;
865 	}
866 
867 	xenbus_switch_state(dev, XenbusStateInitialised);
868 
869 	return 0;
870 
871  abort_transaction:
872 	xenbus_transaction_end(xbt, 1);
873 	if (message)
874 		xenbus_dev_fatal(dev, err, "%s", message);
875  destroy_blkring:
876 	blkif_free(info, 0);
877  out:
878 	return err;
879 }
880 
881 /**
882  * Entry point to this code when a new device is created.  Allocate the basic
883  * structures and the ring buffer for communication with the backend, and
884  * inform the backend of the appropriate details for those.  Switch to
885  * Initialised state.
886  */
blkfront_probe(struct xenbus_device * dev,const struct xenbus_device_id * id)887 static int blkfront_probe(struct xenbus_device *dev,
888 			  const struct xenbus_device_id *id)
889 {
890 	int err, vdevice, i;
891 	struct blkfront_info *info;
892 
893 	/* FIXME: Use dynamic device id if this is not set. */
894 	err = xenbus_scanf(XBT_NIL, dev->nodename,
895 			   "virtual-device", "%i", &vdevice);
896 	if (err != 1) {
897 		/* go looking in the extended area instead */
898 		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
899 				   "%i", &vdevice);
900 		if (err != 1) {
901 			xenbus_dev_fatal(dev, err, "reading virtual-device");
902 			return err;
903 		}
904 	}
905 
906 	if (xen_hvm_domain()) {
907 		char *type;
908 		int len;
909 		/* no unplug has been done: do not hook devices != xen vbds */
910 		if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
911 			int major;
912 
913 			if (!VDEV_IS_EXTENDED(vdevice))
914 				major = BLKIF_MAJOR(vdevice);
915 			else
916 				major = XENVBD_MAJOR;
917 
918 			if (major != XENVBD_MAJOR) {
919 				printk(KERN_INFO
920 						"%s: HVM does not support vbd %d as xen block device\n",
921 						__FUNCTION__, vdevice);
922 				return -ENODEV;
923 			}
924 		}
925 		/* do not create a PV cdrom device if we are an HVM guest */
926 		type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
927 		if (IS_ERR(type))
928 			return -ENODEV;
929 		if (strncmp(type, "cdrom", 5) == 0) {
930 			kfree(type);
931 			return -ENODEV;
932 		}
933 		kfree(type);
934 	}
935 	info = kzalloc(sizeof(*info), GFP_KERNEL);
936 	if (!info) {
937 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
938 		return -ENOMEM;
939 	}
940 
941 	mutex_init(&info->mutex);
942 	info->xbdev = dev;
943 	info->vdevice = vdevice;
944 	info->connected = BLKIF_STATE_DISCONNECTED;
945 	INIT_WORK(&info->work, blkif_restart_queue);
946 
947 	for (i = 0; i < BLK_RING_SIZE; i++)
948 		info->shadow[i].req.id = i+1;
949 	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
950 
951 	/* Front end dir is a number, which is used as the id. */
952 	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
953 	dev_set_drvdata(&dev->dev, info);
954 
955 	err = talk_to_blkback(dev, info);
956 	if (err) {
957 		kfree(info);
958 		dev_set_drvdata(&dev->dev, NULL);
959 		return err;
960 	}
961 
962 	return 0;
963 }
964 
965 
/*
 * Re-issue the requests that were in flight when the connection to the
 * backend was torn down, then mark the device connected again.
 *
 * The shadow array is snapshotted, its free list rebuilt, and each
 * in-use snapshot entry is written into the ring under a fresh id with
 * its grant references re-established.
 *
 * Returns 0 on success or -ENOMEM if the snapshot cannot be allocated.
 */
static int blkif_recover(struct blkfront_info *info)
{
	int i;
	struct blkif_request *req;
	struct blk_shadow *copy;
	int j;

	/* Stage 1: Make a safe copy of the shadow state. */
	copy = kmalloc(sizeof(info->shadow),
		       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
	if (!copy)
		return -ENOMEM;
	memcpy(copy, info->shadow, sizeof(info->shadow));

	/* Stage 2: Set up free list. */
	memset(&info->shadow, 0, sizeof(info->shadow));
	for (i = 0; i < BLK_RING_SIZE; i++)
		info->shadow[i].req.id = i+1;
	/*
	 * NOTE(review): presumably req_prod_pvt is 0 here because the
	 * ring was just re-initialised by setup_blkring() -- confirm.
	 */
	info->shadow_free = info->ring.req_prod_pvt;
	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

	/* Stage 3: Find pending requests and requeue them. */
	for (i = 0; i < BLK_RING_SIZE; i++) {
		/* Not in use? */
		if (!copy[i].request)
			continue;

		/* Grab a request slot and copy shadow state into it. */
		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
		*req = copy[i].req;

		/* We get a new request id, and must reset the shadow state. */
		req->id = get_id_from_freelist(info);
		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

		/*
		 * Rewrite any grant references invalidated by susp/resume:
		 * the saved gref of each segment is granted again to the
		 * current backend, using the frame remembered in the shadow.
		 */
		for (j = 0; j < req->nr_segments; j++)
			gnttab_grant_foreign_access_ref(
				req->u.rw.seg[j].gref,
				info->xbdev->otherend_id,
				pfn_to_mfn(info->shadow[req->id].frame[j]),
				rq_data_dir(info->shadow[req->id].request));
		/* The memcpy above stored the old id; record the new one. */
		info->shadow[req->id].req = *req;

		info->ring.req_prod_pvt++;
	}

	kfree(copy);

	xenbus_switch_state(info->xbdev, XenbusStateConnected);

	spin_lock_irq(&blkif_io_lock);

	/* Now safe for us to use the shared ring */
	info->connected = BLKIF_STATE_CONNECTED;

	/* Send off requeued requests */
	flush_requests(info);

	/* Kick any other new requests queued since we resumed */
	kick_pending_request_queues(info);

	spin_unlock_irq(&blkif_io_lock);

	return 0;
}
1032 
1033 /**
1034  * We are reconnecting to the backend, due to a suspend/resume, or a backend
1035  * driver restart.  We tear down our blkif structure and recreate it, but
1036  * leave the device-layer structures intact so that this is transparent to the
1037  * rest of the kernel.
1038  */
blkfront_resume(struct xenbus_device * dev)1039 static int blkfront_resume(struct xenbus_device *dev)
1040 {
1041 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1042 	int err;
1043 
1044 	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
1045 
1046 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
1047 
1048 	err = talk_to_blkback(dev, info);
1049 	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
1050 		err = blkif_recover(info);
1051 
1052 	return err;
1053 }
1054 
1055 static void
blkfront_closing(struct blkfront_info * info)1056 blkfront_closing(struct blkfront_info *info)
1057 {
1058 	struct xenbus_device *xbdev = info->xbdev;
1059 	struct block_device *bdev = NULL;
1060 
1061 	mutex_lock(&info->mutex);
1062 
1063 	if (xbdev->state == XenbusStateClosing) {
1064 		mutex_unlock(&info->mutex);
1065 		return;
1066 	}
1067 
1068 	if (info->gd)
1069 		bdev = bdget_disk(info->gd, 0);
1070 
1071 	mutex_unlock(&info->mutex);
1072 
1073 	if (!bdev) {
1074 		xenbus_frontend_closed(xbdev);
1075 		return;
1076 	}
1077 
1078 	mutex_lock(&bdev->bd_mutex);
1079 
1080 	if (bdev->bd_openers) {
1081 		xenbus_dev_error(xbdev, -EBUSY,
1082 				 "Device in use; refusing to close");
1083 		xenbus_switch_state(xbdev, XenbusStateClosing);
1084 	} else {
1085 		xlvbd_release_gendisk(info);
1086 		xenbus_frontend_closed(xbdev);
1087 	}
1088 
1089 	mutex_unlock(&bdev->bd_mutex);
1090 	bdput(bdev);
1091 }
1092 
1093 /*
1094  * Invoked when the backend is finally 'ready' (and has told produced
1095  * the details about the physical device - #sectors, size, etc).
1096  */
blkfront_connect(struct blkfront_info * info)1097 static void blkfront_connect(struct blkfront_info *info)
1098 {
1099 	unsigned long long sectors;
1100 	unsigned long sector_size;
1101 	unsigned int binfo;
1102 	int err;
1103 	int barrier;
1104 
1105 	switch (info->connected) {
1106 	case BLKIF_STATE_CONNECTED:
1107 		/*
1108 		 * Potentially, the back-end may be signalling
1109 		 * a capacity change; update the capacity.
1110 		 */
1111 		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1112 				   "sectors", "%Lu", &sectors);
1113 		if (XENBUS_EXIST_ERR(err))
1114 			return;
1115 		printk(KERN_INFO "Setting capacity to %Lu\n",
1116 		       sectors);
1117 		set_capacity(info->gd, sectors);
1118 		revalidate_disk(info->gd);
1119 
1120 		/* fall through */
1121 	case BLKIF_STATE_SUSPENDED:
1122 		return;
1123 
1124 	default:
1125 		break;
1126 	}
1127 
1128 	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
1129 		__func__, info->xbdev->otherend);
1130 
1131 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1132 			    "sectors", "%llu", &sectors,
1133 			    "info", "%u", &binfo,
1134 			    "sector-size", "%lu", &sector_size,
1135 			    NULL);
1136 	if (err) {
1137 		xenbus_dev_fatal(info->xbdev, err,
1138 				 "reading backend fields at %s",
1139 				 info->xbdev->otherend);
1140 		return;
1141 	}
1142 
1143 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1144 			    "feature-barrier", "%lu", &barrier,
1145 			    NULL);
1146 
1147 	/*
1148 	 * If there's no "feature-barrier" defined, then it means
1149 	 * we're dealing with a very old backend which writes
1150 	 * synchronously; nothing to do.
1151 	 *
1152 	 * If there are barriers, then we use flush.
1153 	 */
1154 	info->feature_flush = 0;
1155 
1156 	if (!err && barrier)
1157 		info->feature_flush = REQ_FLUSH | REQ_FUA;
1158 
1159 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1160 	if (err) {
1161 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
1162 				 info->xbdev->otherend);
1163 		return;
1164 	}
1165 
1166 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
1167 
1168 	/* Kick pending requests. */
1169 	spin_lock_irq(&blkif_io_lock);
1170 	info->connected = BLKIF_STATE_CONNECTED;
1171 	kick_pending_request_queues(info);
1172 	spin_unlock_irq(&blkif_io_lock);
1173 
1174 	add_disk(info->gd);
1175 
1176 	info->is_ready = 1;
1177 }
1178 
1179 /**
1180  * Callback received when the backend's state changes.
1181  */
blkback_changed(struct xenbus_device * dev,enum xenbus_state backend_state)1182 static void blkback_changed(struct xenbus_device *dev,
1183 			    enum xenbus_state backend_state)
1184 {
1185 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1186 
1187 	dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
1188 
1189 	switch (backend_state) {
1190 	case XenbusStateInitialising:
1191 	case XenbusStateInitWait:
1192 	case XenbusStateInitialised:
1193 	case XenbusStateReconfiguring:
1194 	case XenbusStateReconfigured:
1195 	case XenbusStateUnknown:
1196 	case XenbusStateClosed:
1197 		break;
1198 
1199 	case XenbusStateConnected:
1200 		blkfront_connect(info);
1201 		break;
1202 
1203 	case XenbusStateClosing:
1204 		blkfront_closing(info);
1205 		break;
1206 	}
1207 }
1208 
blkfront_remove(struct xenbus_device * xbdev)1209 static int blkfront_remove(struct xenbus_device *xbdev)
1210 {
1211 	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
1212 	struct block_device *bdev = NULL;
1213 	struct gendisk *disk;
1214 
1215 	dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
1216 
1217 	blkif_free(info, 0);
1218 
1219 	mutex_lock(&info->mutex);
1220 
1221 	disk = info->gd;
1222 	if (disk)
1223 		bdev = bdget_disk(disk, 0);
1224 
1225 	info->xbdev = NULL;
1226 	mutex_unlock(&info->mutex);
1227 
1228 	if (!bdev) {
1229 		kfree(info);
1230 		return 0;
1231 	}
1232 
1233 	/*
1234 	 * The xbdev was removed before we reached the Closed
1235 	 * state. See if it's safe to remove the disk. If the bdev
1236 	 * isn't closed yet, we let release take care of it.
1237 	 */
1238 
1239 	mutex_lock(&bdev->bd_mutex);
1240 	info = disk->private_data;
1241 
1242 	dev_warn(disk_to_dev(disk),
1243 		 "%s was hot-unplugged, %d stale handles\n",
1244 		 xbdev->nodename, bdev->bd_openers);
1245 
1246 	if (info && !bdev->bd_openers) {
1247 		xlvbd_release_gendisk(info);
1248 		disk->private_data = NULL;
1249 		kfree(info);
1250 	}
1251 
1252 	mutex_unlock(&bdev->bd_mutex);
1253 	bdput(bdev);
1254 
1255 	return 0;
1256 }
1257 
blkfront_is_ready(struct xenbus_device * dev)1258 static int blkfront_is_ready(struct xenbus_device *dev)
1259 {
1260 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1261 
1262 	return info->is_ready && info->xbdev;
1263 }
1264 
/*
 * Block device open callback.
 *
 * Returns 0 when the disk still has its private info and a gendisk, or
 * -ERESTARTSYS when either of them is gone.
 */
static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct blkfront_info *info;
	int ret = 0;

	mutex_lock(&blkfront_mutex);

	info = disk->private_data;
	if (info) {
		mutex_lock(&info->mutex);

		/* xbdev is closed */
		if (!info->gd)
			ret = -ERESTARTSYS;

		mutex_unlock(&info->mutex);
	} else {
		/* xbdev gone */
		ret = -ERESTARTSYS;
	}

	mutex_unlock(&blkfront_mutex);
	return ret;
}
1292 
/*
 * Block device release callback.
 *
 * When the last opener goes away this completes any teardown that had to
 * be deferred because the device was still open: either a pending switch
 * to the closed state, or the final cleanup after the xenbus device was
 * removed.  Always returns 0.
 */
static int blkif_release(struct gendisk *disk, fmode_t mode)
{
	struct blkfront_info *info = disk->private_data;
	struct block_device *bdev;
	struct xenbus_device *xbdev;

	mutex_lock(&blkfront_mutex);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		/* No block device to inspect; nothing can be torn down. */
		goto out_unlock;

	/*
	 * Keep the bdev reference until we are done with it: bdev is
	 * dereferenced below, so it must not be put right after the get.
	 */
	if (bdev->bd_openers)
		goto out;

	/*
	 * Check if we have been instructed to close. We will have
	 * deferred this request, because the bdev was still open.
	 */

	mutex_lock(&info->mutex);
	xbdev = info->xbdev;

	if (xbdev && xbdev->state == XenbusStateClosing) {
		/* pending switch to state closed */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		xenbus_frontend_closed(info->xbdev);
	}

	mutex_unlock(&info->mutex);

	if (!xbdev) {
		/* sudden device removal */
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
	}

out:
	bdput(bdev);
out_unlock:
	mutex_unlock(&blkfront_mutex);
	return 0;
}
1336 
1337 static const struct block_device_operations xlvbd_block_fops =
1338 {
1339 	.owner = THIS_MODULE,
1340 	.open = blkif_open,
1341 	.release = blkif_release,
1342 	.getgeo = blkif_getgeo,
1343 	.ioctl = blkif_ioctl,
1344 };
1345 
1346 
1347 static const struct xenbus_device_id blkfront_ids[] = {
1348 	{ "vbd" },
1349 	{ "" }
1350 };
1351 
1352 static struct xenbus_driver blkfront = {
1353 	.name = "vbd",
1354 	.owner = THIS_MODULE,
1355 	.ids = blkfront_ids,
1356 	.probe = blkfront_probe,
1357 	.remove = blkfront_remove,
1358 	.resume = blkfront_resume,
1359 	.otherend_changed = blkback_changed,
1360 	.is_ready = blkfront_is_ready,
1361 };
1362 
xlblk_init(void)1363 static int __init xlblk_init(void)
1364 {
1365 	if (!xen_domain())
1366 		return -ENODEV;
1367 
1368 	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1369 		printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1370 		       XENVBD_MAJOR, DEV_NAME);
1371 		return -ENODEV;
1372 	}
1373 
1374 	return xenbus_register_frontend(&blkfront);
1375 }
1376 module_init(xlblk_init);
1377 
1378 
xlblk_exit(void)1379 static void __exit xlblk_exit(void)
1380 {
1381 	return xenbus_unregister_driver(&blkfront);
1382 }
1383 module_exit(xlblk_exit);
1384 
/* Module metadata: description, license and the aliases it loads under. */
MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
MODULE_ALIAS("xen:vbd");
MODULE_ALIAS("xenblk");
1390