Diffstat (limited to 'drivers/block')

 drivers/block/Kconfig               |   4
 drivers/block/cciss.c               |   2
 drivers/block/loop.c                |  22
 drivers/block/mg_disk.c             |   4
 drivers/block/mtip32xx/mtip32xx.c   |   4
 drivers/block/nvme.c                |  33
 drivers/block/rbd.c                 |  47
 drivers/block/rsxx/Makefile         |   2
 drivers/block/rsxx/config.c         |   8
 drivers/block/rsxx/core.c           | 237
 drivers/block/rsxx/cregs.c          | 112
 drivers/block/rsxx/dma.c            | 239
 drivers/block/rsxx/rsxx.h           |   6
 drivers/block/rsxx/rsxx_cfg.h       |   2
 drivers/block/rsxx/rsxx_priv.h      |  34
 drivers/block/xen-blkback/blkback.c |  68
 drivers/block/xen-blkback/common.h  |  40
 drivers/block/xen-blkback/xenbus.c  |  14
 drivers/block/xen-blkfront.c        | 154

19 files changed, 752 insertions(+), 280 deletions(-)
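Note: the bulk of this series (rsxx/core.c, cregs.c, dma.c) adds PCIe EEH recovery to the rsxx driver by registering a struct pci_error_handlers through the driver's .err_handler field. As orientation for the hunks below, here is the general shape of that wiring — quiesce the device in .error_detected, re-initialize it in .slot_reset. This is a minimal sketch, not the driver's actual code; the my_-prefixed names are hypothetical.

#include <linux/pci.h>

static pci_ers_result_t my_error_detected(struct pci_dev *dev,
					  enum pci_channel_state state)
{
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	/*
	 * Quiesce the device: mask interrupts, save in-flight work,
	 * and release DMA-coherent buffers before the slot is reset.
	 */
	pci_disable_device(dev);
	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t my_slot_reset(struct pci_dev *dev)
{
	/* Re-enable and reprogram the device, then requeue saved work. */
	if (pci_enable_device(dev))
		return PCI_ERS_RESULT_DISCONNECT;
	pci_set_master(dev);
	return PCI_ERS_RESULT_RECOVERED;
}

static const struct pci_error_handlers my_err_handler = {
	.error_detected	= my_error_detected,
	.slot_reset	= my_slot_reset,
};

/* Hooked up via the driver's struct pci_driver:
 *	.err_handler = &my_err_handler,
 */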
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 5dc0daed8fa..b81ddfea1da 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD  	  If unsure, say N.  config BLK_DEV_RSXX -	tristate "RamSam PCIe Flash SSD Device Driver" +	tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"  	depends on PCI  	help  	  Device driver for IBM's high speed PCIe SSD -	  storage devices: RamSan-70 and RamSan-80. +	  storage devices: FlashSystem-70 and FlashSystem-80.  	  To compile this driver as a module, choose M here: the  	  module will be called rsxx. diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ade58bc8f3c..1c1b8e544aa 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h)  	if (rc)  		return rc;  	h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev, -		cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); +		cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));  	if (!h->cfgtable)  		return -ENOMEM;  	rc = write_driver_ver_to_cfgtable(h->cfgtable); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 747bb2af69d..fe5f6403417 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1044,12 +1044,29 @@ static int loop_clr_fd(struct loop_device *lo)  	lo->lo_state = Lo_unbound;  	/* This is safe: open() is still holding a reference. */  	module_put(THIS_MODULE); -	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) -		ioctl_by_bdev(bdev, BLKRRPART, 0);  	lo->lo_flags = 0;  	if (!part_shift)  		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;  	mutex_unlock(&lo->lo_ctl_mutex); + +	/* +	 * Remove all partitions, since BLKRRPART won't remove user +	 * added partitions when max_part=0 +	 */ +	if (bdev) { +		struct disk_part_iter piter; +		struct hd_struct *part; + +		mutex_lock_nested(&bdev->bd_mutex, 1); +		invalidate_partition(bdev->bd_disk, 0); +		disk_part_iter_init(&piter, bdev->bd_disk, +					DISK_PITER_INCL_EMPTY); +		while ((part = disk_part_iter_next(&piter))) +			delete_partition(bdev->bd_disk, part->partno); +		disk_part_iter_exit(&piter); +		mutex_unlock(&bdev->bd_mutex); +	} +  	/*  	 * Need not hold lo_ctl_mutex to fput backing file.  	 
* Calling fput holding lo_ctl_mutex triggers a circular @@ -1623,6 +1640,7 @@ static int loop_add(struct loop_device **l, int i)  		goto out_free_dev;  	i = err; +	err = -ENOMEM;  	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);  	if (!lo->lo_queue)  		goto out_free_dev; diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1788f491e0f..076ae7f1b78 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -890,8 +890,10 @@ static int mg_probe(struct platform_device *plat_dev)  	gpio_direction_output(host->rst, 1);  	/* reset out pin */ -	if (!(prv_data->dev_attr & MG_DEV_MASK)) +	if (!(prv_data->dev_attr & MG_DEV_MASK)) { +		err = -EINVAL;  		goto probe_err_3a; +	}  	if (prv_data->dev_attr != MG_BOOT_DEV) {  		rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 11cc9522cdd..92250af84e7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4224,6 +4224,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,  	dd->isr_workq = create_workqueue(dd->workq_name);  	if (!dd->isr_workq) {  		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); +		rv = -ENOMEM;  		goto block_initialize_err;  	} @@ -4282,7 +4283,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,  	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);  	pci_set_master(pdev); -	if (pci_enable_msi(pdev)) { +	rv = pci_enable_msi(pdev); +	if (rv) {  		dev_warn(&pdev->dev,  			"Unable to enable MSI interrupt.\n");  		goto block_initialize_err; diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 07fb2dfaae1..9dcefe40380 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -135,6 +135,7 @@ static inline void _nvme_check_size(void)  	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);  	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);  	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); +	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);  }  typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, @@ -237,7 +238,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid,  		*fn = special_completion;  		return CMD_CTX_INVALID;  	} -	*fn = info[cmdid].fn; +	if (fn) +		*fn = info[cmdid].fn;  	ctx = info[cmdid].ctx;  	info[cmdid].fn = special_completion;  	info[cmdid].ctx = CMD_CTX_COMPLETED; @@ -335,6 +337,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)  		iod->offset = offsetof(struct nvme_iod, sg[nseg]);  		iod->npages = -1;  		iod->length = nbytes; +		iod->nents = 0;  	}  	return iod; @@ -375,7 +378,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,  	struct bio *bio = iod->private;  	u16 status = le16_to_cpup(&cqe->status) >> 1; -	dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, +	if (iod->nents) +		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,  			bio_data_dir(bio) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE);  	nvme_free_iod(dev, iod);  	if (status) { @@ -589,7 +593,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,  	result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs);  	if (result < 0) -		goto free_iod; +		goto free_cmdid;  	length = result;  	cmnd->rw.command_id = cmdid; @@ -609,6 +613,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,  	return 0; + free_cmdid: +	free_cmdid(nvmeq, cmdid, NULL);   free_iod:  	nvme_free_iod(nvmeq->dev, iod);   nomem: @@ -835,8 +841,8 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,  	return nvme_submit_admin_cmd(dev, &c, NULL);  } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, -				unsigned nsid, dma_addr_t dma_addr) +static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, +					dma_addr_t dma_addr, u32 *result)  {  	struct nvme_command c; @@ -846,7 +852,7 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid,  	c.features.prp1 = cpu_to_le64(dma_addr);  	c.features.fid = cpu_to_le32(fid); -	return nvme_submit_admin_cmd(dev, &c, NULL); +	return nvme_submit_admin_cmd(dev, &c, result);  }  static int nvme_set_features(struct nvme_dev *dev, unsigned fid, @@ -906,6 +912,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)  	spin_lock_irq(&nvmeq->q_lock);  	nvme_cancel_ios(nvmeq, false); +	while (bio_list_peek(&nvmeq->sq_cong)) { +		struct bio *bio = bio_list_pop(&nvmeq->sq_cong); +		bio_endio(bio, -EIO); +	}  	spin_unlock_irq(&nvmeq->q_lock);  	irq_set_affinity_hint(vector, NULL); @@ -1230,12 +1240,17 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,  	if (length != cmd.data_len)  		status = -ENOMEM;  	else -		status = nvme_submit_admin_cmd(dev, &c, NULL); +		status = nvme_submit_admin_cmd(dev, &c, &cmd.result);  	if (cmd.data_len) {  		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);  		nvme_free_iod(dev, iod);  	} + +	if (!status && copy_to_user(&ucmd->result, &cmd.result, +							sizeof(cmd.result))) +		status = -EFAULT; +  	return status;  } @@ -1523,9 +1538,9 @@ static int nvme_dev_add(struct nvme_dev *dev)  			continue;  		res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, -							dma_addr + 4096); +							dma_addr + 4096, NULL);  		if (res) -			continue; +			memset(mem + 4096, 0, 4096);  		ns = nvme_alloc_ns(dev, i, mem, mem + 4096);  		if (ns) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b..f556f8a8b3f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1264,6 +1264,32 @@ static bool obj_request_done_test(struct rbd_obj_request *obj_request)  	return atomic_read(&obj_request->done) != 0;  } +static void +rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) +{ +	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, +		obj_request, obj_request->img_request, obj_request->result, +		obj_request->xferred, obj_request->length); +	/* +	 * ENOENT means a hole in the image.  We zero-fill the +	 * entire length of the request.  A short read also implies +	 * zero-fill to the end of the request.  Either way we +	 * update the xferred count to indicate the whole request +	 * was satisfied. 
+	 */ +	BUG_ON(obj_request->type != OBJ_REQUEST_BIO); +	if (obj_request->result == -ENOENT) { +		zero_bio_chain(obj_request->bio_list, 0); +		obj_request->result = 0; +		obj_request->xferred = obj_request->length; +	} else if (obj_request->xferred < obj_request->length && +			!obj_request->result) { +		zero_bio_chain(obj_request->bio_list, obj_request->xferred); +		obj_request->xferred = obj_request->length; +	} +	obj_request_done_set(obj_request); +} +  static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)  {  	dout("%s: obj %p cb %p\n", __func__, obj_request, @@ -1284,23 +1310,10 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)  {  	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,  		obj_request->result, obj_request->xferred, obj_request->length); -	/* -	 * ENOENT means a hole in the object.  We zero-fill the -	 * entire length of the request.  A short read also implies -	 * zero-fill to the end of the request.  Either way we -	 * update the xferred count to indicate the whole request -	 * was satisfied. -	 */ -	if (obj_request->result == -ENOENT) { -		zero_bio_chain(obj_request->bio_list, 0); -		obj_request->result = 0; -		obj_request->xferred = obj_request->length; -	} else if (obj_request->xferred < obj_request->length && -			!obj_request->result) { -		zero_bio_chain(obj_request->bio_list, obj_request->xferred); -		obj_request->xferred = obj_request->length; -	} -	obj_request_done_set(obj_request); +	if (obj_request->img_request) +		rbd_img_obj_request_read_callback(obj_request); +	else +		obj_request_done_set(obj_request);  }  static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile index f35cd0b71f7..b1c53c0aa45 100644 --- a/drivers/block/rsxx/Makefile +++ b/drivers/block/rsxx/Makefile @@ -1,2 +1,2 @@  obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o -rsxx-y := config.o core.o cregs.o dev.o dma.o +rsxx-objs := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index a295e7e9ee4..10cd530d3e1 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -29,15 +29,13 @@  #include "rsxx_priv.h"  #include "rsxx_cfg.h" -static void initialize_config(void *config) +static void initialize_config(struct rsxx_card_cfg *cfg)  { -	struct rsxx_card_cfg *cfg = config; -  	cfg->hdr.version = RSXX_CFG_VERSION;  	cfg->data.block_size        = RSXX_HW_BLK_SIZE;  	cfg->data.stripe_size       = RSXX_HW_BLK_SIZE; -	cfg->data.vendor_id         = RSXX_VENDOR_ID_TMS_IBM; +	cfg->data.vendor_id         = RSXX_VENDOR_ID_IBM;  	cfg->data.cache_order       = (-1);  	cfg->data.intr_coal.mode    = RSXX_INTR_COAL_DISABLED;  	cfg->data.intr_coal.count   = 0; @@ -181,7 +179,7 @@ int rsxx_load_config(struct rsxx_cardinfo *card)  	} else {  		dev_info(CARD_TO_DEV(card),  			"Initializing card configuration.\n"); -		initialize_config(card); +		initialize_config(&card->config);  		st = rsxx_save_config(card);  		if (st)  			return st; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index e5162487686..5af21f2db29 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -30,6 +30,7 @@  #include <linux/reboot.h>  #include <linux/slab.h>  #include <linux/bitops.h> +#include <linux/delay.h>  #include <linux/genhd.h>  #include <linux/idr.h> @@ -39,8 +40,8 @@  #define NO_LEGACY 0 -MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); -MODULE_AUTHOR("IBM <support@ramsan.com>"); 
+MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");  MODULE_LICENSE("GPL");  MODULE_VERSION(DRIVER_VERSION); @@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida);  static DEFINE_SPINLOCK(rsxx_ida_lock);  /*----------------- Interrupt Control & Handling -------------------*/ + +static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) +{ +	card->isr_mask = 0; +	card->ier_mask = 0; +} +  static void __enable_intr(unsigned int *mask, unsigned int intr)  {  	*mask |= intr; @@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr)   */  void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)  { -	if (unlikely(card->halt)) +	if (unlikely(card->halt) || +	    unlikely(card->eeh_state))  		return;  	__enable_intr(&card->ier_mask, intr); @@ -80,6 +89,9 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)  void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)  { +	if (unlikely(card->eeh_state)) +		return; +  	__disable_intr(&card->ier_mask, intr);  	iowrite32(card->ier_mask, card->regmap + IER);  } @@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)  void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,  				 unsigned int intr)  { -	if (unlikely(card->halt)) +	if (unlikely(card->halt) || +	    unlikely(card->eeh_state))  		return;  	__enable_intr(&card->isr_mask, intr); @@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,  void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,  				  unsigned int intr)  { +	if (unlikely(card->eeh_state)) +		return; +  	__disable_intr(&card->isr_mask, intr);  	__disable_intr(&card->ier_mask, intr);  	iowrite32(card->ier_mask, card->regmap + IER); @@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)  	do {  		reread_isr = 0; +		if (unlikely(card->eeh_state)) +			break; +  		isr = ioread32(card->regmap + ISR);  		if (isr == 0xffffffff) {  			/* @@ -161,9 +180,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)  }  /*----------------- Card Event Handler -------------------*/ -static char *rsxx_card_state_to_str(unsigned int state) +static const char * const rsxx_card_state_to_str(unsigned int state)  { -	static char *state_strings[] = { +	static const char * const state_strings[] = {  		"Unknown", "Shutdown", "Starting", "Formatting",  		"Uninitialized", "Good", "Shutting Down",  		"Fault", "Read Only Fault", "dStroying" @@ -304,6 +323,192 @@ static int card_shutdown(struct rsxx_cardinfo *card)  	return 0;  } +static int rsxx_eeh_frozen(struct pci_dev *dev) +{ +	struct rsxx_cardinfo *card = pci_get_drvdata(dev); +	int i; +	int st; + +	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + +	card->eeh_state = 1; +	rsxx_mask_interrupts(card); + +	/* +	 * We need to guarantee that the write for eeh_state and masking +	 * interrupts does not become reordered. This will prevent a possible +	 * race condition with the EEH code. 
+	 */ +	wmb(); + +	pci_disable_device(dev); + +	st = rsxx_eeh_save_issued_dmas(card); +	if (st) +		return st; + +	rsxx_eeh_save_issued_creg(card); + +	for (i = 0; i < card->n_targets; i++) { +		if (card->ctrl[i].status.buf) +			pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, +					    card->ctrl[i].status.buf, +					    card->ctrl[i].status.dma_addr); +		if (card->ctrl[i].cmd.buf) +			pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, +					    card->ctrl[i].cmd.buf, +					    card->ctrl[i].cmd.dma_addr); +	} + +	return 0; +} + +static void rsxx_eeh_failure(struct pci_dev *dev) +{ +	struct rsxx_cardinfo *card = pci_get_drvdata(dev); +	int i; + +	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + +	card->eeh_state = 1; + +	for (i = 0; i < card->n_targets; i++) +		del_timer_sync(&card->ctrl[i].activity_timer); + +	rsxx_eeh_cancel_dmas(card); +} + +static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) +{ +	unsigned int status; +	int iter = 0; + +	/* We need to wait for the hardware to reset */ +	while (iter++ < 10) { +		status = ioread32(card->regmap + PCI_RECONFIG); + +		if (status & RSXX_FLUSH_BUSY) { +			ssleep(1); +			continue; +		} + +		if (status & RSXX_FLUSH_TIMEOUT) +			dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n"); +		return 0; +	} + +	/* Hardware failed resetting itself. */ +	return -1; +} + +static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, +					    enum pci_channel_state error) +{ +	int st; + +	if (dev->revision < RSXX_EEH_SUPPORT) +		return PCI_ERS_RESULT_NONE; + +	if (error == pci_channel_io_perm_failure) { +		rsxx_eeh_failure(dev); +		return PCI_ERS_RESULT_DISCONNECT; +	} + +	st = rsxx_eeh_frozen(dev); +	if (st) { +		dev_err(&dev->dev, "Slot reset setup failed\n"); +		rsxx_eeh_failure(dev); +		return PCI_ERS_RESULT_DISCONNECT; +	} + +	return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) +{ +	struct rsxx_cardinfo *card = pci_get_drvdata(dev); +	unsigned long flags; +	int i; +	int st; + +	dev_warn(&dev->dev, +		"IBM FlashSystem PCI: recovering from slot reset.\n"); + +	st = pci_enable_device(dev); +	if (st) +		goto failed_hw_setup; + +	pci_set_master(dev); + +	st = rsxx_eeh_fifo_flush_poll(card); +	if (st) +		goto failed_hw_setup; + +	rsxx_dma_queue_reset(card); + +	for (i = 0; i < card->n_targets; i++) { +		st = rsxx_hw_buffers_init(dev, &card->ctrl[i]); +		if (st) +			goto failed_hw_buffers_init; +	} + +	if (card->config_valid) +		rsxx_dma_configure(card); + +	/* Clears the ISR register from spurious interrupts */ +	st = ioread32(card->regmap + ISR); + +	card->eeh_state = 0; + +	st = rsxx_eeh_remap_dmas(card); +	if (st) +		goto failed_remap_dmas; + +	spin_lock_irqsave(&card->irq_lock, flags); +	if (card->n_targets & RSXX_MAX_TARGETS) +		rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); +	else +		rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C); +	spin_unlock_irqrestore(&card->irq_lock, flags); + +	rsxx_kick_creg_queue(card); + +	for (i = 0; i < card->n_targets; i++) { +		spin_lock(&card->ctrl[i].queue_lock); +		if (list_empty(&card->ctrl[i].queue)) { +			spin_unlock(&card->ctrl[i].queue_lock); +			continue; +		} +		spin_unlock(&card->ctrl[i].queue_lock); + +		queue_work(card->ctrl[i].issue_wq, +				&card->ctrl[i].issue_dma_work); +	} + +	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n"); + +	return PCI_ERS_RESULT_RECOVERED; + +failed_hw_buffers_init: +failed_remap_dmas: +	for (i = 0; i < card->n_targets; i++) { +		if (card->ctrl[i].status.buf) +			
pci_free_consistent(card->dev, +					STATUS_BUFFER_SIZE8, +					card->ctrl[i].status.buf, +					card->ctrl[i].status.dma_addr); +		if (card->ctrl[i].cmd.buf) +			pci_free_consistent(card->dev, +					COMMAND_BUFFER_SIZE8, +					card->ctrl[i].cmd.buf, +					card->ctrl[i].cmd.dma_addr); +	} +failed_hw_setup: +	rsxx_eeh_failure(dev); +	return PCI_ERS_RESULT_DISCONNECT; + +} +  /*----------------- Driver Initialization & Setup -------------------*/  /* Returns:   0 if the driver is compatible with the device  	     -1 if the driver is NOT compatible with the device */ @@ -383,6 +588,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,  	spin_lock_init(&card->irq_lock);  	card->halt = 0; +	card->eeh_state = 0;  	spin_lock_irq(&card->irq_lock);  	rsxx_disable_ier_and_isr(card, CR_INTR_ALL); @@ -538,9 +744,6 @@ static void rsxx_pci_remove(struct pci_dev *dev)  	rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);  	spin_unlock_irqrestore(&card->irq_lock, flags); -	/* Prevent work_structs from re-queuing themselves. */ -	card->halt = 1; -  	cancel_work_sync(&card->event_work);  	rsxx_destroy_dev(card); @@ -549,6 +752,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)  	spin_lock_irqsave(&card->irq_lock, flags);  	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);  	spin_unlock_irqrestore(&card->irq_lock, flags); + +	/* Prevent work_structs from re-queuing themselves. */ +	card->halt = 1; +  	free_irq(dev->irq, card);  	if (!force_legacy) @@ -592,11 +799,14 @@ static void rsxx_pci_shutdown(struct pci_dev *dev)  	card_shutdown(card);  } +static const struct pci_error_handlers rsxx_err_handler = { +	.error_detected = rsxx_error_detected, +	.slot_reset     = rsxx_slot_reset, +}; +  static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { -	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, -	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, -	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, -	{PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, +	{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, +	{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)},  	{0,},  }; @@ -609,6 +819,7 @@ static struct pci_driver rsxx_pci_driver = {  	.remove		= rsxx_pci_remove,  	.suspend	= rsxx_pci_suspend,  	.shutdown	= rsxx_pci_shutdown, +	.err_handler    = &rsxx_err_handler,  };  static int __init rsxx_core_init(void) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 80bbe639fcc..4b5c020a0a6 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool;  #error Unknown endianess!!! Aborting...  
#endif -static void copy_to_creg_data(struct rsxx_cardinfo *card, +static int copy_to_creg_data(struct rsxx_cardinfo *card,  			      int cnt8,  			      void *buf,  			      unsigned int stream) @@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,  	int i = 0;  	u32 *data = buf; +	if (unlikely(card->eeh_state)) +		return -EIO; +  	for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {  		/*  		 * Firmware implementation makes it necessary to byte swap on @@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,  		else  			iowrite32(data[i], card->regmap + CREG_DATA(i));  	} + +	return 0;  } -static void copy_from_creg_data(struct rsxx_cardinfo *card, +static int copy_from_creg_data(struct rsxx_cardinfo *card,  				int cnt8,  				void *buf,  				unsigned int stream) @@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,  	int i = 0;  	u32 *data = buf; +	if (unlikely(card->eeh_state)) +		return -EIO; +  	for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {  		/*  		 * Firmware implementation makes it necessary to byte swap on @@ -97,41 +105,31 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,  		else  			data[i] = ioread32(card->regmap + CREG_DATA(i));  	} -} - -static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) -{ -	struct creg_cmd *cmd; -	/* -	 * Spin lock is needed because this can be called in atomic/interrupt -	 * context. -	 */ -	spin_lock_bh(&card->creg_ctrl.lock); -	cmd = card->creg_ctrl.active_cmd; -	card->creg_ctrl.active_cmd = NULL; -	spin_unlock_bh(&card->creg_ctrl.lock); - -	return cmd; +	return 0;  }  static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)  { +	int st; + +	if (unlikely(card->eeh_state)) +		return; +  	iowrite32(cmd->addr, card->regmap + CREG_ADD);  	iowrite32(cmd->cnt8, card->regmap + CREG_CNT);  	if (cmd->op == CREG_OP_WRITE) { -		if (cmd->buf) -			copy_to_creg_data(card, cmd->cnt8, -					  cmd->buf, cmd->stream); +		if (cmd->buf) { +			st = copy_to_creg_data(card, cmd->cnt8, +					       cmd->buf, cmd->stream); +			if (st) +				return; +		}  	} -	/* -	 * Data copy must complete before initiating the command. This is -	 * needed for weakly ordered processors (i.e. PowerPC), so that all -	 * neccessary registers are written before we kick the hardware. -	 */ -	wmb(); +	if (unlikely(card->eeh_state)) +		return;  	/* Setting the valid bit will kick off the command. 
*/  	iowrite32(cmd->op, card->regmap + CREG_CMD); @@ -196,11 +194,11 @@ static int creg_queue_cmd(struct rsxx_cardinfo *card,  	cmd->cb_private = cb_private;  	cmd->status	= 0; -	spin_lock(&card->creg_ctrl.lock); +	spin_lock_bh(&card->creg_ctrl.lock);  	list_add_tail(&cmd->list, &card->creg_ctrl.queue);  	card->creg_ctrl.q_depth++;  	creg_kick_queue(card); -	spin_unlock(&card->creg_ctrl.lock); +	spin_unlock_bh(&card->creg_ctrl.lock);  	return 0;  } @@ -210,7 +208,11 @@ static void creg_cmd_timed_out(unsigned long data)  	struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data;  	struct creg_cmd *cmd; -	cmd = pop_active_cmd(card); +	spin_lock(&card->creg_ctrl.lock); +	cmd = card->creg_ctrl.active_cmd; +	card->creg_ctrl.active_cmd = NULL; +	spin_unlock(&card->creg_ctrl.lock); +  	if (cmd == NULL) {  		card->creg_ctrl.creg_stats.creg_timeout++;  		dev_warn(CARD_TO_DEV(card), @@ -247,7 +249,11 @@ static void creg_cmd_done(struct work_struct *work)  	if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)  		card->creg_ctrl.creg_stats.failed_cancel_timer++; -	cmd = pop_active_cmd(card); +	spin_lock_bh(&card->creg_ctrl.lock); +	cmd = card->creg_ctrl.active_cmd; +	card->creg_ctrl.active_cmd = NULL; +	spin_unlock_bh(&card->creg_ctrl.lock); +  	if (cmd == NULL) {  		dev_err(CARD_TO_DEV(card),  			"Spurious creg interrupt!\n"); @@ -287,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work)  			goto creg_done;  		} -		copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); +		st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);  	}  creg_done: @@ -296,10 +302,10 @@ creg_done:  	kmem_cache_free(creg_cmd_pool, cmd); -	spin_lock(&card->creg_ctrl.lock); +	spin_lock_bh(&card->creg_ctrl.lock);  	card->creg_ctrl.active = 0;  	creg_kick_queue(card); -	spin_unlock(&card->creg_ctrl.lock); +	spin_unlock_bh(&card->creg_ctrl.lock);  }  static void creg_reset(struct rsxx_cardinfo *card) @@ -324,7 +330,7 @@ static void creg_reset(struct rsxx_cardinfo *card)  		"Resetting creg interface for recovery\n");  	/* Cancel outstanding commands */ -	spin_lock(&card->creg_ctrl.lock); +	spin_lock_bh(&card->creg_ctrl.lock);  	list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {  		list_del(&cmd->list);  		card->creg_ctrl.q_depth--; @@ -345,7 +351,7 @@ static void creg_reset(struct rsxx_cardinfo *card)  		card->creg_ctrl.active = 0;  	} -	spin_unlock(&card->creg_ctrl.lock); +	spin_unlock_bh(&card->creg_ctrl.lock);  	card->creg_ctrl.reset = 0;  	spin_lock_irqsave(&card->irq_lock, flags); @@ -399,12 +405,12 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,  		return st;  	/* -	 * This timeout is neccessary for unresponsive hardware. The additional +	 * This timeout is necessary for unresponsive hardware. The additional  	 * 20 seconds to used to guarantee that each cregs requests has time to  	 * complete.  	 */ -	timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * -				card->creg_ctrl.q_depth) + 20000); +	timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC * +				   card->creg_ctrl.q_depth + 20000);  	/*  	 * The creg interface is guaranteed to complete. 
It has a timeout @@ -690,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card,  	return 0;  } +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card) +{ +	struct creg_cmd *cmd = NULL; + +	cmd = card->creg_ctrl.active_cmd; +	card->creg_ctrl.active_cmd = NULL; + +	if (cmd) { +		del_timer_sync(&card->creg_ctrl.cmd_timer); + +		spin_lock_bh(&card->creg_ctrl.lock); +		list_add(&cmd->list, &card->creg_ctrl.queue); +		card->creg_ctrl.q_depth++; +		card->creg_ctrl.active = 0; +		spin_unlock_bh(&card->creg_ctrl.lock); +	} +} + +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card) +{ +	spin_lock_bh(&card->creg_ctrl.lock); +	if (!list_empty(&card->creg_ctrl.queue)) +		creg_kick_queue(card); +	spin_unlock_bh(&card->creg_ctrl.lock); +} +  /*------------ Initialization & Setup --------------*/  int rsxx_creg_setup(struct rsxx_cardinfo *card)  { @@ -712,7 +744,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card)  	int cnt = 0;  	/* Cancel outstanding commands */ -	spin_lock(&card->creg_ctrl.lock); +	spin_lock_bh(&card->creg_ctrl.lock);  	list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {  		list_del(&cmd->list);  		if (cmd->cb) @@ -737,7 +769,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card)  			"Canceled active creg command\n");  		kmem_cache_free(creg_cmd_pool, cmd);  	} -	spin_unlock(&card->creg_ctrl.lock); +	spin_unlock_bh(&card->creg_ctrl.lock);  	cancel_work_sync(&card->creg_ctrl.done_work);  } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 63176e67662..0607513cfb4 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -28,7 +28,7 @@  struct rsxx_dma {  	struct list_head	 list;  	u8			 cmd; -	unsigned int		 laddr;     /* Logical address on the ramsan */ +	unsigned int		 laddr;     /* Logical address */  	struct {  		u32		 off;  		u32		 cnt; @@ -81,9 +81,6 @@ enum rsxx_hw_status {  	HW_STATUS_FAULT		= 0x08,  }; -#define STATUS_BUFFER_SIZE8     4096 -#define COMMAND_BUFFER_SIZE8    4096 -  static struct kmem_cache *rsxx_dma_pool;  struct dma_tracker { @@ -122,7 +119,7 @@ static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8)  	return tgt;  } -static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)  {  	/* Reset all DMA Command/Status Queues */  	iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); @@ -210,7 +207,8 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)  	u32 q_depth = 0;  	u32 intr_coal; -	if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) +	if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE || +	    unlikely(card->eeh_state))  		return;  	for (i = 0; i < card->n_targets; i++) @@ -223,31 +221,26 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)  }  /*----------------- RSXX DMA Handling -------------------*/ -static void rsxx_complete_dma(struct rsxx_cardinfo *card, +static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,  				  struct rsxx_dma *dma,  				  unsigned int status)  {  	if (status & DMA_SW_ERR) -		printk_ratelimited(KERN_ERR -				   "SW Error in DMA(cmd x%02x, laddr x%08x)\n", -				   dma->cmd, dma->laddr); +		ctrl->stats.dma_sw_err++;  	if (status & DMA_HW_FAULT) -		printk_ratelimited(KERN_ERR -				   "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", -				   dma->cmd, dma->laddr); +		ctrl->stats.dma_hw_fault++;  	if (status & DMA_CANCELLED) -		printk_ratelimited(KERN_ERR -				   "DMA Cancelled(cmd x%02x, laddr x%08x)\n", -				   dma->cmd, dma->laddr); +		
ctrl->stats.dma_cancelled++;  	if (dma->dma_addr) -		pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), +		pci_unmap_page(ctrl->card->dev, dma->dma_addr, +			       get_dma_size(dma),  			       dma->cmd == HW_CMD_BLK_WRITE ?  					   PCI_DMA_TODEVICE :  					   PCI_DMA_FROMDEVICE);  	if (dma->cb) -		dma->cb(card, dma->cb_data, status ? 1 : 0); +		dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);  	kmem_cache_free(rsxx_dma_pool, dma);  } @@ -330,14 +323,15 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,  	if (requeue_cmd)  		rsxx_requeue_dma(ctrl, dma);  	else -		rsxx_complete_dma(ctrl->card, dma, status); +		rsxx_complete_dma(ctrl, dma, status);  }  static void dma_engine_stalled(unsigned long data)  {  	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; -	if (atomic_read(&ctrl->stats.hw_q_depth) == 0) +	if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || +	    unlikely(ctrl->card->eeh_state))  		return;  	if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { @@ -369,7 +363,8 @@ static void rsxx_issue_dmas(struct work_struct *work)  	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);  	hw_cmd_buf = ctrl->cmd.buf; -	if (unlikely(ctrl->card->halt)) +	if (unlikely(ctrl->card->halt) || +	    unlikely(ctrl->card->eeh_state))  		return;  	while (1) { @@ -397,7 +392,7 @@ static void rsxx_issue_dmas(struct work_struct *work)  		 */  		if (unlikely(ctrl->card->dma_fault)) {  			push_tracker(ctrl->trackers, tag); -			rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); +			rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);  			continue;  		} @@ -432,19 +427,15 @@ static void rsxx_issue_dmas(struct work_struct *work)  	/* Let HW know we've queued commands. */  	if (cmds_pending) { -		/* -		 * We must guarantee that the CPU writes to 'ctrl->cmd.buf' -		 * (which is in PCI-consistent system-memory) from the loop -		 * above make it into the coherency domain before the -		 * following PIO "trigger" updating the cmd.idx.  A WMB is -		 * sufficient. We need not explicitly CPU cache-flush since -		 * the memory is a PCI-consistent (ie; coherent) mapping. 
-		 */ -		wmb(); -  		atomic_add(cmds_pending, &ctrl->stats.hw_q_depth);  		mod_timer(&ctrl->activity_timer,  			  jiffies + DMA_ACTIVITY_TIMEOUT); + +		if (unlikely(ctrl->card->eeh_state)) { +			del_timer_sync(&ctrl->activity_timer); +			return; +		} +  		iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);  	}  } @@ -463,7 +454,8 @@ static void rsxx_dma_done(struct work_struct *work)  	hw_st_buf = ctrl->status.buf;  	if (unlikely(ctrl->card->halt) || -	    unlikely(ctrl->card->dma_fault)) +	    unlikely(ctrl->card->dma_fault) || +	    unlikely(ctrl->card->eeh_state))  		return;  	count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); @@ -508,7 +500,7 @@ static void rsxx_dma_done(struct work_struct *work)  		if (status)  			rsxx_handle_dma_error(ctrl, dma, status);  		else -			rsxx_complete_dma(ctrl->card, dma, 0); +			rsxx_complete_dma(ctrl, dma, 0);  		push_tracker(ctrl->trackers, tag); @@ -727,20 +719,54 @@ bvec_err:  /*----------------- DMA Engine Initialization & Setup -------------------*/ +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) +{ +	ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, +				&ctrl->status.dma_addr); +	ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, +				&ctrl->cmd.dma_addr); +	if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) +		return -ENOMEM; + +	memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); +	iowrite32(lower_32_bits(ctrl->status.dma_addr), +		ctrl->regmap + SB_ADD_LO); +	iowrite32(upper_32_bits(ctrl->status.dma_addr), +		ctrl->regmap + SB_ADD_HI); + +	memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); +	iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); +	iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + +	ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); +	if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { +		dev_crit(&dev->dev, "Failed reading status cnt x%x\n", +			ctrl->status.idx); +		return -EINVAL; +	} +	iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); +	iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + +	ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); +	if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { +		dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", +			ctrl->status.idx); +		return -EINVAL; +	} +	iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); +	iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + +	return 0; +} +  static int rsxx_dma_ctrl_init(struct pci_dev *dev,  				  struct rsxx_dma_ctrl *ctrl)  {  	int i; +	int st;  	memset(&ctrl->stats, 0, sizeof(ctrl->stats)); -	ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, -						&ctrl->status.dma_addr); -	ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, -					     &ctrl->cmd.dma_addr); -	if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) -		return -ENOMEM; -  	ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8);  	if (!ctrl->trackers)  		return -ENOMEM; @@ -770,35 +796,9 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,  	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);  	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); -	memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); -	iowrite32(lower_32_bits(ctrl->status.dma_addr), -		  ctrl->regmap + SB_ADD_LO); -	iowrite32(upper_32_bits(ctrl->status.dma_addr), -		  ctrl->regmap + SB_ADD_HI); - -	memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); -	iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); -	
iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); - -	ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); -	if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { -		dev_crit(&dev->dev, "Failed reading status cnt x%x\n", -			 ctrl->status.idx); -		return -EINVAL; -	} -	iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); -	iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); - -	ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); -	if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { -		dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", -			 ctrl->status.idx); -		return -EINVAL; -	} -	iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); -	iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - -	wmb(); +	st = rsxx_hw_buffers_init(dev, ctrl); +	if (st) +		return st;  	return 0;  } @@ -834,7 +834,7 @@ static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card,  	return 0;  } -static int rsxx_dma_configure(struct rsxx_cardinfo *card) +int rsxx_dma_configure(struct rsxx_cardinfo *card)  {  	u32 intr_coal; @@ -980,6 +980,103 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)  	}  } +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +{ +	int i; +	int j; +	int cnt; +	struct rsxx_dma *dma; +	struct list_head *issued_dmas; + +	issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, +			      GFP_KERNEL); +	if (!issued_dmas) +		return -ENOMEM; + +	for (i = 0; i < card->n_targets; i++) { +		INIT_LIST_HEAD(&issued_dmas[i]); +		cnt = 0; +		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { +			dma = get_tracker_dma(card->ctrl[i].trackers, j); +			if (dma == NULL) +				continue; + +			if (dma->cmd == HW_CMD_BLK_WRITE) +				card->ctrl[i].stats.writes_issued--; +			else if (dma->cmd == HW_CMD_BLK_DISCARD) +				card->ctrl[i].stats.discards_issued--; +			else +				card->ctrl[i].stats.reads_issued--; + +			list_add_tail(&dma->list, &issued_dmas[i]); +			push_tracker(card->ctrl[i].trackers, j); +			cnt++; +		} + +		spin_lock(&card->ctrl[i].queue_lock); +		list_splice(&issued_dmas[i], &card->ctrl[i].queue); + +		atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); +		card->ctrl[i].stats.sw_q_depth += cnt; +		card->ctrl[i].e_cnt = 0; + +		list_for_each_entry(dma, &card->ctrl[i].queue, list) { +			if (dma->dma_addr) +				pci_unmap_page(card->dev, dma->dma_addr, +					       get_dma_size(dma), +					       dma->cmd == HW_CMD_BLK_WRITE ? +					       PCI_DMA_TODEVICE : +					       PCI_DMA_FROMDEVICE); +		} +		spin_unlock(&card->ctrl[i].queue_lock); +	} + +	kfree(issued_dmas); + +	return 0; +} + +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) +{ +	struct rsxx_dma *dma; +	struct rsxx_dma *tmp; +	int i; + +	for (i = 0; i < card->n_targets; i++) { +		spin_lock(&card->ctrl[i].queue_lock); +		list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { +			list_del(&dma->list); + +			rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); +		} +		spin_unlock(&card->ctrl[i].queue_lock); +	} +} + +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) +{ +	struct rsxx_dma *dma; +	int i; + +	for (i = 0; i < card->n_targets; i++) { +		spin_lock(&card->ctrl[i].queue_lock); +		list_for_each_entry(dma, &card->ctrl[i].queue, list) { +			dma->dma_addr = pci_map_page(card->dev, dma->page, +					dma->pg_off, get_dma_size(dma), +					dma->cmd == HW_CMD_BLK_WRITE ? 
+					PCI_DMA_TODEVICE : +					PCI_DMA_FROMDEVICE); +			if (!dma->dma_addr) { +				spin_unlock(&card->ctrl[i].queue_lock); +				kmem_cache_free(rsxx_dma_pool, dma); +				return -ENOMEM; +			} +		} +		spin_unlock(&card->ctrl[i].queue_lock); +	} + +	return 0; +}  int rsxx_dma_init(void)  { diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h index 2e50b65902b..24ba3642bd8 100644 --- a/drivers/block/rsxx/rsxx.h +++ b/drivers/block/rsxx/rsxx.h @@ -27,15 +27,17 @@  /*----------------- IOCTL Definitions -------------------*/ +#define RSXX_MAX_DATA 8 +  struct rsxx_reg_access {  	__u32 addr;  	__u32 cnt;  	__u32 stat;  	__u32 stream; -	__u32 data[8]; +	__u32 data[RSXX_MAX_DATA];  }; -#define RSXX_MAX_REG_CNT	(8 * (sizeof(__u32))) +#define RSXX_MAX_REG_CNT	(RSXX_MAX_DATA * (sizeof(__u32)))  #define RSXX_IOC_MAGIC 'r' diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h index c025fe5fdb7..f384c943846 100644 --- a/drivers/block/rsxx/rsxx_cfg.h +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -58,7 +58,7 @@ struct rsxx_card_cfg {  };  /* Vendor ID Values */ -#define RSXX_VENDOR_ID_TMS_IBM		0 +#define RSXX_VENDOR_ID_IBM		0  #define RSXX_VENDOR_ID_DSI		1  #define RSXX_VENDOR_COUNT		2 diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index a1ac907d8f4..382e8bf5c03 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -45,16 +45,13 @@  struct proc_cmd; -#define PCI_VENDOR_ID_TMS_IBM		0x15B6 -#define PCI_DEVICE_ID_RS70_FLASH	0x0019 -#define PCI_DEVICE_ID_RS70D_FLASH	0x001A -#define PCI_DEVICE_ID_RS80_FLASH	0x001C -#define PCI_DEVICE_ID_RS81_FLASH	0x001E +#define PCI_DEVICE_ID_FS70_FLASH	0x04A9 +#define PCI_DEVICE_ID_FS80_FLASH	0x04AA  #define RS70_PCI_REV_SUPPORTED	4  #define DRIVER_NAME "rsxx" -#define DRIVER_VERSION "3.7" +#define DRIVER_VERSION "4.0"  /* Block size is 4096 */  #define RSXX_HW_BLK_SHIFT		12 @@ -67,6 +64,9 @@ struct proc_cmd;  #define RSXX_MAX_OUTSTANDING_CMDS	255  #define RSXX_CS_IDX_MASK		0xff +#define STATUS_BUFFER_SIZE8     4096 +#define COMMAND_BUFFER_SIZE8    4096 +  #define RSXX_MAX_TARGETS	8  struct dma_tracker_list; @@ -91,6 +91,9 @@ struct rsxx_dma_stats {  	u32 discards_failed;  	u32 done_rescheduled;  	u32 issue_rescheduled; +	u32 dma_sw_err; +	u32 dma_hw_fault; +	u32 dma_cancelled;  	u32 sw_q_depth;		/* Number of DMAs on the SW queue. */  	atomic_t hw_q_depth;	/* Number of DMAs queued to HW. 
*/  }; @@ -116,6 +119,7 @@ struct rsxx_dma_ctrl {  struct rsxx_cardinfo {  	struct pci_dev		*dev;  	unsigned int		halt; +	unsigned int		eeh_state;  	void			__iomem *regmap;  	spinlock_t		irq_lock; @@ -224,6 +228,7 @@ enum rsxx_pci_regmap {  	PERF_RD512_HI	= 0xac,  	PERF_WR512_LO	= 0xb0,  	PERF_WR512_HI	= 0xb4, +	PCI_RECONFIG	= 0xb8,  };  enum rsxx_intr { @@ -237,6 +242,8 @@ enum rsxx_intr {  	CR_INTR_DMA5	= 0x00000080,  	CR_INTR_DMA6	= 0x00000100,  	CR_INTR_DMA7	= 0x00000200, +	CR_INTR_ALL_C	= 0x0000003f, +	CR_INTR_ALL_G	= 0x000003ff,  	CR_INTR_DMA_ALL = 0x000003f5,  	CR_INTR_ALL	= 0xffffffff,  }; @@ -253,8 +260,14 @@ enum rsxx_pci_reset {  	DMA_QUEUE_RESET		= 0x00000001,  }; +enum rsxx_hw_fifo_flush { +	RSXX_FLUSH_BUSY		= 0x00000002, +	RSXX_FLUSH_TIMEOUT	= 0x00000004, +}; +  enum rsxx_pci_revision {  	RSXX_DISCARD_SUPPORT = 2, +	RSXX_EEH_SUPPORT     = 3,  };  enum rsxx_creg_cmd { @@ -360,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card);  void rsxx_dma_destroy(struct rsxx_cardinfo *card);  int rsxx_dma_init(void);  void rsxx_dma_cleanup(void); +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); +int rsxx_dma_configure(struct rsxx_cardinfo *card);  int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,  			   struct bio *bio,  			   atomic_t *n_dmas,  			   rsxx_dma_cb cb,  			   void *cb_data); +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);  /***** cregs.c *****/  int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, @@ -389,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card);  void rsxx_creg_destroy(struct rsxx_cardinfo *card);  int rsxx_creg_init(void);  void rsxx_creg_cleanup(void); -  int rsxx_reg_access(struct rsxx_cardinfo *card,  			struct rsxx_reg_access __user *ucmd,  			int read); +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card); +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de1f319f7bd..dd5b2fed97e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,  #define foreach_grant_safe(pos, n, rbtree, node) \  	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ -	     (n) = rb_next(&(pos)->node); \ +	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \  	     &(pos)->node != NULL; \  	     (pos) = container_of(n, typeof(*(pos)), node), \  	     (n) = (&(pos)->node != NULL) ? 
rb_next(&(pos)->node) : NULL) @@ -381,8 +381,8 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)  static void print_stats(struct xen_blkif *blkif)  { -	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d" -		 "  |  ds %4d\n", +	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu" +		 "  |  ds %4llu\n",  		 current->comm, blkif->st_oo_req,  		 blkif->st_rd_req, blkif->st_wr_req,  		 blkif->st_f_req, blkif->st_ds_req); @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)  }  struct seg_buf { -	unsigned long buf; +	unsigned int offset;  	unsigned int nsec;  };  /* @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,  				 * If this is a new persistent grant  				 * save the handler  				 */ -				persistent_gnts[i]->handle = map[j].handle; -				persistent_gnts[i]->dev_bus_addr = -					map[j++].dev_bus_addr; +				persistent_gnts[i]->handle = map[j++].handle;  			}  			pending_handle(pending_req, i) =  				persistent_gnts[i]->handle;  			if (ret)  				continue; - -			seg[i].buf = persistent_gnts[i]->dev_bus_addr | -				(req->u.rw.seg[i].first_sect << 9);  		} else { -			pending_handle(pending_req, i) = map[j].handle; +			pending_handle(pending_req, i) = map[j++].handle;  			bitmap_set(pending_req->unmap_seg, i, 1); -			if (ret) { -				j++; +			if (ret)  				continue; -			} - -			seg[i].buf = map[j++].dev_bus_addr | -				(req->u.rw.seg[i].first_sect << 9);  		} +		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);  	}  	return ret;  } @@ -679,6 +670,16 @@ static int dispatch_discard_io(struct xen_blkif *blkif,  	return err;  } +static int dispatch_other_io(struct xen_blkif *blkif, +			     struct blkif_request *req, +			     struct pending_req *pending_req) +{ +	free_req(pending_req); +	make_response(blkif, req->u.other.id, req->operation, +		      BLKIF_RSP_EOPNOTSUPP); +	return -EIO; +} +  static void xen_blk_drain_io(struct xen_blkif *blkif)  {  	atomic_set(&blkif->drain, 1); @@ -800,17 +801,30 @@ __do_block_io_op(struct xen_blkif *blkif)  		/* Apply all sanity checks to /private copy/ of request. */  		barrier(); -		if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + +		switch (req.operation) { +		case BLKIF_OP_READ: +		case BLKIF_OP_WRITE: +		case BLKIF_OP_WRITE_BARRIER: +		case BLKIF_OP_FLUSH_DISKCACHE: +			if (dispatch_rw_block_io(blkif, &req, pending_req)) +				goto done; +			break; +		case BLKIF_OP_DISCARD:  			free_req(pending_req);  			if (dispatch_discard_io(blkif, &req)) -				break; -		} else if (dispatch_rw_block_io(blkif, &req, pending_req)) +				goto done;  			break; +		default: +			if (dispatch_other_io(blkif, &req, pending_req)) +				goto done; +			break; +		}  		/* Yield point for this unbounded loop. */  		cond_resched();  	} - +done:  	return more_to_do;  } @@ -904,7 +918,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,  		pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",  			 operation == READ ? 
"read" : "write",  			 preq.sector_number, -			 preq.sector_number + preq.nr_sects, preq.dev); +			 preq.sector_number + preq.nr_sects, +			 blkif->vbd.pdevice);  		goto fail_response;  	} @@ -947,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,  		       (bio_add_page(bio,  				     pages[i],  				     seg[i].nsec << 9, -				     seg[i].buf & ~PAGE_MASK) == 0)) { +				     seg[i].offset) == 0)) {  			bio = bio_alloc(GFP_KERNEL, nseg-i);  			if (unlikely(bio == NULL)) @@ -977,13 +992,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,  		bio->bi_end_io  = end_block_io_op;  	} -	/* -	 * We set it one so that the last submit_bio does not have to call -	 * atomic_inc. -	 */  	atomic_set(&pending_req->pendcnt, nbio); - -	/* Get a reference count for the disk queue and start sending I/O */  	blk_start_plug(&plug);  	for (i = 0; i < nbio; i++) @@ -1011,6 +1020,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,   fail_put_bio:  	for (i = 0; i < nbio; i++)  		bio_put(biolist[i]); +	atomic_set(&pending_req->pendcnt, 1);  	__end_block_io_op(pending_req, -EINVAL);  	msleep(1); /* back off a bit */  	return -EIO; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6072390c7f5..60103e2517b 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -77,11 +77,18 @@ struct blkif_x86_32_request_discard {  	uint64_t       nr_sectors;  } __attribute__((__packed__)); +struct blkif_x86_32_request_other { +	uint8_t        _pad1; +	blkif_vdev_t   _pad2; +	uint64_t       id;           /* private guest value, echoed in resp  */ +} __attribute__((__packed__)); +  struct blkif_x86_32_request {  	uint8_t        operation;    /* BLKIF_OP_???                         */  	union {  		struct blkif_x86_32_request_rw rw;  		struct blkif_x86_32_request_discard discard; +		struct blkif_x86_32_request_other other;  	} u;  } __attribute__((__packed__)); @@ -113,11 +120,19 @@ struct blkif_x86_64_request_discard {  	uint64_t       nr_sectors;  } __attribute__((__packed__)); +struct blkif_x86_64_request_other { +	uint8_t        _pad1; +	blkif_vdev_t   _pad2; +	uint32_t       _pad3;        /* offsetof(blkif_..,u.discard.id)==8   */ +	uint64_t       id;           /* private guest value, echoed in resp  */ +} __attribute__((__packed__)); +  struct blkif_x86_64_request {  	uint8_t        operation;    /* BLKIF_OP_???                         */  	union {  		struct blkif_x86_64_request_rw rw;  		struct blkif_x86_64_request_discard discard; +		struct blkif_x86_64_request_other other;  	} u;  } __attribute__((__packed__)); @@ -172,7 +187,6 @@ struct persistent_gnt {  	struct page *page;  	grant_ref_t gnt;  	grant_handle_t handle; -	uint64_t dev_bus_addr;  	struct rb_node node;  }; @@ -208,13 +222,13 @@ struct xen_blkif {  	/* statistics */  	unsigned long		st_print; -	int			st_rd_req; -	int			st_wr_req; -	int			st_oo_req; -	int			st_f_req; -	int			st_ds_req; -	int			st_rd_sect; -	int			st_wr_sect; +	unsigned long long			st_rd_req; +	unsigned long long			st_wr_req; +	unsigned long long			st_oo_req; +	unsigned long long			st_f_req; +	unsigned long long			st_ds_req; +	unsigned long long			st_rd_sect; +	unsigned long long			st_wr_sect;  	wait_queue_head_t	waiting_to_free;  }; @@ -278,6 +292,11 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,  		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;  		break;  	default: +		/* +		 * Don't know how to translate this op. 
Only get the +		 * ID so failure can be reported to the frontend. +		 */ +		dst->u.other.id = src->u.other.id;  		break;  	}  } @@ -309,6 +328,11 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,  		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;  		break;  	default: +		/* +		 * Don't know how to translate this op. Only get the +		 * ID so failure can be reported to the frontend. +		 */ +		dst->u.other.id = src->u.other.id;  		break;  	}  } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5e237f630c4..8bfd1bcf95e 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -230,13 +230,13 @@ int __init xen_blkif_interface_init(void)  	}								\  	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) -VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req); -VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req); -VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req); -VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req); -VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req); -VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); +VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req); +VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req); +VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req); +VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);  static struct attribute *xen_vbdstat_attrs[] = {  	&dev_attr_oo_req.attr, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c3dae2e0f29..a894f88762d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,7 +44,7 @@  #include <linux/mutex.h>  #include <linux/scatterlist.h>  #include <linux/bitmap.h> -#include <linux/llist.h> +#include <linux/list.h>  #include <xen/xen.h>  #include <xen/xenbus.h> @@ -68,13 +68,12 @@ enum blkif_state {  struct grant {  	grant_ref_t gref;  	unsigned long pfn; -	struct llist_node node; +	struct list_head node;  };  struct blk_shadow {  	struct blkif_request req;  	struct request *request; -	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];  	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];  }; @@ -105,7 +104,7 @@ struct blkfront_info  	struct work_struct work;  	struct gnttab_free_callback callback;  	struct blk_shadow shadow[BLK_RING_SIZE]; -	struct llist_head persistent_gnts; +	struct list_head persistent_gnts;  	unsigned int persistent_gnts_c;  	unsigned long shadow_free;  	unsigned int feature_flush; @@ -165,6 +164,69 @@ static int add_id_to_freelist(struct blkfront_info *info,  	return 0;  } +static int fill_grant_buffer(struct blkfront_info *info, int num) +{ +	struct page *granted_page; +	struct grant *gnt_list_entry, *n; +	int i = 0; + +	while(i < num) { +		gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); +		if (!gnt_list_entry) +			goto out_of_memory; + +		granted_page = alloc_page(GFP_NOIO); +		if (!granted_page) { +			kfree(gnt_list_entry); +			goto out_of_memory; +		} + +		gnt_list_entry->pfn = page_to_pfn(granted_page); +		gnt_list_entry->gref = GRANT_INVALID_REF; +		list_add(&gnt_list_entry->node, &info->persistent_gnts); +		i++; +	} + +	return 0; + +out_of_memory: +	list_for_each_entry_safe(gnt_list_entry, n, +	                         &info->persistent_gnts, node) { +		list_del(&gnt_list_entry->node); +		__free_page(pfn_to_page(gnt_list_entry->pfn)); +		
kfree(gnt_list_entry); +		i--; +	} +	BUG_ON(i != 0); +	return -ENOMEM; +} + +static struct grant *get_grant(grant_ref_t *gref_head, +                               struct blkfront_info *info) +{ +	struct grant *gnt_list_entry; +	unsigned long buffer_mfn; + +	BUG_ON(list_empty(&info->persistent_gnts)); +	gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant, +	                                  node); +	list_del(&gnt_list_entry->node); + +	if (gnt_list_entry->gref != GRANT_INVALID_REF) { +		info->persistent_gnts_c--; +		return gnt_list_entry; +	} + +	/* Assign a gref to this page */ +	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); +	BUG_ON(gnt_list_entry->gref == -ENOSPC); +	buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); +	gnttab_grant_foreign_access_ref(gnt_list_entry->gref, +	                                info->xbdev->otherend_id, +	                                buffer_mfn, 0); +	return gnt_list_entry; +} +  static const char *op_name(int op)  {  	static const char *const names[] = { @@ -293,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,  static int blkif_queue_request(struct request *req)  {  	struct blkfront_info *info = req->rq_disk->private_data; -	unsigned long buffer_mfn;  	struct blkif_request *ring_req;  	unsigned long id;  	unsigned int fsect, lsect; @@ -306,7 +367,6 @@ static int blkif_queue_request(struct request *req)  	 */  	bool new_persistent_gnts;  	grant_ref_t gref_head; -	struct page *granted_page;  	struct grant *gnt_list_entry = NULL;  	struct scatterlist *sg; @@ -370,41 +430,8 @@ static int blkif_queue_request(struct request *req)  			fsect = sg->offset >> 9;  			lsect = fsect + (sg->length >> 9) - 1; -			if (info->persistent_gnts_c) { -				BUG_ON(llist_empty(&info->persistent_gnts)); -				gnt_list_entry = llist_entry( -					llist_del_first(&info->persistent_gnts), -					struct grant, node); - -				ref = gnt_list_entry->gref; -				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); -				info->persistent_gnts_c--; -			} else { -				ref = gnttab_claim_grant_reference(&gref_head); -				BUG_ON(ref == -ENOSPC); - -				gnt_list_entry = -					kmalloc(sizeof(struct grant), -							 GFP_ATOMIC); -				if (!gnt_list_entry) -					return -ENOMEM; - -				granted_page = alloc_page(GFP_ATOMIC); -				if (!granted_page) { -					kfree(gnt_list_entry); -					return -ENOMEM; -				} - -				gnt_list_entry->pfn = -					page_to_pfn(granted_page); -				gnt_list_entry->gref = ref; - -				buffer_mfn = pfn_to_mfn(page_to_pfn( -								granted_page)); -				gnttab_grant_foreign_access_ref(ref, -					info->xbdev->otherend_id, -					buffer_mfn, 0); -			} +			gnt_list_entry = get_grant(&gref_head, info); +			ref = gnt_list_entry->gref;  			info->shadow[id].grants_used[i] = gnt_list_entry; @@ -435,7 +462,6 @@ static int blkif_queue_request(struct request *req)  				kunmap_atomic(shared_data);  			} -			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);  			ring_req->u.rw.seg[i] =  					(struct blkif_request_segment) {  						.gref       = ref, @@ -790,9 +816,8 @@ static void blkif_restart_queue(struct work_struct *work)  static void blkif_free(struct blkfront_info *info, int suspend)  { -	struct llist_node *all_gnts; -	struct grant *persistent_gnt, *tmp; -	struct llist_node *n; +	struct grant *persistent_gnt; +	struct grant *n;  	/* Prevent new requests being issued until we fix things up. 
*/  	spin_lock_irq(&info->io_lock); @@ -803,22 +828,20 @@ static void blkif_free(struct blkfront_info *info, int suspend)  		blk_stop_queue(info->rq);  	/* Remove all persistent grants */ -	if (info->persistent_gnts_c) { -		all_gnts = llist_del_all(&info->persistent_gnts); -		persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); -		while (persistent_gnt) { -			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); +	if (!list_empty(&info->persistent_gnts)) { +		list_for_each_entry_safe(persistent_gnt, n, +		                         &info->persistent_gnts, node) { +			list_del(&persistent_gnt->node); +			if (persistent_gnt->gref != GRANT_INVALID_REF) { +				gnttab_end_foreign_access(persistent_gnt->gref, +				                          0, 0UL); +				info->persistent_gnts_c--; +			}  			__free_page(pfn_to_page(persistent_gnt->pfn)); -			tmp = persistent_gnt; -			n = persistent_gnt->node.next; -			if (n) -				persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); -			else -				persistent_gnt = NULL; -			kfree(tmp); +			kfree(persistent_gnt);  		} -		info->persistent_gnts_c = 0;  	} +	BUG_ON(info->persistent_gnts_c != 0);  	/* No more gnttab callback work. */  	gnttab_cancel_free_callback(&info->callback); @@ -875,7 +898,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,  	}  	/* Add the persistent grant into the list of free grants */  	for (i = 0; i < s->req.u.rw.nr_segments; i++) { -		llist_add(&s->grants_used[i]->node, &info->persistent_gnts); +		list_add(&s->grants_used[i]->node, &info->persistent_gnts);  		info->persistent_gnts_c++;  	} } @@ -1013,6 +1036,12 @@ static int setup_blkring(struct xenbus_device *dev,  	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); +	/* Allocate memory for grants */ +	err = fill_grant_buffer(info, BLK_RING_SIZE * +	                              BLKIF_MAX_SEGMENTS_PER_REQUEST); +	if (err) +		goto fail; +  	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));  	if (err < 0) {  		free_page((unsigned long)sring); @@ -1171,7 +1200,7 @@ static int blkfront_probe(struct xenbus_device *dev,  	spin_lock_init(&info->io_lock);  	info->xbdev = dev;  	info->vdevice = vdevice; -	init_llist_head(&info->persistent_gnts); +	INIT_LIST_HEAD(&info->persistent_gnts);  	info->persistent_gnts_c = 0;  	info->connected = BLKIF_STATE_DISCONNECTED;  	INIT_WORK(&info->work, blkif_restart_queue); @@ -1203,11 +1232,10 @@ static int blkif_recover(struct blkfront_info *info)  	int j;  	/* Stage 1: Make a safe copy of the shadow state. */ -	copy = kmalloc(sizeof(info->shadow), +	copy = kmemdup(info->shadow, sizeof(info->shadow),  		       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);  	if (!copy)  		return -ENOMEM; -	memcpy(copy, info->shadow, sizeof(info->shadow));  	/* Stage 2: Set up free list. */  	memset(&info->shadow, 0, sizeof(info->shadow)); @@ -1236,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info)  				gnttab_grant_foreign_access_ref(  					req->u.rw.seg[j].gref,  					info->xbdev->otherend_id, -					pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), +					pfn_to_mfn(copy[i].grants_used[j]->pfn),  					0);  		}  		info->shadow[req->u.rw.id].req = *req;
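Note: the xen-blkfront hunks above convert the persistent-grant pool from a lock-free llist to a plain list_head, which is what makes the delete-while-iterating teardown in blkif_free() legal. A minimal sketch of that pattern follows; struct my_ent and my_free_all are hypothetical names, not part of the driver.

#include <linux/list.h>
#include <linux/slab.h>

struct my_ent {
	struct list_head node;
	/* payload ... */
};

static void my_free_all(struct list_head *pool)
{
	struct my_ent *ent, *n;

	/*
	 * The _safe variant caches the next pointer up front, so
	 * list_del() + kfree() of the current entry inside the loop
	 * cannot corrupt the traversal.
	 */
	list_for_each_entry_safe(ent, n, pool, node) {
		list_del(&ent->node);
		kfree(ent);
	}
}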