Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c        |  68
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c    |  34
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  |  14
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c       | 155
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c  |  15
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c        | 113
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h        |  11
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c         |   2
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c      |   2
-rw-r--r--  drivers/infiniband/hw/nes/nes_nic.c     |  14
10 files changed, 333 insertions(+), 95 deletions(-)
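
Most of the cxgb4 hunks below share one pattern: branch on the adapter generation (is_t4()/is_t5()) and pick the T4 or T5 request layout or resource. A minimal sketch of that dispatch, based on the send_connect() hunk below (the helper name is hypothetical, added only to illustrate the pattern):

/* Hypothetical helper illustrating the T4/T5 dispatch used in send_connect():
 * the work-request size depends on the adapter generation, and the same
 * is_t4() test later selects between cpl_act_open_req and cpl_t5_act_open_req.
 */
static int connect_req_wrlen(struct c4iw_ep *ep)
{
	int size = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
		sizeof(struct cpl_act_open_req) :
		sizeof(struct cpl_t5_act_open_req);

	return roundup(size, 16);	/* WRs are padded to 16-byte multiples */
}
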
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3fde52840c..65c30ea8c1a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -511,12 +511,16 @@ static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,  static int send_connect(struct c4iw_ep *ep)  {  	struct cpl_act_open_req *req; +	struct cpl_t5_act_open_req *t5_req;  	struct sk_buff *skb;  	u64 opt0;  	u32 opt2;  	unsigned int mtu_idx;  	int wscale; -	int wrlen = roundup(sizeof *req, 16); +	int size = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? +		sizeof(struct cpl_act_open_req) : +		sizeof(struct cpl_t5_act_open_req); +	int wrlen = roundup(size, 16);  	PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid); @@ -552,17 +556,36 @@ static int send_connect(struct c4iw_ep *ep)  		opt2 |= WND_SCALE_EN(1);  	t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); -	req = (struct cpl_act_open_req *) skb_put(skb, wrlen); -	INIT_TP_WR(req, 0); -	OPCODE_TID(req) = cpu_to_be32( -		MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid))); -	req->local_port = ep->com.local_addr.sin_port; -	req->peer_port = ep->com.remote_addr.sin_port; -	req->local_ip = ep->com.local_addr.sin_addr.s_addr; -	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; -	req->opt0 = cpu_to_be64(opt0); -	req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); -	req->opt2 = cpu_to_be32(opt2); +	if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { +		req = (struct cpl_act_open_req *) skb_put(skb, wrlen); +		INIT_TP_WR(req, 0); +		OPCODE_TID(req) = cpu_to_be32( +				MK_OPCODE_TID(CPL_ACT_OPEN_REQ, +				((ep->rss_qid << 14) | ep->atid))); +		req->local_port = ep->com.local_addr.sin_port; +		req->peer_port = ep->com.remote_addr.sin_port; +		req->local_ip = ep->com.local_addr.sin_addr.s_addr; +		req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; +		req->opt0 = cpu_to_be64(opt0); +		req->params = cpu_to_be32(select_ntuple(ep->com.dev, +					ep->dst, ep->l2t)); +		req->opt2 = cpu_to_be32(opt2); +	} else { +		t5_req = (struct cpl_t5_act_open_req *) skb_put(skb, wrlen); +		INIT_TP_WR(t5_req, 0); +		OPCODE_TID(t5_req) = cpu_to_be32( +					MK_OPCODE_TID(CPL_ACT_OPEN_REQ, +					((ep->rss_qid << 14) | ep->atid))); +		t5_req->local_port = ep->com.local_addr.sin_port; +		t5_req->peer_port = ep->com.remote_addr.sin_port; +		t5_req->local_ip = ep->com.local_addr.sin_addr.s_addr; +		t5_req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; +		t5_req->opt0 = cpu_to_be64(opt0); +		t5_req->params = cpu_to_be64(V_FILTER_TUPLE( +				select_ntuple(ep->com.dev, ep->dst, ep->l2t))); +		t5_req->opt2 = cpu_to_be32(opt2); +	} +  	set_bit(ACT_OPEN_REQ, &ep->com.history);  	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);  } @@ -1676,9 +1699,9 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)  	case CPL_ERR_CONN_TIMEDOUT:  		break;  	case CPL_ERR_TCAM_FULL: +		dev->rdev.stats.tcam_full++;  		if (dev->rdev.lldi.enable_fw_ofld_conn) {  			mutex_lock(&dev->rdev.stats.lock); -			dev->rdev.stats.tcam_full++;  			mutex_unlock(&dev->rdev.stats.lock);  			send_fw_act_open_req(ep,  					     GET_TID_TID(GET_AOPEN_ATID( @@ -2875,12 +2898,14 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)  static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)  {  	u32 l2info; -	u16 vlantag, len, hdr_len; +	u16 vlantag, len, hdr_len, eth_hdr_len;  	u8 intf;  	struct cpl_rx_pkt *cpl = cplhdr(skb);  	struct cpl_pass_accept_req *req;  	struct 
tcp_options_received tmp_opt; +	struct c4iw_dev *dev; +	dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));  	/* Store values from cpl_rx_pkt in temporary location. */  	vlantag = (__force u16) cpl->vlan;  	len = (__force u16) cpl->len; @@ -2896,7 +2921,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)  	 */  	memset(&tmp_opt, 0, sizeof(tmp_opt));  	tcp_clear_options(&tmp_opt); -	tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); +	tcp_parse_options(skb, &tmp_opt, 0, NULL);  	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));  	memset(req, 0, sizeof(*req)); @@ -2904,14 +2929,16 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)  			 V_SYN_MAC_IDX(G_RX_MACIDX(  			 (__force int) htonl(l2info))) |  			 F_SYN_XACT_MATCH); +	eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? +			    G_RX_ETHHDR_LEN((__force int) htonl(l2info)) : +			    G_RX_T5_ETHHDR_LEN((__force int) htonl(l2info));  	req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(  					(__force int) htonl(l2info))) |  				   V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(  					(__force int) htons(hdr_len))) |  				   V_IP_HDR_LEN(G_RX_IPHDR_LEN(  					(__force int) htons(hdr_len))) | -				   V_ETH_HDR_LEN(G_RX_ETHHDR_LEN( -					(__force int) htonl(l2info)))); +				   V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(eth_hdr_len)));  	req->vlan = (__force __be16) vlantag;  	req->len = (__force __be16) len;  	req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | @@ -2999,7 +3026,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)  	u16 window;  	struct port_info *pi;  	struct net_device *pdev; -	u16 rss_qid; +	u16 rss_qid, eth_hdr_len;  	int step;  	u32 tx_chan;  	struct neighbour *neigh; @@ -3028,7 +3055,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)  		goto reject;  	} -	if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { +	eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? 
+			    G_RX_ETHHDR_LEN(htonl(cpl->l2info)) : +			    G_RX_T5_ETHHDR_LEN(htonl(cpl->l2info)); +	if (eth_hdr_len == ETH_HLEN) {  		eh = (struct ethhdr *)(req + 1);  		iph = (struct iphdr *)(eh + 1);  	} else { diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 80069ad595c..ae656016e1a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -41,10 +41,20 @@  #define DRV_VERSION "0.1"  MODULE_AUTHOR("Steve Wise"); -MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); +MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");  MODULE_LICENSE("Dual BSD/GPL");  MODULE_VERSION(DRV_VERSION); +static int allow_db_fc_on_t5; +module_param(allow_db_fc_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_fc_on_t5, +		 "Allow DB Flow Control on T5 (default = 0)"); + +static int allow_db_coalescing_on_t5; +module_param(allow_db_coalescing_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_coalescing_on_t5, +		 "Allow DB Coalescing on T5 (default = 0)"); +  struct uld_ctx {  	struct list_head entry;  	struct cxgb4_lld_info lldi; @@ -614,7 +624,7 @@ static int rdma_supported(const struct cxgb4_lld_info *infop)  {  	return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&  	       infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && -	       infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; +	       infop->vr->cq.size > 0;  }  static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) @@ -627,6 +637,22 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)  		       pci_name(infop->pdev));  		return ERR_PTR(-ENOSYS);  	} +	if (!ocqp_supported(infop)) +		pr_info("%s: On-Chip Queues not supported on this device.\n", +			pci_name(infop->pdev)); + +	if (!is_t4(infop->adapter_type)) { +		if (!allow_db_fc_on_t5) { +			db_fc_threshold = 100000; +			pr_info("DB Flow Control Disabled.\n"); +		} + +		if (!allow_db_coalescing_on_t5) { +			db_coalescing_threshold = -1; +			pr_info("DB Coalescing Disabled.\n"); +		} +	} +  	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));  	if (!devp) {  		printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -678,8 +704,8 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)  	int i;  	if (!vers_printed++) -		printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n", -		       DRV_VERSION); +		pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", +			DRV_VERSION);  	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);  	if (!ctx) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7eec5e13fa8..485183ad34c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -162,7 +162,7 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev)  	return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5));  } -#define C4IW_WR_TO (10*HZ) +#define C4IW_WR_TO (30*HZ)  struct c4iw_wr_wait {  	struct completion completion; @@ -369,7 +369,6 @@ struct c4iw_fr_page_list {  	DEFINE_DMA_UNMAP_ADDR(mapping);  	dma_addr_t dma_addr;  	struct c4iw_dev *dev; -	int size;  };  static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( @@ -817,6 +816,15 @@ static inline int compute_wscale(int win)  	return wscale;  } +static inline int ocqp_supported(const struct cxgb4_lld_info *infop) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) +	return infop->vr->ocq.size > 0; +#else +	return 0; +#endif +} +  u32 c4iw_id_alloc(struct c4iw_id_table *alloc);  void c4iw_id_free(struct c4iw_id_table 
*alloc, u32 obj);  int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, @@ -930,6 +938,8 @@ extern struct cxgb4_client t4c_client;  extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];  extern int c4iw_max_read_depth;  extern int db_fc_threshold; +extern int db_coalescing_threshold; +extern int use_dsgl;  #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 903a92d6f91..4cb8eb24497 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -30,16 +30,76 @@   * SOFTWARE.   */ +#include <linux/module.h> +#include <linux/moduleparam.h>  #include <rdma/ib_umem.h>  #include <linux/atomic.h>  #include "iw_cxgb4.h" +int use_dsgl = 1; +module_param(use_dsgl, int, 0644); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); +  #define T4_ULPTX_MIN_IO 32  #define C4IW_MAX_INLINE_SIZE 96 +#define T4_ULPTX_MAX_DMA 1024 +#define C4IW_INLINE_THRESHOLD 128 -static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, -			     void *data) +static int inline_threshold = C4IW_INLINE_THRESHOLD; +module_param(inline_threshold, int, 0644); +MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); + +static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, +				       u32 len, dma_addr_t data, int wait) +{ +	struct sk_buff *skb; +	struct ulp_mem_io *req; +	struct ulptx_sgl *sgl; +	u8 wr_len; +	int ret = 0; +	struct c4iw_wr_wait wr_wait; + +	addr &= 0x7FFFFFF; + +	if (wait) +		c4iw_init_wr_wait(&wr_wait); +	wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); + +	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); +	if (!skb) +		return -ENOMEM; +	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + +	req = (struct ulp_mem_io *)__skb_put(skb, wr_len); +	memset(req, 0, wr_len); +	INIT_ULPTX_WR(req, wr_len, 0, 0); +	req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | +			(wait ? FW_WR_COMPL(1) : 0)); +	req->wr.wr_lo = wait ? 
(__force __be64)&wr_wait : 0; +	req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); +	req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); +	req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); +	req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5)); +	req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); +	req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr)); + +	sgl = (struct ulptx_sgl *)(req + 1); +	sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) | +				    ULPTX_NSGE(1)); +	sgl->len0 = cpu_to_be32(len); +	sgl->addr0 = cpu_to_be64(data); + +	ret = c4iw_ofld_send(rdev, skb); +	if (ret) +		return ret; +	if (wait) +		ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); +	return ret; +} + +static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, +				  void *data)  {  	struct sk_buff *skb;  	struct ulp_mem_io *req; @@ -47,6 +107,12 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,  	u8 wr_len, *to_dp, *from_dp;  	int copy_len, num_wqe, i, ret = 0;  	struct c4iw_wr_wait wr_wait; +	__be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + +	if (is_t4(rdev->lldi.adapter_type)) +		cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1)); +	else +		cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1));  	addr &= 0x7FFFFFF;  	PDBG("%s addr 0x%x len %u\n", __func__, addr, len); @@ -77,7 +143,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,  		req->wr.wr_mid = cpu_to_be32(  				       FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); -		req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE) | (1<<23)); +		req->cmd = cmd;  		req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(  				DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO)));  		req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), @@ -107,6 +173,67 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,  	return ret;  } +int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +{ +	u32 remain = len; +	u32 dmalen; +	int ret = 0; +	dma_addr_t daddr; +	dma_addr_t save; + +	daddr = dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE); +	if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr)) +		return -1; +	save = daddr; + +	while (remain > inline_threshold) { +		if (remain < T4_ULPTX_MAX_DMA) { +			if (remain & ~T4_ULPTX_MIN_IO) +				dmalen = remain & ~(T4_ULPTX_MIN_IO-1); +			else +				dmalen = remain; +		} else +			dmalen = T4_ULPTX_MAX_DMA; +		remain -= dmalen; +		ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, +						 !remain); +		if (ret) +			goto out; +		addr += dmalen >> 5; +		data += dmalen; +		daddr += dmalen; +	} +	if (remain) +		ret = _c4iw_write_mem_inline(rdev, addr, remain, data); +out: +	dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); +	return ret; +} + +/* + * write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. 
+ */ +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, +			     void *data) +{ +	if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { +		if (len > inline_threshold) { +			if (_c4iw_write_mem_dma(rdev, addr, len, data)) { +				printk_ratelimited(KERN_WARNING +						   "%s: dma map" +						   " failure (non fatal)\n", +						   pci_name(rdev->lldi.pdev)); +				return _c4iw_write_mem_inline(rdev, addr, len, +							      data); +			} else +				return 0; +		} else +			return _c4iw_write_mem_inline(rdev, addr, len, data); +	} else +		return _c4iw_write_mem_inline(rdev, addr, len, data); +} +  /*   * Build and write a TPT entry.   * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, @@ -760,19 +887,23 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,  	struct c4iw_fr_page_list *c4pl;  	struct c4iw_dev *dev = to_c4iw_dev(device);  	dma_addr_t dma_addr; -	int size = sizeof *c4pl + page_list_len * sizeof(u64); +	int pll_len = roundup(page_list_len * sizeof(u64), 32); -	c4pl = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, size, -				  &dma_addr, GFP_KERNEL); +	c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);  	if (!c4pl)  		return ERR_PTR(-ENOMEM); +	c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, +						  pll_len, &dma_addr, +						  GFP_KERNEL); +	if (!c4pl->ibpl.page_list) { +		kfree(c4pl); +		return ERR_PTR(-ENOMEM); +	}  	dma_unmap_addr_set(c4pl, mapping, dma_addr);  	c4pl->dma_addr = dma_addr;  	c4pl->dev = dev; -	c4pl->size = size; -	c4pl->ibpl.page_list = (u64 *)(c4pl + 1); -	c4pl->ibpl.max_page_list_len = page_list_len; +	c4pl->ibpl.max_page_list_len = pll_len;  	return &c4pl->ibpl;  } @@ -781,8 +912,10 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)  {  	struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); -	dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, c4pl->size, -			  c4pl, dma_unmap_addr(c4pl, mapping)); +	dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, +			  c4pl->ibpl.max_page_list_len, +			  c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); +	kfree(c4pl);  }  int c4iw_dereg_mr(struct ib_mr *ib_mr) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e084fdc6da7..7e94c9a656a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -162,8 +162,14 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)  		 */  		if (addr >= rdev->oc_mw_pa)  			vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); -		else -			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +		else { +			if (is_t5(rdev->lldi.adapter_type)) +				vma->vm_page_prot = +					t4_pgprot_wc(vma->vm_page_prot); +			else +				vma->vm_page_prot = +					pgprot_noncached(vma->vm_page_prot); +		}  		ret = io_remap_pfn_range(vma, vma->vm_start,  					 addr >> PAGE_SHIFT,  					 len, vma->vm_page_prot); @@ -263,7 +269,7 @@ static int c4iw_query_device(struct ib_device *ibdev,  	dev = to_c4iw_dev(ibdev);  	memset(props, 0, sizeof *props);  	memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); -	props->hw_ver = dev->rdev.lldi.adapter_type; +	props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);  	props->fw_ver = dev->rdev.lldi.fw_vers;  	props->device_cap_flags = dev->device_cap_flags;  	props->page_size_cap = T4_PAGESIZE_MASK; @@ -346,7 +352,8 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,  	struct c4iw_dev *c4iw_dev = container_of(dev, 
struct c4iw_dev,  						 ibdev.dev);  	PDBG("%s dev 0x%p\n", __func__, dev); -	return sprintf(buf, "%d\n", c4iw_dev->rdev.lldi.adapter_type); +	return sprintf(buf, "%d\n", +		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));  }  static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 70b1808a08f..5b059e2d80c 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -42,10 +42,21 @@ static int ocqp_support = 1;  module_param(ocqp_support, int, 0644);  MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); -int db_fc_threshold = 2000; +int db_fc_threshold = 1000;  module_param(db_fc_threshold, int, 0644); -MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic " -		 "db flow control mode (default = 2000)"); +MODULE_PARM_DESC(db_fc_threshold, +		 "QP count/threshold that triggers" +		 " automatic db flow control mode (default = 1000)"); + +int db_coalescing_threshold; +module_param(db_coalescing_threshold, int, 0644); +MODULE_PARM_DESC(db_coalescing_threshold, +		 "QP count/threshold that triggers" +		 " disabling db coalescing (default = 0)"); + +static int max_fr_immd = T4_MAX_FR_IMMD; +module_param(max_fr_immd, int, 0644); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate");  static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)  { @@ -76,7 +87,7 @@ static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)  static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)  { -	if (!ocqp_support || !t4_ocqp_supported()) +	if (!ocqp_support || !ocqp_supported(&rdev->lldi))  		return -ENOSYS;  	sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);  	if (!sq->dma_addr) @@ -129,7 +140,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,  	int wr_len;  	struct c4iw_wr_wait wr_wait;  	struct sk_buff *skb; -	int ret; +	int ret = 0;  	int eqsize;  	wq->sq.qid = c4iw_get_qpid(rdev, uctx); @@ -169,17 +180,14 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,  	}  	if (user) { -		ret = alloc_oc_sq(rdev, &wq->sq); -		if (ret) +		if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq))  			goto free_hwaddr; - -		ret = alloc_host_sq(rdev, &wq->sq); -		if (ret) -			goto free_sq; -	} else +	} else {  		ret = alloc_host_sq(rdev, &wq->sq);  		if (ret)  			goto free_hwaddr; +	} +  	memset(wq->sq.queue, 0, wq->sq.memsize);  	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); @@ -534,7 +542,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,  }  static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, -			 struct ib_send_wr *wr, u8 *len16) +			 struct ib_send_wr *wr, u8 *len16, u8 t5dev)  {  	struct fw_ri_immd *imdp; @@ -556,28 +564,51 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,  	wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);  	wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &  					0xffffffff); -	WARN_ON(pbllen > T4_MAX_FR_IMMD); -	imdp = (struct fw_ri_immd *)(&wqe->fr + 1); -	imdp->op = FW_RI_DATA_IMMD; -	imdp->r1 = 0; -	imdp->r2 = 0; -	imdp->immdlen = cpu_to_be32(pbllen); -	p = (__be64 *)(imdp + 1); -	rem = pbllen; -	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { -		*p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); -		rem -= sizeof *p; -		if (++p == (__be64 *)&sq->queue[sq->size]) -			p = (__be64 *)sq->queue; -	} -	BUG_ON(rem < 0); 
-	while (rem) { -		*p = 0; -		rem -= sizeof *p; -		if (++p == (__be64 *)&sq->queue[sq->size]) -			p = (__be64 *)sq->queue; + +	if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { +		struct c4iw_fr_page_list *c4pl = +			to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); +		struct fw_ri_dsgl *sglp; + +		for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { +			wr->wr.fast_reg.page_list->page_list[i] = (__force u64) +				cpu_to_be64((u64) +				wr->wr.fast_reg.page_list->page_list[i]); +		} + +		sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); +		sglp->op = FW_RI_DATA_DSGL; +		sglp->r1 = 0; +		sglp->nsge = cpu_to_be16(1); +		sglp->addr0 = cpu_to_be64(c4pl->dma_addr); +		sglp->len0 = cpu_to_be32(pbllen); + +		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); +	} else { +		imdp = (struct fw_ri_immd *)(&wqe->fr + 1); +		imdp->op = FW_RI_DATA_IMMD; +		imdp->r1 = 0; +		imdp->r2 = 0; +		imdp->immdlen = cpu_to_be32(pbllen); +		p = (__be64 *)(imdp + 1); +		rem = pbllen; +		for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { +			*p = cpu_to_be64( +				(u64)wr->wr.fast_reg.page_list->page_list[i]); +			rem -= sizeof(*p); +			if (++p == (__be64 *)&sq->queue[sq->size]) +				p = (__be64 *)sq->queue; +		} +		BUG_ON(rem < 0); +		while (rem) { +			*p = 0; +			rem -= sizeof(*p); +			if (++p == (__be64 *)&sq->queue[sq->size]) +				p = (__be64 *)sq->queue; +		} +		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) +				      + pbllen, 16);  	} -	*len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16);  	return 0;  } @@ -678,7 +709,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  		case IB_WR_FAST_REG_MR:  			fw_opcode = FW_RI_FR_NSMR_WR;  			swsqe->opcode = FW_RI_FAST_REGISTER; -			err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); +			err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, +					    is_t5( +					    qhp->rhp->rdev.lldi.adapter_type) ? 
+					    1 : 0);  			break;  		case IB_WR_LOCAL_INV:  			if (wr->send_flags & IB_SEND_FENCE) @@ -1450,6 +1484,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)  		rhp->db_state = NORMAL;  		idr_for_each(&rhp->qpidr, enable_qp_db, NULL);  	} +	if (db_coalescing_threshold >= 0) +		if (rhp->qpcnt <= db_coalescing_threshold) +			cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]);  	spin_unlock_irq(&rhp->lock);  	atomic_dec(&qhp->refcnt);  	wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); @@ -1561,11 +1598,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,  	spin_lock_irq(&rhp->lock);  	if (rhp->db_state != NORMAL)  		t4_disable_wq_db(&qhp->wq); -	if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { +	rhp->qpcnt++; +	if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {  		rhp->rdev.stats.db_state_transitions++;  		rhp->db_state = FLOW_CONTROL;  		idr_for_each(&rhp->qpidr, disable_qp_db, NULL);  	} +	if (db_coalescing_threshold >= 0) +		if (rhp->qpcnt > db_coalescing_threshold) +			cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]);  	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);  	spin_unlock_irq(&rhp->lock);  	if (ret) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 16f26ab2930..ebcb03bd1b7 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -84,7 +84,7 @@ struct t4_status_page {  			sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))  #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \  			sizeof(struct fw_ri_immd)) & ~31UL) -#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DEPTH (1024 / sizeof(u64))  #define T4_RQ_NUM_SLOTS 2  #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) @@ -280,15 +280,6 @@ static inline pgprot_t t4_pgprot_wc(pgprot_t prot)  #endif  } -static inline int t4_ocqp_supported(void) -{ -#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) -	return 1; -#else -	return 0; -#endif -} -  enum {  	T4_SQ_ONCHIP = (1<<0),  }; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ae67df35dd4..73b3a713258 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -228,7 +228,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector  		vector = dev->eq_table[vector % ibdev->num_comp_vectors];  	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, -			    cq->db.dma, &cq->mcq, vector, 0); +			    cq->db.dma, &cq->mcq, vector, 0, 0);  	if (err)  		goto err_dbmap; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 67647e26461..418004c93fe 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2948,7 +2948,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)  					nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. 
Tag = 0x%04X\n",  							nesvnic->netdev->name, vlan_tag); -					__vlan_hwaccel_put_tag(rx_skb, vlan_tag); +					__vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);  				}  				if (nes_use_lro)  					lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 85cf4d1ac44..49eb5111d2c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1599,7 +1599,7 @@ static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev,  	/* Enable/Disable VLAN Stripping */  	u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); -	if (features & NETIF_F_HW_VLAN_RX) +	if (features & NETIF_F_HW_VLAN_CTAG_RX)  		u32temp &= 0xfdffffff;  	else  		u32temp	|= 0x02000000; @@ -1614,10 +1614,10 @@ static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_feat  	 * Since there is no support for separate rx/tx vlan accel  	 * enable/disable make sure tx flag is always in same state as rx.  	 */ -	if (features & NETIF_F_HW_VLAN_RX) -		features |= NETIF_F_HW_VLAN_TX; +	if (features & NETIF_F_HW_VLAN_CTAG_RX) +		features |= NETIF_F_HW_VLAN_CTAG_TX;  	else -		features &= ~NETIF_F_HW_VLAN_TX; +		features &= ~NETIF_F_HW_VLAN_CTAG_TX;  	return features;  } @@ -1628,7 +1628,7 @@ static int nes_set_features(struct net_device *netdev, netdev_features_t feature  	struct nes_device *nesdev = nesvnic->nesdev;  	u32 changed = netdev->features ^ features; -	if (changed & NETIF_F_HW_VLAN_RX) +	if (changed & NETIF_F_HW_VLAN_CTAG_RX)  		nes_vlan_mode(netdev, nesdev, features);  	return 0; @@ -1706,11 +1706,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,  	netdev->dev_addr[4] = (u8)(u64temp>>8);  	netdev->dev_addr[5] = (u8)u64temp; -	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; +	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;  	if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))  		netdev->hw_features |= NETIF_F_TSO; -	netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX; +	netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;  	netdev->hw_features |= NETIF_F_LRO;  	nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"  |
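
The nes changes track the kernel-wide VLAN API update: the offload feature flags gain a CTAG infix and __vlan_hwaccel_put_tag() now takes the VLAN protocol as an explicit argument. A minimal usage sketch under those assumptions (the function and its callers are illustrative, not part of the patch):

#include <linux/if_vlan.h>
#include <linux/netdevice.h>

/* Illustrative only: advertise the renamed CTAG offload flags and tag a
 * received skb the way the nes hunks above now do.
 */
static void example_rx_vlan(struct net_device *netdev, struct sk_buff *skb,
			    u16 vlan_tci)
{
	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;

	/* The VLAN protocol is now passed explicitly (802.1Q here). */
	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
}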