Diffstat (limited to 'net/rds/ib_cm.c')
-rw-r--r--  net/rds/ib_cm.c | 184
1 files changed, 126 insertions, 58 deletions
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db..ee369d201a6 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
 #include "rds.h"
 #include "ib.h"
 
+static char *rds_ib_event_type_strings[] = {
+#define RDS_IB_EVENT_STRING(foo) \
+		[IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
+	RDS_IB_EVENT_STRING(CQ_ERR),
+	RDS_IB_EVENT_STRING(QP_FATAL),
+	RDS_IB_EVENT_STRING(QP_REQ_ERR),
+	RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
+	RDS_IB_EVENT_STRING(COMM_EST),
+	RDS_IB_EVENT_STRING(SQ_DRAINED),
+	RDS_IB_EVENT_STRING(PATH_MIG),
+	RDS_IB_EVENT_STRING(PATH_MIG_ERR),
+	RDS_IB_EVENT_STRING(DEVICE_FATAL),
+	RDS_IB_EVENT_STRING(PORT_ACTIVE),
+	RDS_IB_EVENT_STRING(PORT_ERR),
+	RDS_IB_EVENT_STRING(LID_CHANGE),
+	RDS_IB_EVENT_STRING(PKEY_CHANGE),
+	RDS_IB_EVENT_STRING(SM_CHANGE),
+	RDS_IB_EVENT_STRING(SRQ_ERR),
+	RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
+	RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
+	RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
+#undef RDS_IB_EVENT_STRING
+};
+
+static char *rds_ib_event_str(enum ib_event_type type)
+{
+	return rds_str_array(rds_ib_event_type_strings,
+			     ARRAY_SIZE(rds_ib_event_type_strings), type);
+};
+
 /*
  * Set the selected protocol version
  */
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 {
 	const struct rds_ib_connect_private *dp = NULL;
 	struct rds_ib_connection *ic = conn->c_transport_data;
-	struct rds_ib_device *rds_ibdev;
 	struct ib_qp_attr qp_attr;
 	int err;
 
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		}
 	}
 
-	printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
-			&conn->c_faddr,
-			RDS_PROTOCOL_MAJOR(conn->c_version),
-			RDS_PROTOCOL_MINOR(conn->c_version),
-			ic->i_flowctl ? ", flow control" : "");
+	if (conn->c_version < RDS_PROTOCOL(3,1)) {
+		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
+		       " no longer supported\n",
+		       &conn->c_faddr,
+		       RDS_PROTOCOL_MAJOR(conn->c_version),
+		       RDS_PROTOCOL_MINOR(conn->c_version));
+		rds_conn_destroy(conn);
+		return;
+	} else {
+		printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
+		       &conn->c_faddr,
+		       RDS_PROTOCOL_MAJOR(conn->c_version),
+		       RDS_PROTOCOL_MINOR(conn->c_version),
+		       ic->i_flowctl ? ", flow control" : "");
+	}
 
 	/*
 	 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 	rds_ib_recv_init_ring(ic);
 	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
-	rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
+	rds_ib_recv_refill(conn, 1);
 
 	/* Tune RNR behavior */
 	rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 	if (err)
 		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
 
-	/* update ib_device with this local ipaddr & conn */
-	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
-	err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
+	/* update ib_device with this local ipaddr */
+	err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
 	if (err)
-		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
-	rds_ib_add_conn(rds_ibdev, conn);
+		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
+			err);
 
 	/* If the peer gave us the last packet it saw, process this as if
 	 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
 			struct rdma_conn_param *conn_param,
 			struct rds_ib_connect_private *dp,
-			u32 protocol_version)
+			u32 protocol_version,
+			u32 max_responder_resources,
+			u32 max_initiator_depth)
 {
+	struct rds_ib_connection *ic = conn->c_transport_data;
+	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
+
 	memset(conn_param, 0, sizeof(struct rdma_conn_param));
-	/* XXX tune these? */
-	conn_param->responder_resources = 1;
-	conn_param->initiator_depth = 1;
+
+	conn_param->responder_resources =
+		min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
+	conn_param->initiator_depth =
+		min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
 	conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
 	conn_param->rnr_retry_count = 7;
 
 	if (dp) {
-		struct rds_ib_connection *ic = conn->c_transport_data;
-
 		memset(dp, 0, sizeof(*dp));
 		dp->dp_saddr = conn->c_laddr;
 		dp->dp_daddr = conn->c_faddr;
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
 
 static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
 {
-	rdsdebug("event %u data %p\n", event->event, data);
+	rdsdebug("event %u (%s) data %p\n",
+		 event->event, rds_ib_event_str(event->event), data);
 }
 
 static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
 	struct rds_connection *conn = data;
 	struct rds_ib_connection *ic = conn->c_transport_data;
 
-	rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
+	rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
+		 rds_ib_event_str(event->event));
 
 	switch (event->event) {
 	case IB_EVENT_COMM_EST:
 		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
 		break;
 	default:
-		rdsdebug("Fatal QP Event %u "
+		rdsdebug("Fatal QP Event %u (%s) "
 			"- connection %pI4->%pI4, reconnecting\n",
-			event->event, &conn->c_laddr, &conn->c_faddr);
+			event->event, rds_ib_event_str(event->event),
+			&conn->c_laddr, &conn->c_faddr);
 		rds_conn_drop(conn);
 		break;
 	}
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	struct rds_ib_device *rds_ibdev;
 	int ret;
 
-	/* rds_ib_add_one creates a rds_ib_device object per IB device,
-	 * and allocates a protection domain, memory range and FMR pool
-	 * for each.  If that fails for any reason, it will not register
-	 * the rds_ibdev at all.
+	/*
+	 * It's normal to see a null device if an incoming connection races
+	 * with device removal, so we don't print a warning.
 	 */
-	rds_ibdev = ib_get_client_data(dev, &rds_ib_client);
-	if (rds_ibdev == NULL) {
-		if (printk_ratelimit())
-			printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
-					dev->name);
+	rds_ibdev = rds_ib_get_client_data(dev);
+	if (!rds_ibdev)
 		return -EOPNOTSUPP;
-	}
+
+	/* add the conn now so that connection establishment has the dev */
+	rds_ib_add_conn(rds_ibdev, conn);
 
 	if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
 		rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 					   ic->i_send_ring.w_nr *
 						sizeof(struct rds_header),
 					   &ic->i_send_hdrs_dma, GFP_KERNEL);
-	if (ic->i_send_hdrs == NULL) {
+	if (!ic->i_send_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent send failed\n");
 		goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 					   ic->i_recv_ring.w_nr *
 						sizeof(struct rds_header),
 					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
-	if (ic->i_recv_hdrs == NULL) {
+	if (!ic->i_recv_hdrs) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent recv failed\n");
 		goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 
 	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
 				       &ic->i_ack_dma, GFP_KERNEL);
-	if (ic->i_ack == NULL) {
+	if (!ic->i_ack) {
 		ret = -ENOMEM;
 		rdsdebug("ib_dma_alloc_coherent ack failed\n");
 		goto out;
 	}
 
-	ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
-	if (ic->i_sends == NULL) {
+	ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
+				   ibdev_to_node(dev));
+	if (!ic->i_sends) {
 		ret = -ENOMEM;
 		rdsdebug("send allocation failed\n");
 		goto out;
 	}
 	memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
 
-	ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));
-	if (ic->i_recvs == NULL) {
+	ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
+				   ibdev_to_node(dev));
+	if (!ic->i_recvs) {
 		ret = -ENOMEM;
 		rdsdebug("recv allocation failed\n");
 		goto out;
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 		 ic->i_send_cq, ic->i_recv_cq);
 
 out:
+	rds_ib_dev_put(rds_ibdev);
 	return ret;
 }
 
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 	struct rds_ib_connection *ic = NULL;
 	struct rdma_conn_param conn_param;
 	u32 version;
-	int err, destroy = 1;
+	int err = 1, destroy = 1;
 
 	/* Check whether the remote protocol version matches ours. */
 	version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 			/* Wait and see - our connect may still be succeeding */
 			rds_ib_stats_inc(s_ib_connect_raced);
 		}
-		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
@@ -475,24 +521,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 	err = rds_ib_setup_qp(conn);
 	if (err) {
 		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
-		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
-	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
+	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
+		event->param.conn.responder_resources,
+		event->param.conn.initiator_depth);
 
 	/* rdma_accept() calls rdma_reject() internally if it fails */
 	err = rdma_accept(cm_id, &conn_param);
-	mutex_unlock(&conn->c_cm_lock);
-	if (err) {
+	if (err)
 		rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
-		goto out;
-	}
-
-	return 0;
 
 out:
-	rdma_reject(cm_id, NULL, 0);
+	if (conn)
+		mutex_unlock(&conn->c_cm_lock);
+	if (err)
+		rdma_reject(cm_id, NULL, 0);
 	return destroy;
 }
 
@@ -516,8 +561,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
 		goto out;
 	}
 
-	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
-
+	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
+		UINT_MAX, UINT_MAX);
 	ret = rdma_connect(cm_id, &conn_param);
 	if (ret)
 		rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +646,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
 				ic->i_cm_id, err);
 		}
 
+		/*
+		 * We want to wait for tx and rx completion to finish
+		 * before we tear down the connection, but we have to be
+		 * careful not to get stuck waiting on a send ring that
+		 * only has unsignaled sends in it.  We've shutdown new
+		 * sends before getting here so by waiting for signaled
+		 * sends to complete we're ensured that there will be no
+		 * more tx processing.
+		 */
 		wait_event(rds_ib_ring_empty_wait,
-			rds_ib_ring_empty(&ic->i_send_ring) &&
-			rds_ib_ring_empty(&ic->i_recv_ring));
+			   rds_ib_ring_empty(&ic->i_recv_ring) &&
+			   (atomic_read(&ic->i_signaled_sends) == 0));
+		tasklet_kill(&ic->i_recv_tasklet);
 
 		if (ic->i_send_hdrs)
 			ib_dma_free_coherent(dev,
@@ -654,9 +709,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
 	BUG_ON(ic->rds_ibdev);
 
 	/* Clear pending transmit */
-	if (ic->i_rm) {
-		rds_message_put(ic->i_rm);
-		ic->i_rm = NULL;
+	if (ic->i_data_op) {
+		struct rds_message *rm;
+
+		rm = container_of(ic->i_data_op, struct rds_message, data);
+		rds_message_put(rm);
+		ic->i_data_op = NULL;
 	}
 
 	/* Clear the ACK state */
@@ -690,12 +748,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
 	struct rds_ib_connection *ic;
 	unsigned long flags;
+	int ret;
 
 	/* XXX too lazy? */
 	ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
-	if (ic == NULL)
+	if (!ic)
 		return -ENOMEM;
 
+	ret = rds_ib_recv_alloc_caches(ic);
+	if (ret) {
+		kfree(ic);
+		return ret;
+	}
+
 	INIT_LIST_HEAD(&ic->ib_node);
 	tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
 		     (unsigned long) ic);
@@ -703,6 +768,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 #ifndef KERNEL_HAS_ATOMIC64
 	spin_lock_init(&ic->i_ack_lock);
 #endif
+	atomic_set(&ic->i_signaled_sends, 0);
 
 	/*
 	 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +810,8 @@ void rds_ib_conn_free(void *arg)
 	list_del(&ic->ib_node);
 	spin_unlock_irq(lock_ptr);
 
+	rds_ib_recv_free_caches(ic);
+
 	kfree(ic);
 }
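The RDS_IB_EVENT_STRING table added in the first hunk maps enum ib_event_type values to their names with designated initializers and __stringify(). The sketch below is a minimal standalone illustration of that pattern, not part of the patch: the demo_* names are hypothetical, and the bounds-checked lookup mirrors what rds_str_array() (defined elsewhere in RDS, not in this file) is assumed to do, returning the stored name when the index is in range and non-NULL, and a fallback string otherwise.

/* Minimal userspace sketch of the enum-to-string table pattern used above.
 * All demo_* names are hypothetical; rds_str_array()'s behavior is assumed. */
#include <stdio.h>

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

enum demo_event_type {
	DEMO_EVENT_CQ_ERR,
	DEMO_EVENT_QP_FATAL,
};

static char *demo_event_type_strings[] = {
#define DEMO_EVENT_STRING(foo) \
		[DEMO_EVENT_##foo] = __stringify(DEMO_EVENT_##foo)
	DEMO_EVENT_STRING(CQ_ERR),
	DEMO_EVENT_STRING(QP_FATAL),
#undef DEMO_EVENT_STRING
};

/* Bounds-checked lookup with a fallback for out-of-range or unnamed values. */
static char *demo_str_array(char **array, size_t elements, size_t index)
{
	if (index < elements && array[index])
		return array[index];
	return "unknown";
}

int main(void)
{
	/* Prints "DEMO_EVENT_QP_FATAL". */
	printf("%s\n", demo_str_array(demo_event_type_strings,
				      sizeof(demo_event_type_strings) /
				      sizeof(demo_event_type_strings[0]),
				      DEMO_EVENT_QP_FATAL));
	return 0;
}

The same table is what the patched rds_ib_cq_event_handler() and rds_ib_qp_event_handler() consult, so debug output shows "event 1 (IB_EVENT_QP_FATAL)" instead of a bare number.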