[go: nahoru, domu]

1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/module.h>
33#include <linux/list.h>
34#include <linux/slab.h>
35#include <linux/workqueue.h>
36#include <linux/skbuff.h>
37#include <linux/timer.h>
38#include <linux/notifier.h>
39#include <linux/inetdevice.h>
40
41#include <net/neighbour.h>
42#include <net/netevent.h>
43#include <net/route.h>
44
45#include "tcb.h"
46#include "cxgb3_offload.h"
47#include "iwch.h"
48#include "iwch_provider.h"
49#include "iwch_cm.h"
50
/*
 * Printable names for the endpoint states, used by the debug traces.
 * The table is indexed with the numeric endpoint state and is NULL
 * terminated.
 */
static char *states[] = {
	"idle", "listen", "connecting",
	"mpa_wait_req", "mpa_req_sent", "mpa_req_rcvd", "mpa_rep_sent",
	"fpdu_mode",
	"aborting", "closing", "moribund", "dead",
	NULL,
};
66
67int peer2peer = 0;
68module_param(peer2peer, int, 0644);
69MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
70
71static int ep_timeout_secs = 60;
72module_param(ep_timeout_secs, int, 0644);
73MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
74				   "in seconds (default=60)");
75
76static int mpa_rev = 1;
77module_param(mpa_rev, int, 0644);
78MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
79		 "1 is spec compliant. (default=1)");
80
81static int markers_enabled = 0;
82module_param(markers_enabled, int, 0644);
83MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
84
85static int crc_enabled = 1;
86module_param(crc_enabled, int, 0644);
87MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
88
89static int rcv_win = 256 * 1024;
90module_param(rcv_win, int, 0644);
91MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
92
93static int snd_win = 32 * 1024;
94module_param(snd_win, int, 0644);
95MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
96
97static unsigned int nocong = 0;
98module_param(nocong, uint, 0644);
99MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
100
101static unsigned int cong_flavor = 1;
102module_param(cong_flavor, uint, 0644);
103MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
104
105static struct workqueue_struct *workq;
106
107static struct sk_buff_head rxq;
108
109static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
110static void ep_timeout(unsigned long arg);
111static void connect_reply_upcall(struct iwch_ep *ep, int status);
112
113static void start_ep_timer(struct iwch_ep *ep)
114{
115	PDBG("%s ep %p\n", __func__, ep);
116	if (timer_pending(&ep->timer)) {
117		PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
118		del_timer_sync(&ep->timer);
119	} else
120		get_ep(&ep->com);
121	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
122	ep->timer.data = (unsigned long)ep;
123	ep->timer.function = ep_timeout;
124	add_timer(&ep->timer);
125}
126
127static void stop_ep_timer(struct iwch_ep *ep)
128{
129	PDBG("%s ep %p\n", __func__, ep);
130	if (!timer_pending(&ep->timer)) {
131		WARN(1, "%s timer stopped when its not running!  ep %p state %u\n",
132			__func__, ep, ep->com.state);
133		return;
134	}
135	del_timer_sync(&ep->timer);
136	put_ep(&ep->com);
137}
138
139static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
140{
141	int	error = 0;
142	struct cxio_rdev *rdev;
143
144	rdev = (struct cxio_rdev *)tdev->ulp;
145	if (cxio_fatal_error(rdev)) {
146		kfree_skb(skb);
147		return -EIO;
148	}
149	error = l2t_send(tdev, skb, l2e);
150	if (error < 0)
151		kfree_skb(skb);
152	return error;
153}
154
155int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
156{
157	int	error = 0;
158	struct cxio_rdev *rdev;
159
160	rdev = (struct cxio_rdev *)tdev->ulp;
161	if (cxio_fatal_error(rdev)) {
162		kfree_skb(skb);
163		return -EIO;
164	}
165	error = cxgb3_ofld_send(tdev, skb);
166	if (error < 0)
167		kfree_skb(skb);
168	return error;
169}
170
171static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
172{
173	struct cpl_tid_release *req;
174
175	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
176	if (!skb)
177		return;
178	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
179	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
180	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
181	skb->priority = CPL_PRIORITY_SETUP;
182	iwch_cxgb3_ofld_send(tdev, skb);
183	return;
184}
185
186int iwch_quiesce_tid(struct iwch_ep *ep)
187{
188	struct cpl_set_tcb_field *req;
189	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
190
191	if (!skb)
192		return -ENOMEM;
193	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
194	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
195	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
196	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
197	req->reply = 0;
198	req->cpu_idx = 0;
199	req->word = htons(W_TCB_RX_QUIESCE);
200	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
201	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
202
203	skb->priority = CPL_PRIORITY_DATA;
204	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
205}
206
207int iwch_resume_tid(struct iwch_ep *ep)
208{
209	struct cpl_set_tcb_field *req;
210	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
211
212	if (!skb)
213		return -ENOMEM;
214	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
215	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
216	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
217	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
218	req->reply = 0;
219	req->cpu_idx = 0;
220	req->word = htons(W_TCB_RX_QUIESCE);
221	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
222	req->val = 0;
223
224	skb->priority = CPL_PRIORITY_DATA;
225	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
226}
227
228static void set_emss(struct iwch_ep *ep, u16 opt)
229{
230	PDBG("%s ep %p opt %u\n", __func__, ep, opt);
231	ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
232	if (G_TCPOPT_TSTAMP(opt))
233		ep->emss -= 12;
234	if (ep->emss < 128)
235		ep->emss = 128;
236	PDBG("emss=%d\n", ep->emss);
237}
238
239static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
240{
241	unsigned long flags;
242	enum iwch_ep_state state;
243
244	spin_lock_irqsave(&epc->lock, flags);
245	state = epc->state;
246	spin_unlock_irqrestore(&epc->lock, flags);
247	return state;
248}
249
250static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
251{
252	epc->state = new;
253}
254
255static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
256{
257	unsigned long flags;
258
259	spin_lock_irqsave(&epc->lock, flags);
260	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
261	__state_set(epc, new);
262	spin_unlock_irqrestore(&epc->lock, flags);
263	return;
264}
265
266static void *alloc_ep(int size, gfp_t gfp)
267{
268	struct iwch_ep_common *epc;
269
270	epc = kzalloc(size, gfp);
271	if (epc) {
272		kref_init(&epc->kref);
273		spin_lock_init(&epc->lock);
274		init_waitqueue_head(&epc->waitq);
275	}
276	PDBG("%s alloc ep %p\n", __func__, epc);
277	return epc;
278}
279
280void __free_ep(struct kref *kref)
281{
282	struct iwch_ep *ep;
283	ep = container_of(container_of(kref, struct iwch_ep_common, kref),
284			  struct iwch_ep, com);
285	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
286	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
287		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
288		dst_release(ep->dst);
289		l2t_release(ep->com.tdev, ep->l2t);
290	}
291	kfree(ep);
292}
293
294static void release_ep_resources(struct iwch_ep *ep)
295{
296	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
297	set_bit(RELEASE_RESOURCES, &ep->com.flags);
298	put_ep(&ep->com);
299}
300
301static int status2errno(int status)
302{
303	switch (status) {
304	case CPL_ERR_NONE:
305		return 0;
306	case CPL_ERR_CONN_RESET:
307		return -ECONNRESET;
308	case CPL_ERR_ARP_MISS:
309		return -EHOSTUNREACH;
310	case CPL_ERR_CONN_TIMEDOUT:
311		return -ETIMEDOUT;
312	case CPL_ERR_TCAM_FULL:
313		return -ENOMEM;
314	case CPL_ERR_CONN_EXIST:
315		return -EADDRINUSE;
316	default:
317		return -EIO;
318	}
319}
320
321/*
322 * Try and reuse skbs already allocated...
323 */
324static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
325{
326	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
327		skb_trim(skb, 0);
328		skb_get(skb);
329	} else {
330		skb = alloc_skb(len, gfp);
331	}
332	return skb;
333}
334
335static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
336				 __be32 peer_ip, __be16 local_port,
337				 __be16 peer_port, u8 tos)
338{
339	struct rtable *rt;
340	struct flowi4 fl4;
341
342	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
343				   peer_port, local_port, IPPROTO_TCP,
344				   tos, 0);
345	if (IS_ERR(rt))
346		return NULL;
347	return rt;
348}
349
350static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
351{
352	int i = 0;
353
354	while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
355		++i;
356	return i;
357}
358
/*
 * ARP failure handler that simply drops the skb.
 */
static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
{
	PDBG("%s t3cdev %p\n", __func__, dev);

	kfree_skb(skb);
}
364
365/*
366 * Handle an ARP failure for an active open.
367 */
368static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
369{
370	printk(KERN_ERR MOD "ARP failure duing connect\n");
371	kfree_skb(skb);
372}
373
374/*
375 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
376 * and send it along.
377 */
378static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
379{
380	struct cpl_abort_req *req = cplhdr(skb);
381
382	PDBG("%s t3cdev %p\n", __func__, dev);
383	req->cmd = CPL_ABORT_NO_RST;
384	iwch_cxgb3_ofld_send(dev, skb);
385}
386
387static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
388{
389	struct cpl_close_con_req *req;
390	struct sk_buff *skb;
391
392	PDBG("%s ep %p\n", __func__, ep);
393	skb = get_skb(NULL, sizeof(*req), gfp);
394	if (!skb) {
395		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
396		return -ENOMEM;
397	}
398	skb->priority = CPL_PRIORITY_DATA;
399	set_arp_failure_handler(skb, arp_failure_discard);
400	req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
401	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
402	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
403	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
404	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
405}
406
407static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
408{
409	struct cpl_abort_req *req;
410
411	PDBG("%s ep %p\n", __func__, ep);
412	skb = get_skb(skb, sizeof(*req), gfp);
413	if (!skb) {
414		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
415		       __func__);
416		return -ENOMEM;
417	}
418	skb->priority = CPL_PRIORITY_DATA;
419	set_arp_failure_handler(skb, abort_arp_failure);
420	req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
421	memset(req, 0, sizeof(*req));
422	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
423	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
424	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
425	req->cmd = CPL_ABORT_SEND_RST;
426	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
427}
428
429static int send_connect(struct iwch_ep *ep)
430{
431	struct cpl_act_open_req *req;
432	struct sk_buff *skb;
433	u32 opt0h, opt0l, opt2;
434	unsigned int mtu_idx;
435	int wscale;
436
437	PDBG("%s ep %p\n", __func__, ep);
438
439	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
440	if (!skb) {
441		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
442		       __func__);
443		return -ENOMEM;
444	}
445	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
446	wscale = compute_wscale(rcv_win);
447	opt0h = V_NAGLE(0) |
448	    V_NO_CONG(nocong) |
449	    V_KEEP_ALIVE(1) |
450	    F_TCAM_BYPASS |
451	    V_WND_SCALE(wscale) |
452	    V_MSS_IDX(mtu_idx) |
453	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
454	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
455	opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
456	       V_CONG_CONTROL_FLAVOR(cong_flavor);
457	skb->priority = CPL_PRIORITY_SETUP;
458	set_arp_failure_handler(skb, act_open_req_arp_failure);
459
460	req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
461	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
462	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
463	req->local_port = ep->com.local_addr.sin_port;
464	req->peer_port = ep->com.remote_addr.sin_port;
465	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
466	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
467	req->opt0h = htonl(opt0h);
468	req->opt0l = htonl(opt0l);
469	req->params = 0;
470	req->opt2 = htonl(opt2);
471	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
472}
473
474static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
475{
476	int mpalen;
477	struct tx_data_wr *req;
478	struct mpa_message *mpa;
479	int len;
480
481	PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
482
483	BUG_ON(skb_cloned(skb));
484
485	mpalen = sizeof(*mpa) + ep->plen;
486	if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
487		kfree_skb(skb);
488		skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
489		if (!skb) {
490			connect_reply_upcall(ep, -ENOMEM);
491			return;
492		}
493	}
494	skb_trim(skb, 0);
495	skb_reserve(skb, sizeof(*req));
496	skb_put(skb, mpalen);
497	skb->priority = CPL_PRIORITY_DATA;
498	mpa = (struct mpa_message *) skb->data;
499	memset(mpa, 0, sizeof(*mpa));
500	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
501	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
502		     (markers_enabled ? MPA_MARKERS : 0);
503	mpa->private_data_size = htons(ep->plen);
504	mpa->revision = mpa_rev;
505
506	if (ep->plen)
507		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
508
509	/*
510	 * Reference the mpa skb.  This ensures the data area
511	 * will remain in memory until the hw acks the tx.
512	 * Function tx_ack() will deref it.
513	 */
514	skb_get(skb);
515	set_arp_failure_handler(skb, arp_failure_discard);
516	skb_reset_transport_header(skb);
517	len = skb->len;
518	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
519	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
520	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
521	req->len = htonl(len);
522	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
523			   V_TX_SNDBUF(snd_win>>15));
524	req->flags = htonl(F_TX_INIT);
525	req->sndseq = htonl(ep->snd_seq);
526	BUG_ON(ep->mpa_skb);
527	ep->mpa_skb = skb;
528	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
529	start_ep_timer(ep);
530	state_set(&ep->com, MPA_REQ_SENT);
531	return;
532}
533
534static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
535{
536	int mpalen;
537	struct tx_data_wr *req;
538	struct mpa_message *mpa;
539	struct sk_buff *skb;
540
541	PDBG("%s ep %p plen %d\n", __func__, ep, plen);
542
543	mpalen = sizeof(*mpa) + plen;
544
545	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
546	if (!skb) {
547		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
548		return -ENOMEM;
549	}
550	skb_reserve(skb, sizeof(*req));
551	mpa = (struct mpa_message *) skb_put(skb, mpalen);
552	memset(mpa, 0, sizeof(*mpa));
553	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
554	mpa->flags = MPA_REJECT;
555	mpa->revision = mpa_rev;
556	mpa->private_data_size = htons(plen);
557	if (plen)
558		memcpy(mpa->private_data, pdata, plen);
559
560	/*
561	 * Reference the mpa skb again.  This ensures the data area
562	 * will remain in memory until the hw acks the tx.
563	 * Function tx_ack() will deref it.
564	 */
565	skb_get(skb);
566	skb->priority = CPL_PRIORITY_DATA;
567	set_arp_failure_handler(skb, arp_failure_discard);
568	skb_reset_transport_header(skb);
569	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
570	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
571	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
572	req->len = htonl(mpalen);
573	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
574			   V_TX_SNDBUF(snd_win>>15));
575	req->flags = htonl(F_TX_INIT);
576	req->sndseq = htonl(ep->snd_seq);
577	BUG_ON(ep->mpa_skb);
578	ep->mpa_skb = skb;
579	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
580}
581
582static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
583{
584	int mpalen;
585	struct tx_data_wr *req;
586	struct mpa_message *mpa;
587	int len;
588	struct sk_buff *skb;
589
590	PDBG("%s ep %p plen %d\n", __func__, ep, plen);
591
592	mpalen = sizeof(*mpa) + plen;
593
594	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
595	if (!skb) {
596		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
597		return -ENOMEM;
598	}
599	skb->priority = CPL_PRIORITY_DATA;
600	skb_reserve(skb, sizeof(*req));
601	mpa = (struct mpa_message *) skb_put(skb, mpalen);
602	memset(mpa, 0, sizeof(*mpa));
603	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
604	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
605		     (markers_enabled ? MPA_MARKERS : 0);
606	mpa->revision = mpa_rev;
607	mpa->private_data_size = htons(plen);
608	if (plen)
609		memcpy(mpa->private_data, pdata, plen);
610
611	/*
612	 * Reference the mpa skb.  This ensures the data area
613	 * will remain in memory until the hw acks the tx.
614	 * Function tx_ack() will deref it.
615	 */
616	skb_get(skb);
617	set_arp_failure_handler(skb, arp_failure_discard);
618	skb_reset_transport_header(skb);
619	len = skb->len;
620	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
621	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
622	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
623	req->len = htonl(len);
624	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
625			   V_TX_SNDBUF(snd_win>>15));
626	req->flags = htonl(F_TX_INIT);
627	req->sndseq = htonl(ep->snd_seq);
628	ep->mpa_skb = skb;
629	state_set(&ep->com, MPA_REP_SENT);
630	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
631}
632
633static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
634{
635	struct iwch_ep *ep = ctx;
636	struct cpl_act_establish *req = cplhdr(skb);
637	unsigned int tid = GET_TID(req);
638
639	PDBG("%s ep %p tid %d\n", __func__, ep, tid);
640
641	dst_confirm(ep->dst);
642
643	/* setup the hwtid for this connection */
644	ep->hwtid = tid;
645	cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
646
647	ep->snd_seq = ntohl(req->snd_isn);
648	ep->rcv_seq = ntohl(req->rcv_isn);
649
650	set_emss(ep, ntohs(req->tcp_opt));
651
652	/* dealloc the atid */
653	cxgb3_free_atid(ep->com.tdev, ep->atid);
654
655	/* start MPA negotiation */
656	send_mpa_req(ep, skb);
657
658	return 0;
659}
660
661static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
662{
663	PDBG("%s ep %p\n", __FILE__, ep);
664	state_set(&ep->com, ABORTING);
665	send_abort(ep, skb, gfp);
666}
667
668static void close_complete_upcall(struct iwch_ep *ep)
669{
670	struct iw_cm_event event;
671
672	PDBG("%s ep %p\n", __func__, ep);
673	memset(&event, 0, sizeof(event));
674	event.event = IW_CM_EVENT_CLOSE;
675	if (ep->com.cm_id) {
676		PDBG("close complete delivered ep %p cm_id %p tid %d\n",
677		     ep, ep->com.cm_id, ep->hwtid);
678		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
679		ep->com.cm_id->rem_ref(ep->com.cm_id);
680		ep->com.cm_id = NULL;
681		ep->com.qp = NULL;
682	}
683}
684
685static void peer_close_upcall(struct iwch_ep *ep)
686{
687	struct iw_cm_event event;
688
689	PDBG("%s ep %p\n", __func__, ep);
690	memset(&event, 0, sizeof(event));
691	event.event = IW_CM_EVENT_DISCONNECT;
692	if (ep->com.cm_id) {
693		PDBG("peer close delivered ep %p cm_id %p tid %d\n",
694		     ep, ep->com.cm_id, ep->hwtid);
695		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
696	}
697}
698
699static void peer_abort_upcall(struct iwch_ep *ep)
700{
701	struct iw_cm_event event;
702
703	PDBG("%s ep %p\n", __func__, ep);
704	memset(&event, 0, sizeof(event));
705	event.event = IW_CM_EVENT_CLOSE;
706	event.status = -ECONNRESET;
707	if (ep->com.cm_id) {
708		PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
709		     ep->com.cm_id, ep->hwtid);
710		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
711		ep->com.cm_id->rem_ref(ep->com.cm_id);
712		ep->com.cm_id = NULL;
713		ep->com.qp = NULL;
714	}
715}
716
/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY with the given status to the ULP.
 *
 * The event carries the endpoint's local and remote addresses.  The
 * peer's private data (the bytes following the MPA header in
 * ep->mpa_pkt) is attached only for status 0 and -ECONNREFUSED.
 *
 * On a negative status the cm_id reference is dropped and the
 * endpoint's cm_id and qp pointers are cleared.  Nothing is delivered
 * or released when the endpoint has no cm_id.
 */
static void connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	PDBG("%s ep %p status %d\n", __func__, ep, status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	memcpy(&event.local_addr, &ep->com.local_addr,
	       sizeof(ep->com.local_addr));
	memcpy(&event.remote_addr, &ep->com.remote_addr,
	       sizeof(ep->com.remote_addr));

	if ((status == 0) || (status == -ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		PDBG("%s ep %p tid %d status %d\n", __func__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);

		/*
		 * Failure: release the cm_id.  This is done inside the
		 * NULL check so that a failure reported after the cm_id
		 * is already gone cannot dereference a NULL pointer.
		 */
		if (status < 0) {
			ep->com.cm_id->rem_ref(ep->com.cm_id);
			ep->com.cm_id = NULL;
			ep->com.qp = NULL;
		}
	}
}
745
746static void connect_request_upcall(struct iwch_ep *ep)
747{
748	struct iw_cm_event event;
749
750	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
751	memset(&event, 0, sizeof(event));
752	event.event = IW_CM_EVENT_CONNECT_REQUEST;
753	memcpy(&event.local_addr, &ep->com.local_addr,
754	       sizeof(ep->com.local_addr));
755	memcpy(&event.remote_addr, &ep->com.remote_addr,
756	       sizeof(ep->com.local_addr));
757	event.private_data_len = ep->plen;
758	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
759	event.provider_data = ep;
760	/*
761	 * Until ird/ord negotiation via MPAv2 support is added, send max
762	 * supported values
763	 */
764	event.ird = event.ord = 8;
765	if (state_read(&ep->parent_ep->com) != DEAD) {
766		get_ep(&ep->com);
767		ep->parent_ep->com.cm_id->event_handler(
768						ep->parent_ep->com.cm_id,
769						&event);
770	}
771	put_ep(&ep->parent_ep->com);
772	ep->parent_ep = NULL;
773}
774
775static void established_upcall(struct iwch_ep *ep)
776{
777	struct iw_cm_event event;
778
779	PDBG("%s ep %p\n", __func__, ep);
780	memset(&event, 0, sizeof(event));
781	event.event = IW_CM_EVENT_ESTABLISHED;
782	/*
783	 * Until ird/ord negotiation via MPAv2 support is added, send max
784	 * supported values
785	 */
786	event.ird = event.ord = 8;
787	if (ep->com.cm_id) {
788		PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
789		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
790	}
791}
792
793static int update_rx_credits(struct iwch_ep *ep, u32 credits)
794{
795	struct cpl_rx_data_ack *req;
796	struct sk_buff *skb;
797
798	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
799	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
800	if (!skb) {
801		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
802		return 0;
803	}
804
805	req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
806	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
807	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
808	req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
809	skb->priority = CPL_PRIORITY_ACK;
810	iwch_cxgb3_ofld_send(ep->com.tdev, skb);
811	return credits;
812}
813
/*
 * Process streaming data received while waiting for the peer's MPA
 * start reply (endpoint state MPA_REQ_SENT).
 *
 * Received bytes are accumulated in ep->mpa_pkt.  Once the complete
 * reply (header plus private data) is present and valid, the endpoint
 * moves to FPDU_MODE and the QP is moved to RTS.  Any validation or QP
 * failure aborts the connection.  In both the success and the failure
 * case the ULP is informed through connect_reply_upcall(); when more
 * data is still needed the function just returns.
 */
static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
{
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct mpa_message *mpa;
	u16 plen;
	int err;

	PDBG("%s ep %p\n", __func__, ep);

	/*
	 * Stop the mpa timer.  If it already expired, ep_timeout has
	 * changed the state and aborted the connection, so bail.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	/* More data than the accumulation buffer can hold: fail. */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		err = -EINVAL;
		goto err;
	}

	/* Append the new data to the accumulation buffer. */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/* Wait for more data until at least the MPA header is complete. */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/* Validate the MPA header: revision and reply key. */
	if (mpa->revision != mpa_rev ||
	    memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		err = -EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if the peer announces too much private data, or if we
	 * already hold more bytes than header + announced private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA ||
	    ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		err = -EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * Not all of the private data has arrived yet; processing
	 * continues when more data shows up.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = -ECONNREFUSED;
		goto err;
	}

	/*
	 * The entire mpa start reply, including private data, has been
	 * accumulated and its header is valid.
	 */
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	ep->mpa_attr.crc_enabled =
		((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = (mpa->flags & MPA_MARKERS) ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d\n", __func__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	       IWCH_QP_ATTR_LLP_STREAM_HANDLE |
	       IWCH_QP_ATTR_MPA_ATTR |
	       IWCH_QP_ATTR_MAX_IRD |
	       IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err) {
		if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0)
			iwch_post_zb_read(ep);
		goto out;
	}

err:
	abort_connection(ep, skb, GFP_KERNEL);
out:
	connect_reply_upcall(ep, err);
}
941
942static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
943{
944	struct mpa_message *mpa;
945	u16 plen;
946
947	PDBG("%s ep %p\n", __func__, ep);
948
949	/*
950	 * Stop mpa timer.  If it expired, then the state has
951	 * changed and we bail since ep_timeout already aborted
952	 * the connection.
953	 */
954	stop_ep_timer(ep);
955	if (state_read(&ep->com) != MPA_REQ_WAIT)
956		return;
957
958	/*
959	 * If we get more than the supported amount of private data
960	 * then we must fail this connection.
961	 */
962	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
963		abort_connection(ep, skb, GFP_KERNEL);
964		return;
965	}
966
967	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
968
969	/*
970	 * Copy the new data into our accumulation buffer.
971	 */
972	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
973				  skb->len);
974	ep->mpa_pkt_len += skb->len;
975
976	/*
977	 * If we don't even have the mpa message, then bail.
978	 * We'll continue process when more data arrives.
979	 */
980	if (ep->mpa_pkt_len < sizeof(*mpa))
981		return;
982	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
983	mpa = (struct mpa_message *) ep->mpa_pkt;
984
985	/*
986	 * Validate MPA Header.
987	 */
988	if (mpa->revision != mpa_rev) {
989		abort_connection(ep, skb, GFP_KERNEL);
990		return;
991	}
992
993	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
994		abort_connection(ep, skb, GFP_KERNEL);
995		return;
996	}
997
998	plen = ntohs(mpa->private_data_size);
999
1000	/*
1001	 * Fail if there's too much private data.
1002	 */
1003	if (plen > MPA_MAX_PRIVATE_DATA) {
1004		abort_connection(ep, skb, GFP_KERNEL);
1005		return;
1006	}
1007
1008	/*
1009	 * If plen does not account for pkt size
1010	 */
1011	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1012		abort_connection(ep, skb, GFP_KERNEL);
1013		return;
1014	}
1015	ep->plen = (u8) plen;
1016
1017	/*
1018	 * If we don't have all the pdata yet, then bail.
1019	 */
1020	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1021		return;
1022
1023	/*
1024	 * If we get here we have accumulated the entire mpa
1025	 * start reply message including private data.
1026	 */
1027	ep->mpa_attr.initiator = 0;
1028	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1029	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1030	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1031	ep->mpa_attr.version = mpa_rev;
1032	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1033	     "xmit_marker_enabled=%d, version=%d\n", __func__,
1034	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1035	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1036
1037	state_set(&ep->com, MPA_REQ_RCVD);
1038
1039	/* drive upcall */
1040	connect_request_upcall(ep);
1041	return;
1042}
1043
1044static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1045{
1046	struct iwch_ep *ep = ctx;
1047	struct cpl_rx_data *hdr = cplhdr(skb);
1048	unsigned int dlen = ntohs(hdr->len);
1049
1050	PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
1051
1052	skb_pull(skb, sizeof(*hdr));
1053	skb_trim(skb, dlen);
1054
1055	ep->rcv_seq += dlen;
1056	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1057
1058	switch (state_read(&ep->com)) {
1059	case MPA_REQ_SENT:
1060		process_mpa_reply(ep, skb);
1061		break;
1062	case MPA_REQ_WAIT:
1063		process_mpa_request(ep, skb);
1064		break;
1065	case MPA_REP_SENT:
1066		break;
1067	default:
1068		printk(KERN_ERR MOD "%s Unexpected streaming data."
1069		       " ep %p state %d tid %d\n",
1070		       __func__, ep, state_read(&ep->com), ep->hwtid);
1071
1072		/*
1073		 * The ep will timeout and inform the ULP of the failure.
1074		 * See ep_timeout().
1075		 */
1076		break;
1077	}
1078
1079	/* update RX credits */
1080	update_rx_credits(ep, dlen);
1081
1082	return CPL_RET_BUF_DONE;
1083}
1084
1085/*
1086 * Upcall from the adapter indicating data has been transmitted.
1087 * For us its just the single MPA request or reply.  We can now free
1088 * the skb holding the mpa message.
1089 */
1090static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1091{
1092	struct iwch_ep *ep = ctx;
1093	struct cpl_wr_ack *hdr = cplhdr(skb);
1094	unsigned int credits = ntohs(hdr->credits);
1095	unsigned long flags;
1096	int post_zb = 0;
1097
1098	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
1099
1100	if (credits == 0) {
1101		PDBG("%s 0 credit ack  ep %p state %u\n",
1102		     __func__, ep, state_read(&ep->com));
1103		return CPL_RET_BUF_DONE;
1104	}
1105
1106	spin_lock_irqsave(&ep->com.lock, flags);
1107	BUG_ON(credits != 1);
1108	dst_confirm(ep->dst);
1109	if (!ep->mpa_skb) {
1110		PDBG("%s rdma_init wr_ack ep %p state %u\n",
1111			__func__, ep, ep->com.state);
1112		if (ep->mpa_attr.initiator) {
1113			PDBG("%s initiator ep %p state %u\n",
1114				__func__, ep, ep->com.state);
1115			if (peer2peer && ep->com.state == FPDU_MODE)
1116				post_zb = 1;
1117		} else {
1118			PDBG("%s responder ep %p state %u\n",
1119				__func__, ep, ep->com.state);
1120			if (ep->com.state == MPA_REQ_RCVD) {
1121				ep->com.rpl_done = 1;
1122				wake_up(&ep->com.waitq);
1123			}
1124		}
1125	} else {
1126		PDBG("%s lsm ack ep %p state %u freeing skb\n",
1127			__func__, ep, ep->com.state);
1128		kfree_skb(ep->mpa_skb);
1129		ep->mpa_skb = NULL;
1130	}
1131	spin_unlock_irqrestore(&ep->com.lock, flags);
1132	if (post_zb)
1133		iwch_post_zb_read(ep);
1134	return CPL_RET_BUF_DONE;
1135}
1136
1137static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1138{
1139	struct iwch_ep *ep = ctx;
1140	unsigned long flags;
1141	int release = 0;
1142
1143	PDBG("%s ep %p\n", __func__, ep);
1144	BUG_ON(!ep);
1145
1146	/*
1147	 * We get 2 abort replies from the HW.  The first one must
1148	 * be ignored except for scribbling that we need one more.
1149	 */
1150	if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
1151		return CPL_RET_BUF_DONE;
1152	}
1153
1154	spin_lock_irqsave(&ep->com.lock, flags);
1155	switch (ep->com.state) {
1156	case ABORTING:
1157		close_complete_upcall(ep);
1158		__state_set(&ep->com, DEAD);
1159		release = 1;
1160		break;
1161	default:
1162		printk(KERN_ERR "%s ep %p state %d\n",
1163		     __func__, ep, ep->com.state);
1164		break;
1165	}
1166	spin_unlock_irqrestore(&ep->com.lock, flags);
1167
1168	if (release)
1169		release_ep_resources(ep);
1170	return CPL_RET_BUF_DONE;
1171}
1172
1173/*
1174 * Return whether a failed active open has allocated a TID
1175 */
1176static inline int act_open_has_tid(int status)
1177{
1178	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1179	       status != CPL_ERR_ARP_MISS;
1180}
1181
1182static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1183{
1184	struct iwch_ep *ep = ctx;
1185	struct cpl_act_open_rpl *rpl = cplhdr(skb);
1186
1187	PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
1188	     status2errno(rpl->status));
1189	connect_reply_upcall(ep, status2errno(rpl->status));
1190	state_set(&ep->com, DEAD);
1191	if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
1192		release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1193	cxgb3_free_atid(ep->com.tdev, ep->atid);
1194	dst_release(ep->dst);
1195	l2t_release(ep->com.tdev, ep->l2t);
1196	put_ep(&ep->com);
1197	return CPL_RET_BUF_DONE;
1198}
1199
1200static int listen_start(struct iwch_listen_ep *ep)
1201{
1202	struct sk_buff *skb;
1203	struct cpl_pass_open_req *req;
1204
1205	PDBG("%s ep %p\n", __func__, ep);
1206	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1207	if (!skb) {
1208		printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
1209		return -ENOMEM;
1210	}
1211
1212	req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
1213	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1214	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
1215	req->local_port = ep->com.local_addr.sin_port;
1216	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
1217	req->peer_port = 0;
1218	req->peer_ip = 0;
1219	req->peer_netmask = 0;
1220	req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
1221	req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
1222	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
1223
1224	skb->priority = 1;
1225	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1226}
1227
1228static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1229{
1230	struct iwch_listen_ep *ep = ctx;
1231	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1232
1233	PDBG("%s ep %p status %d error %d\n", __func__, ep,
1234	     rpl->status, status2errno(rpl->status));
1235	ep->com.rpl_err = status2errno(rpl->status);
1236	ep->com.rpl_done = 1;
1237	wake_up(&ep->com.waitq);
1238
1239	return CPL_RET_BUF_DONE;
1240}
1241
1242static int listen_stop(struct iwch_listen_ep *ep)
1243{
1244	struct sk_buff *skb;
1245	struct cpl_close_listserv_req *req;
1246
1247	PDBG("%s ep %p\n", __func__, ep);
1248	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1249	if (!skb) {
1250		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
1251		return -ENOMEM;
1252	}
1253	req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
1254	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1255	req->cpu_idx = 0;
1256	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
1257	skb->priority = 1;
1258	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
1259}
1260
1261static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
1262			     void *ctx)
1263{
1264	struct iwch_listen_ep *ep = ctx;
1265	struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
1266
1267	PDBG("%s ep %p\n", __func__, ep);
1268	ep->com.rpl_err = status2errno(rpl->status);
1269	ep->com.rpl_done = 1;
1270	wake_up(&ep->com.waitq);
1271	return CPL_RET_BUF_DONE;
1272}
1273
1274static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
1275{
1276	struct cpl_pass_accept_rpl *rpl;
1277	unsigned int mtu_idx;
1278	u32 opt0h, opt0l, opt2;
1279	int wscale;
1280
1281	PDBG("%s ep %p\n", __func__, ep);
1282	BUG_ON(skb_cloned(skb));
1283	skb_trim(skb, sizeof(*rpl));
1284	skb_get(skb);
1285	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
1286	wscale = compute_wscale(rcv_win);
1287	opt0h = V_NAGLE(0) |
1288	    V_NO_CONG(nocong) |
1289	    V_KEEP_ALIVE(1) |
1290	    F_TCAM_BYPASS |
1291	    V_WND_SCALE(wscale) |
1292	    V_MSS_IDX(mtu_idx) |
1293	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
1294	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
1295	opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
1296	       V_CONG_CONTROL_FLAVOR(cong_flavor);
1297
1298	rpl = cplhdr(skb);
1299	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1300	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
1301	rpl->peer_ip = peer_ip;
1302	rpl->opt0h = htonl(opt0h);
1303	rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
1304	rpl->opt2 = htonl(opt2);
1305	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
1306	skb->priority = CPL_PRIORITY_SETUP;
1307	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
1308
1309	return;
1310}
1311
1312static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
1313		      struct sk_buff *skb)
1314{
1315	PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
1316	     peer_ip);
1317	BUG_ON(skb_cloned(skb));
1318	skb_trim(skb, sizeof(struct cpl_tid_release));
1319	skb_get(skb);
1320
1321	if (tdev->type != T3A)
1322		release_tid(tdev, hwtid, skb);
1323	else {
1324		struct cpl_pass_accept_rpl *rpl;
1325
1326		rpl = cplhdr(skb);
1327		skb->priority = CPL_PRIORITY_SETUP;
1328		rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1329		OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1330						      hwtid));
1331		rpl->peer_ip = peer_ip;
1332		rpl->opt0h = htonl(F_TCAM_BYPASS);
1333		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
1334		rpl->opt2 = 0;
1335		rpl->rsvd = rpl->opt2;
1336		iwch_cxgb3_ofld_send(tdev, skb);
1337	}
1338}
1339
1340static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1341{
1342	struct iwch_ep *child_ep, *parent_ep = ctx;
1343	struct cpl_pass_accept_req *req = cplhdr(skb);
1344	unsigned int hwtid = GET_TID(req);
1345	struct dst_entry *dst;
1346	struct l2t_entry *l2t;
1347	struct rtable *rt;
1348	struct iff_mac tim;
1349
1350	PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1351
1352	if (state_read(&parent_ep->com) != LISTEN) {
1353		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
1354		       __func__);
1355		goto reject;
1356	}
1357
1358	/*
1359	 * Find the netdev for this connection request.
1360	 */
1361	tim.mac_addr = req->dst_mac;
1362	tim.vlan_tag = ntohs(req->vlan_tag);
1363	if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
1364		printk(KERN_ERR "%s bad dst mac %pM\n",
1365			__func__, req->dst_mac);
1366		goto reject;
1367	}
1368
1369	/* Find output route */
1370	rt = find_route(tdev,
1371			req->local_ip,
1372			req->peer_ip,
1373			req->local_port,
1374			req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
1375	if (!rt) {
1376		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
1377		       __func__);
1378		goto reject;
1379	}
1380	dst = &rt->dst;
1381	l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
1382	if (!l2t) {
1383		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1384		       __func__);
1385		dst_release(dst);
1386		goto reject;
1387	}
1388	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1389	if (!child_ep) {
1390		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1391		       __func__);
1392		l2t_release(tdev, l2t);
1393		dst_release(dst);
1394		goto reject;
1395	}
1396	state_set(&child_ep->com, CONNECTING);
1397	child_ep->com.tdev = tdev;
1398	child_ep->com.cm_id = NULL;
1399	child_ep->com.local_addr.sin_family = PF_INET;
1400	child_ep->com.local_addr.sin_port = req->local_port;
1401	child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
1402	child_ep->com.remote_addr.sin_family = PF_INET;
1403	child_ep->com.remote_addr.sin_port = req->peer_port;
1404	child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
1405	get_ep(&parent_ep->com);
1406	child_ep->parent_ep = parent_ep;
1407	child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
1408	child_ep->l2t = l2t;
1409	child_ep->dst = dst;
1410	child_ep->hwtid = hwtid;
1411	init_timer(&child_ep->timer);
1412	cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
1413	accept_cr(child_ep, req->peer_ip, skb);
1414	goto out;
1415reject:
1416	reject_cr(tdev, hwtid, req->peer_ip, skb);
1417out:
1418	return CPL_RET_BUF_DONE;
1419}
1420
1421static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1422{
1423	struct iwch_ep *ep = ctx;
1424	struct cpl_pass_establish *req = cplhdr(skb);
1425
1426	PDBG("%s ep %p\n", __func__, ep);
1427	ep->snd_seq = ntohl(req->snd_isn);
1428	ep->rcv_seq = ntohl(req->rcv_isn);
1429
1430	set_emss(ep, ntohs(req->tcp_opt));
1431
1432	dst_confirm(ep->dst);
1433	state_set(&ep->com, MPA_REQ_WAIT);
1434	start_ep_timer(ep);
1435
1436	return CPL_RET_BUF_DONE;
1437}
1438
1439static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1440{
1441	struct iwch_ep *ep = ctx;
1442	struct iwch_qp_attributes attrs;
1443	unsigned long flags;
1444	int disconnect = 1;
1445	int release = 0;
1446
1447	PDBG("%s ep %p\n", __func__, ep);
1448	dst_confirm(ep->dst);
1449
1450	spin_lock_irqsave(&ep->com.lock, flags);
1451	switch (ep->com.state) {
1452	case MPA_REQ_WAIT:
1453		__state_set(&ep->com, CLOSING);
1454		break;
1455	case MPA_REQ_SENT:
1456		__state_set(&ep->com, CLOSING);
1457		connect_reply_upcall(ep, -ECONNRESET);
1458		break;
1459	case MPA_REQ_RCVD:
1460
1461		/*
1462		 * We're gonna mark this puppy DEAD, but keep
1463		 * the reference on it until the ULP accepts or
1464		 * rejects the CR. Also wake up anyone waiting
1465		 * in rdma connection migration (see iwch_accept_cr()).
1466		 */
1467		__state_set(&ep->com, CLOSING);
1468		ep->com.rpl_done = 1;
1469		ep->com.rpl_err = -ECONNRESET;
1470		PDBG("waking up ep %p\n", ep);
1471		wake_up(&ep->com.waitq);
1472		break;
1473	case MPA_REP_SENT:
1474		__state_set(&ep->com, CLOSING);
1475		ep->com.rpl_done = 1;
1476		ep->com.rpl_err = -ECONNRESET;
1477		PDBG("waking up ep %p\n", ep);
1478		wake_up(&ep->com.waitq);
1479		break;
1480	case FPDU_MODE:
1481		start_ep_timer(ep);
1482		__state_set(&ep->com, CLOSING);
1483		attrs.next_state = IWCH_QP_STATE_CLOSING;
1484		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1485			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1486		peer_close_upcall(ep);
1487		break;
1488	case ABORTING:
1489		disconnect = 0;
1490		break;
1491	case CLOSING:
1492		__state_set(&ep->com, MORIBUND);
1493		disconnect = 0;
1494		break;
1495	case MORIBUND:
1496		stop_ep_timer(ep);
1497		if (ep->com.cm_id && ep->com.qp) {
1498			attrs.next_state = IWCH_QP_STATE_IDLE;
1499			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1500				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1501		}
1502		close_complete_upcall(ep);
1503		__state_set(&ep->com, DEAD);
1504		release = 1;
1505		disconnect = 0;
1506		break;
1507	case DEAD:
1508		disconnect = 0;
1509		break;
1510	default:
1511		BUG_ON(1);
1512	}
1513	spin_unlock_irqrestore(&ep->com.lock, flags);
1514	if (disconnect)
1515		iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1516	if (release)
1517		release_ep_resources(ep);
1518	return CPL_RET_BUF_DONE;
1519}
1520
1521/*
1522 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1523 */
1524static int is_neg_adv_abort(unsigned int status)
1525{
1526	return status == CPL_ERR_RTX_NEG_ADVICE ||
1527	       status == CPL_ERR_PERSIST_NEG_ADVICE;
1528}
1529
1530static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1531{
1532	struct cpl_abort_req_rss *req = cplhdr(skb);
1533	struct iwch_ep *ep = ctx;
1534	struct cpl_abort_rpl *rpl;
1535	struct sk_buff *rpl_skb;
1536	struct iwch_qp_attributes attrs;
1537	int ret;
1538	int release = 0;
1539	unsigned long flags;
1540
1541	if (is_neg_adv_abort(req->status)) {
1542		PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
1543		     ep->hwtid);
1544		t3_l2t_send_event(ep->com.tdev, ep->l2t);
1545		return CPL_RET_BUF_DONE;
1546	}
1547
1548	/*
1549	 * We get 2 peer aborts from the HW.  The first one must
1550	 * be ignored except for scribbling that we need one more.
1551	 */
1552	if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
1553		return CPL_RET_BUF_DONE;
1554	}
1555
1556	spin_lock_irqsave(&ep->com.lock, flags);
1557	PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
1558	switch (ep->com.state) {
1559	case CONNECTING:
1560		break;
1561	case MPA_REQ_WAIT:
1562		stop_ep_timer(ep);
1563		break;
1564	case MPA_REQ_SENT:
1565		stop_ep_timer(ep);
1566		connect_reply_upcall(ep, -ECONNRESET);
1567		break;
1568	case MPA_REP_SENT:
1569		ep->com.rpl_done = 1;
1570		ep->com.rpl_err = -ECONNRESET;
1571		PDBG("waking up ep %p\n", ep);
1572		wake_up(&ep->com.waitq);
1573		break;
1574	case MPA_REQ_RCVD:
1575
1576		/*
1577		 * We're gonna mark this puppy DEAD, but keep
1578		 * the reference on it until the ULP accepts or
1579		 * rejects the CR. Also wake up anyone waiting
1580		 * in rdma connection migration (see iwch_accept_cr()).
1581		 */
1582		ep->com.rpl_done = 1;
1583		ep->com.rpl_err = -ECONNRESET;
1584		PDBG("waking up ep %p\n", ep);
1585		wake_up(&ep->com.waitq);
1586		break;
1587	case MORIBUND:
1588	case CLOSING:
1589		stop_ep_timer(ep);
1590		/*FALLTHROUGH*/
1591	case FPDU_MODE:
1592		if (ep->com.cm_id && ep->com.qp) {
1593			attrs.next_state = IWCH_QP_STATE_ERROR;
1594			ret = iwch_modify_qp(ep->com.qp->rhp,
1595				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1596				     &attrs, 1);
1597			if (ret)
1598				printk(KERN_ERR MOD
1599				       "%s - qp <- error failed!\n",
1600				       __func__);
1601		}
1602		peer_abort_upcall(ep);
1603		break;
1604	case ABORTING:
1605		break;
1606	case DEAD:
1607		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
1608		spin_unlock_irqrestore(&ep->com.lock, flags);
1609		return CPL_RET_BUF_DONE;
1610	default:
1611		BUG_ON(1);
1612		break;
1613	}
1614	dst_confirm(ep->dst);
1615	if (ep->com.state != ABORTING) {
1616		__state_set(&ep->com, DEAD);
1617		release = 1;
1618	}
1619	spin_unlock_irqrestore(&ep->com.lock, flags);
1620
1621	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1622	if (!rpl_skb) {
1623		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
1624		       __func__);
1625		release = 1;
1626		goto out;
1627	}
1628	rpl_skb->priority = CPL_PRIORITY_DATA;
1629	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
1630	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1631	rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
1632	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1633	rpl->cmd = CPL_ABORT_NO_RST;
1634	iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
1635out:
1636	if (release)
1637		release_ep_resources(ep);
1638	return CPL_RET_BUF_DONE;
1639}
1640
1641static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1642{
1643	struct iwch_ep *ep = ctx;
1644	struct iwch_qp_attributes attrs;
1645	unsigned long flags;
1646	int release = 0;
1647
1648	PDBG("%s ep %p\n", __func__, ep);
1649	BUG_ON(!ep);
1650
1651	/* The cm_id may be null if we failed to connect */
1652	spin_lock_irqsave(&ep->com.lock, flags);
1653	switch (ep->com.state) {
1654	case CLOSING:
1655		__state_set(&ep->com, MORIBUND);
1656		break;
1657	case MORIBUND:
1658		stop_ep_timer(ep);
1659		if ((ep->com.cm_id) && (ep->com.qp)) {
1660			attrs.next_state = IWCH_QP_STATE_IDLE;
1661			iwch_modify_qp(ep->com.qp->rhp,
1662					     ep->com.qp,
1663					     IWCH_QP_ATTR_NEXT_STATE,
1664					     &attrs, 1);
1665		}
1666		close_complete_upcall(ep);
1667		__state_set(&ep->com, DEAD);
1668		release = 1;
1669		break;
1670	case ABORTING:
1671	case DEAD:
1672		break;
1673	default:
1674		BUG_ON(1);
1675		break;
1676	}
1677	spin_unlock_irqrestore(&ep->com.lock, flags);
1678	if (release)
1679		release_ep_resources(ep);
1680	return CPL_RET_BUF_DONE;
1681}
1682
1683/*
1684 * T3A does 3 things when a TERM is received:
1685 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1686 * 2) generate an async event on the QP with the TERMINATE opcode
1687 * 3) post a TERMINATE opcode cqe into the associated CQ.
1688 *
1689 * For (1), we save the message in the qp for later consumer consumption.
1690 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
1691 * For (3), we toss the CQE in cxio_poll_cq().
1692 *
1693 * terminate() handles case (1)...
1694 */
1695static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1696{
1697	struct iwch_ep *ep = ctx;
1698
1699	if (state_read(&ep->com) != FPDU_MODE)
1700		return CPL_RET_BUF_DONE;
1701
1702	PDBG("%s ep %p\n", __func__, ep);
1703	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1704	PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
1705	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
1706				  skb->len);
1707	ep->com.qp->attr.terminate_msg_len = skb->len;
1708	ep->com.qp->attr.is_terminate_local = 0;
1709	return CPL_RET_BUF_DONE;
1710}
1711
1712static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1713{
1714	struct cpl_rdma_ec_status *rep = cplhdr(skb);
1715	struct iwch_ep *ep = ctx;
1716
1717	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
1718	     rep->status);
1719	if (rep->status) {
1720		struct iwch_qp_attributes attrs;
1721
1722		printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
1723		       __func__, ep->hwtid);
1724		stop_ep_timer(ep);
1725		attrs.next_state = IWCH_QP_STATE_ERROR;
1726		iwch_modify_qp(ep->com.qp->rhp,
1727			       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1728			       &attrs, 1);
1729		abort_connection(ep, NULL, GFP_KERNEL);
1730	}
1731	return CPL_RET_BUF_DONE;
1732}
1733
1734static void ep_timeout(unsigned long arg)
1735{
1736	struct iwch_ep *ep = (struct iwch_ep *)arg;
1737	struct iwch_qp_attributes attrs;
1738	unsigned long flags;
1739	int abort = 1;
1740
1741	spin_lock_irqsave(&ep->com.lock, flags);
1742	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
1743	     ep->com.state);
1744	switch (ep->com.state) {
1745	case MPA_REQ_SENT:
1746		__state_set(&ep->com, ABORTING);
1747		connect_reply_upcall(ep, -ETIMEDOUT);
1748		break;
1749	case MPA_REQ_WAIT:
1750		__state_set(&ep->com, ABORTING);
1751		break;
1752	case CLOSING:
1753	case MORIBUND:
1754		if (ep->com.cm_id && ep->com.qp) {
1755			attrs.next_state = IWCH_QP_STATE_ERROR;
1756			iwch_modify_qp(ep->com.qp->rhp,
1757				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1758				     &attrs, 1);
1759		}
1760		__state_set(&ep->com, ABORTING);
1761		break;
1762	default:
1763		WARN(1, "%s unexpected state ep %p state %u\n",
1764			__func__, ep, ep->com.state);
1765		abort = 0;
1766	}
1767	spin_unlock_irqrestore(&ep->com.lock, flags);
1768	if (abort)
1769		abort_connection(ep, NULL, GFP_ATOMIC);
1770	put_ep(&ep->com);
1771}
1772
1773int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1774{
1775	int err;
1776	struct iwch_ep *ep = to_ep(cm_id);
1777	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1778
1779	if (state_read(&ep->com) == DEAD) {
1780		put_ep(&ep->com);
1781		return -ECONNRESET;
1782	}
1783	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1784	if (mpa_rev == 0)
1785		abort_connection(ep, NULL, GFP_KERNEL);
1786	else {
1787		err = send_mpa_reject(ep, pdata, pdata_len);
1788		err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1789	}
1790	put_ep(&ep->com);
1791	return 0;
1792}
1793
1794int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1795{
1796	int err;
1797	struct iwch_qp_attributes attrs;
1798	enum iwch_qp_attr_mask mask;
1799	struct iwch_ep *ep = to_ep(cm_id);
1800	struct iwch_dev *h = to_iwch_dev(cm_id->device);
1801	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1802
1803	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1804	if (state_read(&ep->com) == DEAD) {
1805		err = -ECONNRESET;
1806		goto err;
1807	}
1808
1809	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1810	BUG_ON(!qp);
1811
1812	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1813	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1814		abort_connection(ep, NULL, GFP_KERNEL);
1815		err = -EINVAL;
1816		goto err;
1817	}
1818
1819	cm_id->add_ref(cm_id);
1820	ep->com.cm_id = cm_id;
1821	ep->com.qp = qp;
1822
1823	ep->ird = conn_param->ird;
1824	ep->ord = conn_param->ord;
1825
1826	if (peer2peer && ep->ird == 0)
1827		ep->ird = 1;
1828
1829	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
1830
1831	/* bind QP to EP and move to RTS */
1832	attrs.mpa_attr = ep->mpa_attr;
1833	attrs.max_ird = ep->ird;
1834	attrs.max_ord = ep->ord;
1835	attrs.llp_stream_handle = ep;
1836	attrs.next_state = IWCH_QP_STATE_RTS;
1837
1838	/* bind QP and TID with INIT_WR */
1839	mask = IWCH_QP_ATTR_NEXT_STATE |
1840			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1841			     IWCH_QP_ATTR_MPA_ATTR |
1842			     IWCH_QP_ATTR_MAX_IRD |
1843			     IWCH_QP_ATTR_MAX_ORD;
1844
1845	err = iwch_modify_qp(ep->com.qp->rhp,
1846			     ep->com.qp, mask, &attrs, 1);
1847	if (err)
1848		goto err1;
1849
1850	/* if needed, wait for wr_ack */
1851	if (iwch_rqes_posted(qp)) {
1852		wait_event(ep->com.waitq, ep->com.rpl_done);
1853		err = ep->com.rpl_err;
1854		if (err)
1855			goto err1;
1856	}
1857
1858	err = send_mpa_reply(ep, conn_param->private_data,
1859			     conn_param->private_data_len);
1860	if (err)
1861		goto err1;
1862
1863
1864	state_set(&ep->com, FPDU_MODE);
1865	established_upcall(ep);
1866	put_ep(&ep->com);
1867	return 0;
1868err1:
1869	ep->com.cm_id = NULL;
1870	ep->com.qp = NULL;
1871	cm_id->rem_ref(cm_id);
1872err:
1873	put_ep(&ep->com);
1874	return err;
1875}
1876
1877static int is_loopback_dst(struct iw_cm_id *cm_id)
1878{
1879	struct net_device *dev;
1880	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
1881
1882	dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
1883	if (!dev)
1884		return 0;
1885	dev_put(dev);
1886	return 1;
1887}
1888
1889int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1890{
1891	struct iwch_dev *h = to_iwch_dev(cm_id->device);
1892	struct iwch_ep *ep;
1893	struct rtable *rt;
1894	int err = 0;
1895	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
1896	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
1897
1898	if (cm_id->remote_addr.ss_family != PF_INET) {
1899		err = -ENOSYS;
1900		goto out;
1901	}
1902
1903	if (is_loopback_dst(cm_id)) {
1904		err = -ENOSYS;
1905		goto out;
1906	}
1907
1908	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1909	if (!ep) {
1910		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
1911		err = -ENOMEM;
1912		goto out;
1913	}
1914	init_timer(&ep->timer);
1915	ep->plen = conn_param->private_data_len;
1916	if (ep->plen)
1917		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
1918		       conn_param->private_data, ep->plen);
1919	ep->ird = conn_param->ird;
1920	ep->ord = conn_param->ord;
1921
1922	if (peer2peer && ep->ord == 0)
1923		ep->ord = 1;
1924
1925	ep->com.tdev = h->rdev.t3cdev_p;
1926
1927	cm_id->add_ref(cm_id);
1928	ep->com.cm_id = cm_id;
1929	ep->com.qp = get_qhp(h, conn_param->qpn);
1930	BUG_ON(!ep->com.qp);
1931	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
1932	     ep->com.qp, cm_id);
1933
1934	/*
1935	 * Allocate an active TID to initiate a TCP connection.
1936	 */
1937	ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
1938	if (ep->atid == -1) {
1939		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
1940		err = -ENOMEM;
1941		goto fail2;
1942	}
1943
1944	/* find a route */
1945	rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
1946			raddr->sin_addr.s_addr, laddr->sin_port,
1947			raddr->sin_port, IPTOS_LOWDELAY);
1948	if (!rt) {
1949		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
1950		err = -EHOSTUNREACH;
1951		goto fail3;
1952	}
1953	ep->dst = &rt->dst;
1954	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
1955			     &raddr->sin_addr.s_addr);
1956	if (!ep->l2t) {
1957		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
1958		err = -ENOMEM;
1959		goto fail4;
1960	}
1961
1962	state_set(&ep->com, CONNECTING);
1963	ep->tos = IPTOS_LOWDELAY;
1964	memcpy(&ep->com.local_addr, &cm_id->local_addr,
1965	       sizeof(ep->com.local_addr));
1966	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
1967	       sizeof(ep->com.remote_addr));
1968
1969	/* send connect request to rnic */
1970	err = send_connect(ep);
1971	if (!err)
1972		goto out;
1973
1974	l2t_release(h->rdev.t3cdev_p, ep->l2t);
1975fail4:
1976	dst_release(ep->dst);
1977fail3:
1978	cxgb3_free_atid(ep->com.tdev, ep->atid);
1979fail2:
1980	cm_id->rem_ref(cm_id);
1981	put_ep(&ep->com);
1982out:
1983	return err;
1984}
1985
1986int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1987{
1988	int err = 0;
1989	struct iwch_dev *h = to_iwch_dev(cm_id->device);
1990	struct iwch_listen_ep *ep;
1991
1992
1993	might_sleep();
1994
1995	if (cm_id->local_addr.ss_family != PF_INET) {
1996		err = -ENOSYS;
1997		goto fail1;
1998	}
1999
2000	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2001	if (!ep) {
2002		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2003		err = -ENOMEM;
2004		goto fail1;
2005	}
2006	PDBG("%s ep %p\n", __func__, ep);
2007	ep->com.tdev = h->rdev.t3cdev_p;
2008	cm_id->add_ref(cm_id);
2009	ep->com.cm_id = cm_id;
2010	ep->backlog = backlog;
2011	memcpy(&ep->com.local_addr, &cm_id->local_addr,
2012	       sizeof(ep->com.local_addr));
2013
2014	/*
2015	 * Allocate a server TID.
2016	 */
2017	ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
2018	if (ep->stid == -1) {
2019		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
2020		err = -ENOMEM;
2021		goto fail2;
2022	}
2023
2024	state_set(&ep->com, LISTEN);
2025	err = listen_start(ep);
2026	if (err)
2027		goto fail3;
2028
2029	/* wait for pass_open_rpl */
2030	wait_event(ep->com.waitq, ep->com.rpl_done);
2031	err = ep->com.rpl_err;
2032	if (!err) {
2033		cm_id->provider_data = ep;
2034		goto out;
2035	}
2036fail3:
2037	cxgb3_free_stid(ep->com.tdev, ep->stid);
2038fail2:
2039	cm_id->rem_ref(cm_id);
2040	put_ep(&ep->com);
2041fail1:
2042out:
2043	return err;
2044}
2045
2046int iwch_destroy_listen(struct iw_cm_id *cm_id)
2047{
2048	int err;
2049	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
2050
2051	PDBG("%s ep %p\n", __func__, ep);
2052
2053	might_sleep();
2054	state_set(&ep->com, DEAD);
2055	ep->com.rpl_done = 0;
2056	ep->com.rpl_err = 0;
2057	err = listen_stop(ep);
2058	if (err)
2059		goto done;
2060	wait_event(ep->com.waitq, ep->com.rpl_done);
2061	cxgb3_free_stid(ep->com.tdev, ep->stid);
2062done:
2063	err = ep->com.rpl_err;
2064	cm_id->rem_ref(cm_id);
2065	put_ep(&ep->com);
2066	return err;
2067}
2068
2069int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
2070{
2071	int ret=0;
2072	unsigned long flags;
2073	int close = 0;
2074	int fatal = 0;
2075	struct t3cdev *tdev;
2076	struct cxio_rdev *rdev;
2077
2078	spin_lock_irqsave(&ep->com.lock, flags);
2079
2080	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
2081	     states[ep->com.state], abrupt);
2082
2083	tdev = (struct t3cdev *)ep->com.tdev;
2084	rdev = (struct cxio_rdev *)tdev->ulp;
2085	if (cxio_fatal_error(rdev)) {
2086		fatal = 1;
2087		close_complete_upcall(ep);
2088		ep->com.state = DEAD;
2089	}
2090	switch (ep->com.state) {
2091	case MPA_REQ_WAIT:
2092	case MPA_REQ_SENT:
2093	case MPA_REQ_RCVD:
2094	case MPA_REP_SENT:
2095	case FPDU_MODE:
2096		close = 1;
2097		if (abrupt)
2098			ep->com.state = ABORTING;
2099		else {
2100			ep->com.state = CLOSING;
2101			start_ep_timer(ep);
2102		}
2103		set_bit(CLOSE_SENT, &ep->com.flags);
2104		break;
2105	case CLOSING:
2106		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2107			close = 1;
2108			if (abrupt) {
2109				stop_ep_timer(ep);
2110				ep->com.state = ABORTING;
2111			} else
2112				ep->com.state = MORIBUND;
2113		}
2114		break;
2115	case MORIBUND:
2116	case ABORTING:
2117	case DEAD:
2118		PDBG("%s ignoring disconnect ep %p state %u\n",
2119		     __func__, ep, ep->com.state);
2120		break;
2121	default:
2122		BUG();
2123		break;
2124	}
2125
2126	spin_unlock_irqrestore(&ep->com.lock, flags);
2127	if (close) {
2128		if (abrupt)
2129			ret = send_abort(ep, NULL, gfp);
2130		else
2131			ret = send_halfclose(ep, gfp);
2132		if (ret)
2133			fatal = 1;
2134	}
2135	if (fatal)
2136		release_ep_resources(ep);
2137	return ret;
2138}
2139
2140int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2141		     struct l2t_entry *l2t)
2142{
2143	struct iwch_ep *ep = ctx;
2144
2145	if (ep->dst != old)
2146		return 0;
2147
2148	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2149	     l2t);
2150	dst_hold(new);
2151	l2t_release(ep->com.tdev, ep->l2t);
2152	ep->l2t = l2t;
2153	dst_release(old);
2154	ep->dst = new;
2155	return 1;
2156}
2157
2158/*
2159 * All the CM events are handled on a work queue to have a safe context.
2160 * These are the real handlers that are called from the work queue.
2161 */
2162static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
2163	[CPL_ACT_ESTABLISH]	= act_establish,
2164	[CPL_ACT_OPEN_RPL]	= act_open_rpl,
2165	[CPL_RX_DATA]		= rx_data,
2166	[CPL_TX_DMA_ACK]	= tx_ack,
2167	[CPL_ABORT_RPL_RSS]	= abort_rpl,
2168	[CPL_ABORT_RPL]		= abort_rpl,
2169	[CPL_PASS_OPEN_RPL]	= pass_open_rpl,
2170	[CPL_CLOSE_LISTSRV_RPL]	= close_listsrv_rpl,
2171	[CPL_PASS_ACCEPT_REQ]	= pass_accept_req,
2172	[CPL_PASS_ESTABLISH]	= pass_establish,
2173	[CPL_PEER_CLOSE]	= peer_close,
2174	[CPL_ABORT_REQ_RSS]	= peer_abort,
2175	[CPL_CLOSE_CON_RPL]	= close_con_rpl,
2176	[CPL_RDMA_TERMINATE]	= terminate,
2177	[CPL_RDMA_EC_STATUS]	= ec_status,
2178};
2179
2180static void process_work(struct work_struct *work)
2181{
2182	struct sk_buff *skb = NULL;
2183	void *ep;
2184	struct t3cdev *tdev;
2185	int ret;
2186
2187	while ((skb = skb_dequeue(&rxq))) {
2188		ep = *((void **) (skb->cb));
2189		tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
2190		ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
2191		if (ret & CPL_RET_BUF_DONE)
2192			kfree_skb(skb);
2193
2194		/*
2195		 * ep was referenced in sched(), and is freed here.
2196		 */
2197		put_ep((struct iwch_ep_common *)ep);
2198	}
2199}
2200
2201static DECLARE_WORK(skb_work, process_work);
2202
2203static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2204{
2205	struct iwch_ep_common *epc = ctx;
2206
2207	get_ep(epc);
2208
2209	/*
2210	 * Save ctx and tdev in the skb->cb area.
2211	 */
2212	*((void **) skb->cb) = ctx;
2213	*((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
2214
2215	/*
2216	 * Queue the skb and schedule the worker thread.
2217	 */
2218	skb_queue_tail(&rxq, skb);
2219	queue_work(workq, &skb_work);
2220	return 0;
2221}
2222
2223static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2224{
2225	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2226
2227	if (rpl->status != CPL_ERR_NONE) {
2228		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
2229		       "for tid %u\n", rpl->status, GET_TID(rpl));
2230	}
2231	return CPL_RET_BUF_DONE;
2232}
2233
2234/*
2235 * All upcalls from the T3 Core go to sched() to schedule the
2236 * processing on a work queue.
2237 */
2238cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
2239	[CPL_ACT_ESTABLISH]	= sched,
2240	[CPL_ACT_OPEN_RPL]	= sched,
2241	[CPL_RX_DATA]		= sched,
2242	[CPL_TX_DMA_ACK]	= sched,
2243	[CPL_ABORT_RPL_RSS]	= sched,
2244	[CPL_ABORT_RPL]		= sched,
2245	[CPL_PASS_OPEN_RPL]	= sched,
2246	[CPL_CLOSE_LISTSRV_RPL]	= sched,
2247	[CPL_PASS_ACCEPT_REQ]	= sched,
2248	[CPL_PASS_ESTABLISH]	= sched,
2249	[CPL_PEER_CLOSE]	= sched,
2250	[CPL_CLOSE_CON_RPL]	= sched,
2251	[CPL_ABORT_REQ_RSS]	= sched,
2252	[CPL_RDMA_TERMINATE]	= sched,
2253	[CPL_RDMA_EC_STATUS]	= sched,
2254	[CPL_SET_TCB_RPL]	= set_tcb_rpl,
2255};
2256
2257int __init iwch_cm_init(void)
2258{
2259	skb_queue_head_init(&rxq);
2260
2261	workq = create_singlethread_workqueue("iw_cxgb3");
2262	if (!workq)
2263		return -ENOMEM;
2264
2265	return 0;
2266}
2267
2268void __exit iwch_cm_term(void)
2269{
2270	flush_workqueue(workq);
2271	destroy_workqueue(workq);
2272}
2273