1/* 2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34#include "ipath_verbs.h" 35#include "ipath_kernel.h" 36 37/* cut down ridiculously long IB macro names */ 38#define OP(x) IB_OPCODE_UC_##x 39 40/** 41 * ipath_make_uc_req - construct a request packet (SEND, RDMA write) 42 * @qp: a pointer to the QP 43 * 44 * Return 1 if constructed; otherwise, return 0. 45 */ 46int ipath_make_uc_req(struct ipath_qp *qp) 47{ 48 struct ipath_other_headers *ohdr; 49 struct ipath_swqe *wqe; 50 unsigned long flags; 51 u32 hwords; 52 u32 bth0; 53 u32 len; 54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 55 int ret = 0; 56 57 spin_lock_irqsave(&qp->s_lock, flags); 58 59 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { 60 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) 61 goto bail; 62 /* We are in the error state, flush the work request. */ 63 if (qp->s_last == qp->s_head) 64 goto bail; 65 /* If DMAs are in progress, we can't flush immediately. */ 66 if (atomic_read(&qp->s_dma_busy)) { 67 qp->s_flags |= IPATH_S_WAIT_DMA; 68 goto bail; 69 } 70 wqe = get_swqe_ptr(qp, qp->s_last); 71 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 72 goto done; 73 } 74 75 ohdr = &qp->s_hdr.u.oth; 76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 77 ohdr = &qp->s_hdr.u.l.oth; 78 79 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 80 hwords = 5; 81 bth0 = 1 << 22; /* Set M bit */ 82 83 /* Get the next send request. */ 84 wqe = get_swqe_ptr(qp, qp->s_cur); 85 qp->s_wqe = NULL; 86 switch (qp->s_state) { 87 default: 88 if (!(ib_ipath_state_ops[qp->state] & 89 IPATH_PROCESS_NEXT_SEND_OK)) 90 goto bail; 91 /* Check if send work queue is empty. */ 92 if (qp->s_cur == qp->s_head) 93 goto bail; 94 /* 95 * Start a new request. 96 */ 97 qp->s_psn = wqe->psn = qp->s_next_psn; 98 qp->s_sge.sge = wqe->sg_list[0]; 99 qp->s_sge.sg_list = wqe->sg_list + 1; 100 qp->s_sge.num_sge = wqe->wr.num_sge; 101 qp->s_len = len = wqe->length; 102 switch (wqe->wr.opcode) { 103 case IB_WR_SEND: 104 case IB_WR_SEND_WITH_IMM: 105 if (len > pmtu) { 106 qp->s_state = OP(SEND_FIRST); 107 len = pmtu; 108 break; 109 } 110 if (wqe->wr.opcode == IB_WR_SEND) 111 qp->s_state = OP(SEND_ONLY); 112 else { 113 qp->s_state = 114 OP(SEND_ONLY_WITH_IMMEDIATE); 115 /* Immediate data comes after the BTH */ 116 ohdr->u.imm_data = wqe->wr.ex.imm_data; 117 hwords += 1; 118 } 119 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 120 bth0 |= 1 << 23; 121 qp->s_wqe = wqe; 122 if (++qp->s_cur >= qp->s_size) 123 qp->s_cur = 0; 124 break; 125 126 case IB_WR_RDMA_WRITE: 127 case IB_WR_RDMA_WRITE_WITH_IMM: 128 ohdr->u.rc.reth.vaddr = 129 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 130 ohdr->u.rc.reth.rkey = 131 cpu_to_be32(wqe->wr.wr.rdma.rkey); 132 ohdr->u.rc.reth.length = cpu_to_be32(len); 133 hwords += sizeof(struct ib_reth) / 4; 134 if (len > pmtu) { 135 qp->s_state = OP(RDMA_WRITE_FIRST); 136 len = pmtu; 137 break; 138 } 139 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 140 qp->s_state = OP(RDMA_WRITE_ONLY); 141 else { 142 qp->s_state = 143 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); 144 /* Immediate data comes after the RETH */ 145 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; 146 hwords += 1; 147 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 148 bth0 |= 1 << 23; 149 } 150 qp->s_wqe = wqe; 151 if (++qp->s_cur >= qp->s_size) 152 qp->s_cur = 0; 153 break; 154 155 default: 156 goto bail; 157 } 158 break; 159 160 case OP(SEND_FIRST): 161 qp->s_state = OP(SEND_MIDDLE); 162 /* FALLTHROUGH */ 163 case OP(SEND_MIDDLE): 164 len = qp->s_len; 165 if (len > pmtu) { 166 len = pmtu; 167 break; 168 } 169 if (wqe->wr.opcode == IB_WR_SEND) 170 qp->s_state = OP(SEND_LAST); 171 else { 172 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); 173 /* Immediate data comes after the BTH */ 174 ohdr->u.imm_data = wqe->wr.ex.imm_data; 175 hwords += 1; 176 } 177 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 178 bth0 |= 1 << 23; 179 qp->s_wqe = wqe; 180 if (++qp->s_cur >= qp->s_size) 181 qp->s_cur = 0; 182 break; 183 184 case OP(RDMA_WRITE_FIRST): 185 qp->s_state = OP(RDMA_WRITE_MIDDLE); 186 /* FALLTHROUGH */ 187 case OP(RDMA_WRITE_MIDDLE): 188 len = qp->s_len; 189 if (len > pmtu) { 190 len = pmtu; 191 break; 192 } 193 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 194 qp->s_state = OP(RDMA_WRITE_LAST); 195 else { 196 qp->s_state = 197 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); 198 /* Immediate data comes after the BTH */ 199 ohdr->u.imm_data = wqe->wr.ex.imm_data; 200 hwords += 1; 201 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 202 bth0 |= 1 << 23; 203 } 204 qp->s_wqe = wqe; 205 if (++qp->s_cur >= qp->s_size) 206 qp->s_cur = 0; 207 break; 208 } 209 qp->s_len -= len; 210 qp->s_hdrwords = hwords; 211 qp->s_cur_sge = &qp->s_sge; 212 qp->s_cur_size = len; 213 ipath_make_ruc_header(to_idev(qp->ibqp.device), 214 qp, ohdr, bth0 | (qp->s_state << 24), 215 qp->s_next_psn++ & IPATH_PSN_MASK); 216done: 217 ret = 1; 218 goto unlock; 219 220bail: 221 qp->s_flags &= ~IPATH_S_BUSY; 222unlock: 223 spin_unlock_irqrestore(&qp->s_lock, flags); 224 return ret; 225} 226 227/** 228 * ipath_uc_rcv - handle an incoming UC packet 229 * @dev: the device the packet came in on 230 * @hdr: the header of the packet 231 * @has_grh: true if the packet has a GRH 232 * @data: the packet data 233 * @tlen: the length of the packet 234 * @qp: the QP for this packet. 235 * 236 * This is called from ipath_qp_rcv() to process an incoming UC packet 237 * for the given QP. 238 * Called at interrupt level. 239 */ 240void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, 241 int has_grh, void *data, u32 tlen, struct ipath_qp *qp) 242{ 243 struct ipath_other_headers *ohdr; 244 int opcode; 245 u32 hdrsize; 246 u32 psn; 247 u32 pad; 248 struct ib_wc wc; 249 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 250 struct ib_reth *reth; 251 int header_in_data; 252 253 /* Validate the SLID. See Ch. 9.6.1.5 */ 254 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) 255 goto done; 256 257 /* Check for GRH */ 258 if (!has_grh) { 259 ohdr = &hdr->u.oth; 260 hdrsize = 8 + 12; /* LRH + BTH */ 261 psn = be32_to_cpu(ohdr->bth[2]); 262 header_in_data = 0; 263 } else { 264 ohdr = &hdr->u.l.oth; 265 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ 266 /* 267 * The header with GRH is 60 bytes and the 268 * core driver sets the eager header buffer 269 * size to 56 bytes so the last 4 bytes of 270 * the BTH header (PSN) is in the data buffer. 271 */ 272 header_in_data = dev->dd->ipath_rcvhdrentsize == 16; 273 if (header_in_data) { 274 psn = be32_to_cpu(((__be32 *) data)[0]); 275 data += sizeof(__be32); 276 } else 277 psn = be32_to_cpu(ohdr->bth[2]); 278 } 279 /* 280 * The opcode is in the low byte when its in network order 281 * (top byte when in host order). 282 */ 283 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 284 285 memset(&wc, 0, sizeof wc); 286 287 /* Compare the PSN verses the expected PSN. */ 288 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { 289 /* 290 * Handle a sequence error. 291 * Silently drop any current message. 292 */ 293 qp->r_psn = psn; 294 inv: 295 qp->r_state = OP(SEND_LAST); 296 switch (opcode) { 297 case OP(SEND_FIRST): 298 case OP(SEND_ONLY): 299 case OP(SEND_ONLY_WITH_IMMEDIATE): 300 goto send_first; 301 302 case OP(RDMA_WRITE_FIRST): 303 case OP(RDMA_WRITE_ONLY): 304 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): 305 goto rdma_first; 306 307 default: 308 dev->n_pkt_drops++; 309 goto done; 310 } 311 } 312 313 /* Check for opcode sequence errors. */ 314 switch (qp->r_state) { 315 case OP(SEND_FIRST): 316 case OP(SEND_MIDDLE): 317 if (opcode == OP(SEND_MIDDLE) || 318 opcode == OP(SEND_LAST) || 319 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 320 break; 321 goto inv; 322 323 case OP(RDMA_WRITE_FIRST): 324 case OP(RDMA_WRITE_MIDDLE): 325 if (opcode == OP(RDMA_WRITE_MIDDLE) || 326 opcode == OP(RDMA_WRITE_LAST) || 327 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 328 break; 329 goto inv; 330 331 default: 332 if (opcode == OP(SEND_FIRST) || 333 opcode == OP(SEND_ONLY) || 334 opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || 335 opcode == OP(RDMA_WRITE_FIRST) || 336 opcode == OP(RDMA_WRITE_ONLY) || 337 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) 338 break; 339 goto inv; 340 } 341 342 /* OK, process the packet. */ 343 switch (opcode) { 344 case OP(SEND_FIRST): 345 case OP(SEND_ONLY): 346 case OP(SEND_ONLY_WITH_IMMEDIATE): 347 send_first: 348 if (qp->r_flags & IPATH_R_REUSE_SGE) { 349 qp->r_flags &= ~IPATH_R_REUSE_SGE; 350 qp->r_sge = qp->s_rdma_read_sge; 351 } else if (!ipath_get_rwqe(qp, 0)) { 352 dev->n_pkt_drops++; 353 goto done; 354 } 355 /* Save the WQE so we can reuse it in case of an error. */ 356 qp->s_rdma_read_sge = qp->r_sge; 357 qp->r_rcv_len = 0; 358 if (opcode == OP(SEND_ONLY)) 359 goto send_last; 360 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) 361 goto send_last_imm; 362 /* FALLTHROUGH */ 363 case OP(SEND_MIDDLE): 364 /* Check for invalid length PMTU or posted rwqe len. */ 365 if (unlikely(tlen != (hdrsize + pmtu + 4))) { 366 qp->r_flags |= IPATH_R_REUSE_SGE; 367 dev->n_pkt_drops++; 368 goto done; 369 } 370 qp->r_rcv_len += pmtu; 371 if (unlikely(qp->r_rcv_len > qp->r_len)) { 372 qp->r_flags |= IPATH_R_REUSE_SGE; 373 dev->n_pkt_drops++; 374 goto done; 375 } 376 ipath_copy_sge(&qp->r_sge, data, pmtu); 377 break; 378 379 case OP(SEND_LAST_WITH_IMMEDIATE): 380 send_last_imm: 381 if (header_in_data) { 382 wc.ex.imm_data = *(__be32 *) data; 383 data += sizeof(__be32); 384 } else { 385 /* Immediate data comes after BTH */ 386 wc.ex.imm_data = ohdr->u.imm_data; 387 } 388 hdrsize += 4; 389 wc.wc_flags = IB_WC_WITH_IMM; 390 /* FALLTHROUGH */ 391 case OP(SEND_LAST): 392 send_last: 393 /* Get the number of bytes the message was padded by. */ 394 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 395 /* Check for invalid length. */ 396 /* XXX LAST len should be >= 1 */ 397 if (unlikely(tlen < (hdrsize + pad + 4))) { 398 qp->r_flags |= IPATH_R_REUSE_SGE; 399 dev->n_pkt_drops++; 400 goto done; 401 } 402 /* Don't count the CRC. */ 403 tlen -= (hdrsize + pad + 4); 404 wc.byte_len = tlen + qp->r_rcv_len; 405 if (unlikely(wc.byte_len > qp->r_len)) { 406 qp->r_flags |= IPATH_R_REUSE_SGE; 407 dev->n_pkt_drops++; 408 goto done; 409 } 410 wc.opcode = IB_WC_RECV; 411 last_imm: 412 ipath_copy_sge(&qp->r_sge, data, tlen); 413 wc.wr_id = qp->r_wr_id; 414 wc.status = IB_WC_SUCCESS; 415 wc.qp = &qp->ibqp; 416 wc.src_qp = qp->remote_qpn; 417 wc.slid = qp->remote_ah_attr.dlid; 418 wc.sl = qp->remote_ah_attr.sl; 419 /* Signal completion event if the solicited bit is set. */ 420 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 421 (ohdr->bth[0] & 422 cpu_to_be32(1 << 23)) != 0); 423 break; 424 425 case OP(RDMA_WRITE_FIRST): 426 case OP(RDMA_WRITE_ONLY): 427 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ 428 rdma_first: 429 /* RETH comes after BTH */ 430 if (!header_in_data) 431 reth = &ohdr->u.rc.reth; 432 else { 433 reth = (struct ib_reth *)data; 434 data += sizeof(*reth); 435 } 436 hdrsize += sizeof(*reth); 437 qp->r_len = be32_to_cpu(reth->length); 438 qp->r_rcv_len = 0; 439 if (qp->r_len != 0) { 440 u32 rkey = be32_to_cpu(reth->rkey); 441 u64 vaddr = be64_to_cpu(reth->vaddr); 442 int ok; 443 444 /* Check rkey */ 445 ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len, 446 vaddr, rkey, 447 IB_ACCESS_REMOTE_WRITE); 448 if (unlikely(!ok)) { 449 dev->n_pkt_drops++; 450 goto done; 451 } 452 } else { 453 qp->r_sge.sg_list = NULL; 454 qp->r_sge.sge.mr = NULL; 455 qp->r_sge.sge.vaddr = NULL; 456 qp->r_sge.sge.length = 0; 457 qp->r_sge.sge.sge_length = 0; 458 } 459 if (unlikely(!(qp->qp_access_flags & 460 IB_ACCESS_REMOTE_WRITE))) { 461 dev->n_pkt_drops++; 462 goto done; 463 } 464 if (opcode == OP(RDMA_WRITE_ONLY)) 465 goto rdma_last; 466 else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) 467 goto rdma_last_imm; 468 /* FALLTHROUGH */ 469 case OP(RDMA_WRITE_MIDDLE): 470 /* Check for invalid length PMTU or posted rwqe len. */ 471 if (unlikely(tlen != (hdrsize + pmtu + 4))) { 472 dev->n_pkt_drops++; 473 goto done; 474 } 475 qp->r_rcv_len += pmtu; 476 if (unlikely(qp->r_rcv_len > qp->r_len)) { 477 dev->n_pkt_drops++; 478 goto done; 479 } 480 ipath_copy_sge(&qp->r_sge, data, pmtu); 481 break; 482 483 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 484 rdma_last_imm: 485 if (header_in_data) { 486 wc.ex.imm_data = *(__be32 *) data; 487 data += sizeof(__be32); 488 } else { 489 /* Immediate data comes after BTH */ 490 wc.ex.imm_data = ohdr->u.imm_data; 491 } 492 hdrsize += 4; 493 wc.wc_flags = IB_WC_WITH_IMM; 494 495 /* Get the number of bytes the message was padded by. */ 496 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 497 /* Check for invalid length. */ 498 /* XXX LAST len should be >= 1 */ 499 if (unlikely(tlen < (hdrsize + pad + 4))) { 500 dev->n_pkt_drops++; 501 goto done; 502 } 503 /* Don't count the CRC. */ 504 tlen -= (hdrsize + pad + 4); 505 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { 506 dev->n_pkt_drops++; 507 goto done; 508 } 509 if (qp->r_flags & IPATH_R_REUSE_SGE) 510 qp->r_flags &= ~IPATH_R_REUSE_SGE; 511 else if (!ipath_get_rwqe(qp, 1)) { 512 dev->n_pkt_drops++; 513 goto done; 514 } 515 wc.byte_len = qp->r_len; 516 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 517 goto last_imm; 518 519 case OP(RDMA_WRITE_LAST): 520 rdma_last: 521 /* Get the number of bytes the message was padded by. */ 522 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 523 /* Check for invalid length. */ 524 /* XXX LAST len should be >= 1 */ 525 if (unlikely(tlen < (hdrsize + pad + 4))) { 526 dev->n_pkt_drops++; 527 goto done; 528 } 529 /* Don't count the CRC. */ 530 tlen -= (hdrsize + pad + 4); 531 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { 532 dev->n_pkt_drops++; 533 goto done; 534 } 535 ipath_copy_sge(&qp->r_sge, data, tlen); 536 break; 537 538 default: 539 /* Drop packet for unknown opcodes. */ 540 dev->n_pkt_drops++; 541 goto done; 542 } 543 qp->r_psn++; 544 qp->r_state = opcode; 545done: 546 return; 547} 548