/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/io.h>

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x

static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 len;

	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
	ss->sge = wqe->sg_list[0];
	ss->sg_list = wqe->sg_list + 1;
	ss->num_sge = wqe->wr.num_sge;
	ipath_skip_sge(ss, len);
	return wqe->length - len;
}

/**
 * ipath_init_restart - initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue entry to initialize the QP's SGE from
 *
 * The QP s_lock should be held and interrupts disabled.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;

	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
				ib_mtu_enum_to_int(qp->path_mtu));
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (list_empty(&qp->timerwait))
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}

/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device for this QP
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are on the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
			     struct ipath_other_headers *ohdr, u32 pmtu)
{
	struct ipath_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;

	/* Don't send an ACK if we aren't supposed to. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
		goto bail;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

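	/*
	 * s_ack_queue[] is a small ring of queued responses (RDMA read
	 * and atomic requests): the receive side adds entries at
	 * r_head_ack_queue and s_tail_ack_queue is the next entry to
	 * send or resend.
	 */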
	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has been sent instead of only being
		 * constructed.
		 */
		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
			qp->s_tail_ack_queue = 0;
		/* FALLTHROUGH */
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & IPATH_S_ACK_PENDING)
				goto normal;
			qp->s_ack_state = OP(ACKNOWLEDGE);
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/* Copy SGE state in case we need to resend */
			qp->s_ack_rdma_sge = e->rdma_sge;
			qp->s_cur_sge = &qp->s_ack_rdma_sge;
			len = e->rdma_sge.sge.sge_length;
			if (len > pmtu) {
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			qp->s_cur_sge = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = ipath_compute_aeth(qp);
			ohdr->u.at.atomic_ack_eth[0] =
				cpu_to_be32(e->atomic_data >> 32);
			ohdr->u.at.atomic_ack_eth[1] =
				cpu_to_be32(e->atomic_data);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = e->psn;
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu)
			len = pmtu;
		else {
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		break;

	default:
	normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_flags &= ~IPATH_S_ACK_PENDING;
		qp->s_cur_sge = NULL;
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->s_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
	}
	qp->s_hdrwords = hwords;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
	return 1;

bail:
	return 0;
}

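/*
 * A note on the BTH words built by the send-side code below: bit 22 of
 * bth0 is the MigReq ("M") bit, bit 23 of bth0 is the solicited event
 * bit, and bit 31 of bth2 asks the responder for an ACK; the low 24
 * bits of bth2 carry the PSN.
 */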
/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int ipath_make_rc_req(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_other_headers *ohdr;
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	char newreq;
	unsigned long flags;
	int ret = 0;

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;

	/*
	 * The lock is needed to synchronize between the sending tasklet,
	 * the receive interrupt handler, and timeout resends.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	/* Sending responses has higher priority than sending requests. */
	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
		goto done;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == qp->s_head)
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (atomic_read(&qp->s_dma_busy)) {
			qp->s_flags |= IPATH_S_WAIT_DMA;
			goto bail;
		}
		wqe = get_swqe_ptr(qp, qp->s_last);
		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done;
	}

	/* Leave BUSY set until RNR timeout. */
	if (qp->s_rnr_timeout) {
		qp->s_flags |= IPATH_S_WAITING;
		goto bail;
	}

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 1 << 22;	/* Set M bit */

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		if (!(ib_ipath_state_ops[qp->state] &
		    IPATH_PROCESS_NEXT_SEND_OK))
			goto bail;
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == qp->s_head)
				goto bail;
			/*
			 * If a fence is requested, wait for previous
			 * RDMA read and atomic operations to finish.
			 */
			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
			    qp->s_num_rd_atomic) {
				qp->s_flags |= IPATH_S_FENCE_PENDING;
				goto bail;
			}
			wqe->psn = qp->s_next_psn;
			newreq = 1;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = 0;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
				goto bail;
			}
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq && qp->s_lsn != (u32) -1)
				qp->s_lsn++;
			/* FALLTHROUGH */
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
				goto bail;
			}
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / sizeof(u32);
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= IPATH_S_RDMAR_PENDING;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				/*
				 * Adjust s_next_psn to count the
				 * expected number of responses.
				 */
				if (len > pmtu)
					qp->s_next_psn += (len - 1) / pmtu;
				wqe->lpsn = qp->s_next_psn++;
			}
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
421 */ 422 if (newreq) { 423 if (qp->s_num_rd_atomic >= 424 qp->s_max_rd_atomic) { 425 qp->s_flags |= IPATH_S_RDMAR_PENDING; 426 goto bail; 427 } 428 qp->s_num_rd_atomic++; 429 if (qp->s_lsn != (u32) -1) 430 qp->s_lsn++; 431 wqe->lpsn = wqe->psn; 432 } 433 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 434 qp->s_state = OP(COMPARE_SWAP); 435 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 436 wqe->wr.wr.atomic.swap); 437 ohdr->u.atomic_eth.compare_data = cpu_to_be64( 438 wqe->wr.wr.atomic.compare_add); 439 } else { 440 qp->s_state = OP(FETCH_ADD); 441 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 442 wqe->wr.wr.atomic.compare_add); 443 ohdr->u.atomic_eth.compare_data = 0; 444 } 445 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( 446 wqe->wr.wr.atomic.remote_addr >> 32); 447 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( 448 wqe->wr.wr.atomic.remote_addr); 449 ohdr->u.atomic_eth.rkey = cpu_to_be32( 450 wqe->wr.wr.atomic.rkey); 451 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); 452 ss = NULL; 453 len = 0; 454 if (++qp->s_cur == qp->s_size) 455 qp->s_cur = 0; 456 break; 457 458 default: 459 goto bail; 460 } 461 qp->s_sge.sge = wqe->sg_list[0]; 462 qp->s_sge.sg_list = wqe->sg_list + 1; 463 qp->s_sge.num_sge = wqe->wr.num_sge; 464 qp->s_len = wqe->length; 465 if (newreq) { 466 qp->s_tail++; 467 if (qp->s_tail >= qp->s_size) 468 qp->s_tail = 0; 469 } 470 bth2 |= qp->s_psn & IPATH_PSN_MASK; 471 if (wqe->wr.opcode == IB_WR_RDMA_READ) 472 qp->s_psn = wqe->lpsn + 1; 473 else { 474 qp->s_psn++; 475 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) 476 qp->s_next_psn = qp->s_psn; 477 } 478 /* 479 * Put the QP on the pending list so lost ACKs will cause 480 * a retry. More than one request can be pending so the 481 * QP may already be on the dev->pending list. 482 */ 483 spin_lock(&dev->pending_lock); 484 if (list_empty(&qp->timerwait)) 485 list_add_tail(&qp->timerwait, 486 &dev->pending[dev->pending_index]); 487 spin_unlock(&dev->pending_lock); 488 break; 489 490 case OP(RDMA_READ_RESPONSE_FIRST): 491 /* 492 * This case can only happen if a send is restarted. 493 * See ipath_restart_rc(). 494 */ 495 ipath_init_restart(qp, wqe); 496 /* FALLTHROUGH */ 497 case OP(SEND_FIRST): 498 qp->s_state = OP(SEND_MIDDLE); 499 /* FALLTHROUGH */ 500 case OP(SEND_MIDDLE): 501 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 502 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) 503 qp->s_next_psn = qp->s_psn; 504 ss = &qp->s_sge; 505 len = qp->s_len; 506 if (len > pmtu) { 507 len = pmtu; 508 break; 509 } 510 if (wqe->wr.opcode == IB_WR_SEND) 511 qp->s_state = OP(SEND_LAST); 512 else { 513 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); 514 /* Immediate data comes after the BTH */ 515 ohdr->u.imm_data = wqe->wr.ex.imm_data; 516 hwords += 1; 517 } 518 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 519 bth0 |= 1 << 23; 520 bth2 |= 1 << 31; /* Request ACK. */ 521 qp->s_cur++; 522 if (qp->s_cur >= qp->s_size) 523 qp->s_cur = 0; 524 break; 525 526 case OP(RDMA_READ_RESPONSE_LAST): 527 /* 528 * This case can only happen if a RDMA write is restarted. 529 * See ipath_restart_rc(). 
530 */ 531 ipath_init_restart(qp, wqe); 532 /* FALLTHROUGH */ 533 case OP(RDMA_WRITE_FIRST): 534 qp->s_state = OP(RDMA_WRITE_MIDDLE); 535 /* FALLTHROUGH */ 536 case OP(RDMA_WRITE_MIDDLE): 537 bth2 = qp->s_psn++ & IPATH_PSN_MASK; 538 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) 539 qp->s_next_psn = qp->s_psn; 540 ss = &qp->s_sge; 541 len = qp->s_len; 542 if (len > pmtu) { 543 len = pmtu; 544 break; 545 } 546 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 547 qp->s_state = OP(RDMA_WRITE_LAST); 548 else { 549 qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); 550 /* Immediate data comes after the BTH */ 551 ohdr->u.imm_data = wqe->wr.ex.imm_data; 552 hwords += 1; 553 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 554 bth0 |= 1 << 23; 555 } 556 bth2 |= 1 << 31; /* Request ACK. */ 557 qp->s_cur++; 558 if (qp->s_cur >= qp->s_size) 559 qp->s_cur = 0; 560 break; 561 562 case OP(RDMA_READ_RESPONSE_MIDDLE): 563 /* 564 * This case can only happen if a RDMA read is restarted. 565 * See ipath_restart_rc(). 566 */ 567 ipath_init_restart(qp, wqe); 568 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; 569 ohdr->u.rc.reth.vaddr = 570 cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); 571 ohdr->u.rc.reth.rkey = 572 cpu_to_be32(wqe->wr.wr.rdma.rkey); 573 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); 574 qp->s_state = OP(RDMA_READ_REQUEST); 575 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 576 bth2 = qp->s_psn & IPATH_PSN_MASK; 577 qp->s_psn = wqe->lpsn + 1; 578 ss = NULL; 579 len = 0; 580 qp->s_cur++; 581 if (qp->s_cur == qp->s_size) 582 qp->s_cur = 0; 583 break; 584 } 585 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) 586 bth2 |= 1 << 31; /* Request ACK. */ 587 qp->s_len -= len; 588 qp->s_hdrwords = hwords; 589 qp->s_cur_sge = ss; 590 qp->s_cur_size = len; 591 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); 592done: 593 ret = 1; 594 goto unlock; 595 596bail: 597 qp->s_flags &= ~IPATH_S_BUSY; 598unlock: 599 spin_unlock_irqrestore(&qp->s_lock, flags); 600 return ret; 601} 602 603/** 604 * send_rc_ack - Construct an ACK packet and send it 605 * @qp: a pointer to the QP 606 * 607 * This is called from ipath_rc_rcv() and only uses the receive 608 * side QP state. 609 * Note that RDMA reads and atomics are handled in the 610 * send side QP state and tasklet. 611 */ 612static void send_rc_ack(struct ipath_qp *qp) 613{ 614 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 615 struct ipath_devdata *dd; 616 u16 lrh0; 617 u32 bth0; 618 u32 hwords; 619 u32 __iomem *piobuf; 620 struct ipath_ib_header hdr; 621 struct ipath_other_headers *ohdr; 622 unsigned long flags; 623 624 spin_lock_irqsave(&qp->s_lock, flags); 625 626 /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ 627 if (qp->r_head_ack_queue != qp->s_tail_ack_queue || 628 (qp->s_flags & IPATH_S_ACK_PENDING) || 629 qp->s_ack_state != OP(ACKNOWLEDGE)) 630 goto queue_ack; 631 632 spin_unlock_irqrestore(&qp->s_lock, flags); 633 634 /* Don't try to send ACKs if the link isn't ACTIVE */ 635 dd = dev->dd; 636 if (!(dd->ipath_flags & IPATH_LINKACTIVE)) 637 goto done; 638 639 piobuf = ipath_getpiobuf(dd, 0, NULL); 640 if (!piobuf) { 641 /* 642 * We are out of PIO buffers at the moment. 643 * Pass responsibility for sending the ACK to the 644 * send tasklet so that when a PIO buffer becomes 645 * available, the ACK is sent ahead of other outgoing 646 * packets. 647 */ 648 spin_lock_irqsave(&qp->s_lock, flags); 649 goto queue_ack; 650 } 651 652 /* Construct the header. 
	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
	hwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (it's atomic) */
	bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					   (qp->r_nak_state <<
					    IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
				 qp->remote_ah_attr.src_path_bits);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);

	writeq(hwords + 1, piobuf);

	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
		u32 *hdrp = (u32 *) &hdr;

		ipath_flush_wc();
		__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
		ipath_flush_wc();
		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
	} else
		__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);

	ipath_flush_wc();

	dev->n_unicast_xmit++;
	goto done;

queue_ack:
	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
		dev->n_rc_qacks++;
		qp->s_flags |= IPATH_S_ACK_PENDING;
		qp->s_nak_state = qp->r_nak_state;
		qp->s_ack_psn = qp->r_ack_psn;

		/* Schedule the send tasklet. */
		ipath_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_restart_rc() and do_rc_ack() when the send
 * engine needs to back up and resend starting from the given PSN.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_make_rc_req().
	 */
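	/*
	 * The RDMA_READ_RESPONSE_* opcodes are reused below as internal
	 * restart markers; ipath_make_rc_req() recognizes them and calls
	 * ipath_init_restart() to rebuild the SGE state before resending.
	 */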
770 */ 771 switch (opcode) { 772 case IB_WR_SEND: 773 case IB_WR_SEND_WITH_IMM: 774 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); 775 break; 776 777 case IB_WR_RDMA_WRITE: 778 case IB_WR_RDMA_WRITE_WITH_IMM: 779 qp->s_state = OP(RDMA_READ_RESPONSE_LAST); 780 break; 781 782 case IB_WR_RDMA_READ: 783 qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); 784 break; 785 786 default: 787 /* 788 * This case shouldn't happen since its only 789 * one PSN per req. 790 */ 791 qp->s_state = OP(SEND_LAST); 792 } 793done: 794 qp->s_psn = psn; 795} 796 797/** 798 * ipath_restart_rc - back up requester to resend the last un-ACKed request 799 * @qp: the QP to restart 800 * @psn: packet sequence number for the request 801 * @wc: the work completion request 802 * 803 * The QP s_lock should be held and interrupts disabled. 804 */ 805void ipath_restart_rc(struct ipath_qp *qp, u32 psn) 806{ 807 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 808 struct ipath_ibdev *dev; 809 810 if (qp->s_retry == 0) { 811 ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 812 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); 813 goto bail; 814 } 815 qp->s_retry--; 816 817 /* 818 * Remove the QP from the timeout queue. 819 * Note: it may already have been removed by ipath_ib_timer(). 820 */ 821 dev = to_idev(qp->ibqp.device); 822 spin_lock(&dev->pending_lock); 823 if (!list_empty(&qp->timerwait)) 824 list_del_init(&qp->timerwait); 825 if (!list_empty(&qp->piowait)) 826 list_del_init(&qp->piowait); 827 spin_unlock(&dev->pending_lock); 828 829 if (wqe->wr.opcode == IB_WR_RDMA_READ) 830 dev->n_rc_resends++; 831 else 832 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; 833 834 reset_psn(qp, psn); 835 ipath_schedule_send(qp); 836 837bail: 838 return; 839} 840 841static inline void update_last_psn(struct ipath_qp *qp, u32 psn) 842{ 843 qp->s_last_psn = psn; 844} 845 846/** 847 * do_rc_ack - process an incoming RC ACK 848 * @qp: the QP the ACK came in on 849 * @psn: the packet sequence number of the ACK 850 * @opcode: the opcode of the request that resulted in the ACK 851 * 852 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK 853 * for the given QP. 854 * Called at interrupt level with the QP s_lock held and interrupts disabled. 855 * Returns 1 if OK, 0 if current operation should be aborted (NAK). 856 */ 857static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, 858 u64 val) 859{ 860 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 861 struct ib_wc wc; 862 enum ib_wc_status status; 863 struct ipath_swqe *wqe; 864 int ret = 0; 865 u32 ack_psn; 866 int diff; 867 868 /* 869 * Remove the QP from the timeout queue (or RNR timeout queue). 870 * If ipath_ib_timer() has already removed it, 871 * it's OK since we hold the QP s_lock and ipath_restart_rc() 872 * just won't find anything to restart if we ACK everything. 873 */ 874 spin_lock(&dev->pending_lock); 875 if (!list_empty(&qp->timerwait)) 876 list_del_init(&qp->timerwait); 877 spin_unlock(&dev->pending_lock); 878 879 /* 880 * Note that NAKs implicitly ACK outstanding SEND and RDMA write 881 * requests and implicitly NAK RDMA read and atomic requests issued 882 * before the NAK'ed request. The MSN won't include the NAK'ed 883 * request but will include an ACK'ed request(s). 884 */ 885 ack_psn = psn; 886 if (aeth >> 29) 887 ack_psn--; 888 wqe = get_swqe_ptr(qp, qp->s_last); 889 890 /* 891 * The MSN might be for a later WQE than the PSN indicates so 892 * only complete WQEs that the PSN finishes. 
893 */ 894 while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { 895 /* 896 * RDMA_READ_RESPONSE_ONLY is a special case since 897 * we want to generate completion events for everything 898 * before the RDMA read, copy the data, then generate 899 * the completion for the read. 900 */ 901 if (wqe->wr.opcode == IB_WR_RDMA_READ && 902 opcode == OP(RDMA_READ_RESPONSE_ONLY) && 903 diff == 0) { 904 ret = 1; 905 goto bail; 906 } 907 /* 908 * If this request is a RDMA read or atomic, and the ACK is 909 * for a later operation, this ACK NAKs the RDMA read or 910 * atomic. In other words, only a RDMA_READ_LAST or ONLY 911 * can ACK a RDMA read and likewise for atomic ops. Note 912 * that the NAK case can only happen if relaxed ordering is 913 * used and requests are sent after an RDMA read or atomic 914 * is sent but before the response is received. 915 */ 916 if ((wqe->wr.opcode == IB_WR_RDMA_READ && 917 (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || 918 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 919 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && 920 (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { 921 /* 922 * The last valid PSN seen is the previous 923 * request's. 924 */ 925 update_last_psn(qp, wqe->psn - 1); 926 /* Retry this request. */ 927 ipath_restart_rc(qp, wqe->psn); 928 /* 929 * No need to process the ACK/NAK since we are 930 * restarting an earlier request. 931 */ 932 goto bail; 933 } 934 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 935 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 936 *(u64 *) wqe->sg_list[0].vaddr = val; 937 if (qp->s_num_rd_atomic && 938 (wqe->wr.opcode == IB_WR_RDMA_READ || 939 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 940 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { 941 qp->s_num_rd_atomic--; 942 /* Restart sending task if fence is complete */ 943 if (((qp->s_flags & IPATH_S_FENCE_PENDING) && 944 !qp->s_num_rd_atomic) || 945 qp->s_flags & IPATH_S_RDMAR_PENDING) 946 ipath_schedule_send(qp); 947 } 948 /* Post a send completion queue entry if requested. */ 949 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 950 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 951 memset(&wc, 0, sizeof wc); 952 wc.wr_id = wqe->wr.wr_id; 953 wc.status = IB_WC_SUCCESS; 954 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 955 wc.byte_len = wqe->length; 956 wc.qp = &qp->ibqp; 957 wc.src_qp = qp->remote_qpn; 958 wc.slid = qp->remote_ah_attr.dlid; 959 wc.sl = qp->remote_ah_attr.sl; 960 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); 961 } 962 qp->s_retry = qp->s_retry_cnt; 963 /* 964 * If we are completing a request which is in the process of 965 * being resent, we can stop resending it since we know the 966 * responder has already seen it. 967 */ 968 if (qp->s_last == qp->s_cur) { 969 if (++qp->s_cur >= qp->s_size) 970 qp->s_cur = 0; 971 qp->s_last = qp->s_cur; 972 if (qp->s_last == qp->s_tail) 973 break; 974 wqe = get_swqe_ptr(qp, qp->s_cur); 975 qp->s_state = OP(SEND_LAST); 976 qp->s_psn = wqe->psn; 977 } else { 978 if (++qp->s_last >= qp->s_size) 979 qp->s_last = 0; 980 if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) 981 qp->s_draining = 0; 982 if (qp->s_last == qp->s_tail) 983 break; 984 wqe = get_swqe_ptr(qp, qp->s_last); 985 } 986 } 987 988 switch (aeth >> 29) { 989 case 0: /* ACK */ 990 dev->n_rc_acks++; 991 /* If this is a partial ACK, reset the retransmit timer. 
	switch (aeth >> 29) {
	case 0:		/* ACK */
		dev->n_rc_acks++;
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			if (list_empty(&qp->timerwait))
				list_add_tail(&qp->timerwait,
					      &dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
			/*
			 * If we get a partial ACK for a resent operation,
			 * we can stop resending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
				reset_psn(qp, psn + 1);
				ipath_schedule_send(qp);
			}
		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = psn + 1;
		}
		ipath_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		ret = 1;
		goto bail;

	case 1:		/* RNR NAK */
		dev->n_rnr_naks++;
		if (qp->s_last == qp->s_tail)
			goto bail;
		if (qp->s_rnr_retry == 0) {
			status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			dev->n_rc_resends++;
		else
			dev->n_rc_resends +=
				(qp->s_psn - psn) & IPATH_PSN_MASK;

		reset_psn(qp, psn);

		qp->s_rnr_timeout =
			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
					   IPATH_AETH_CREDIT_MASK];
		ipath_insert_rnr_queue(qp);
		ipath_schedule_send(qp);
		goto bail;

	case 3:		/* NAK */
		if (qp->s_last == qp->s_tail)
			goto bail;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
			IPATH_AETH_CREDIT_MASK) {
		case 0:	/* PSN sequence error */
			dev->n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			ipath_restart_rc(qp, psn);
			break;

		case 1:	/* Invalid Request */
			status = IB_WC_REM_INV_REQ_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 2:	/* Remote Access Error */
			status = IB_WC_REM_ACCESS_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 3:	/* Remote Operation Error */
			status = IB_WC_REM_OP_ERR;
			dev->n_other_naks++;
		class_b:
			ipath_send_complete(qp, wqe, status);
			ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:		/* 2: reserved */
	reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}

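/*
 * ipath_rc_rcv() below dispatches incoming packets: response opcodes
 * (RDMA_READ_RESPONSE_FIRST through ATOMIC_ACKNOWLEDGE) go to
 * ipath_rc_rcv_resp(), duplicate or out-of-sequence requests go to
 * ipath_rc_rcv_error(), and in-sequence requests are handled inline.
 */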
1116 */ 1117static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, 1118 struct ipath_other_headers *ohdr, 1119 void *data, u32 tlen, 1120 struct ipath_qp *qp, 1121 u32 opcode, 1122 u32 psn, u32 hdrsize, u32 pmtu, 1123 int header_in_data) 1124{ 1125 struct ipath_swqe *wqe; 1126 enum ib_wc_status status; 1127 unsigned long flags; 1128 int diff; 1129 u32 pad; 1130 u32 aeth; 1131 u64 val; 1132 1133 spin_lock_irqsave(&qp->s_lock, flags); 1134 1135 /* Double check we can process this now that we hold the s_lock. */ 1136 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1137 goto ack_done; 1138 1139 /* Ignore invalid responses. */ 1140 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) 1141 goto ack_done; 1142 1143 /* Ignore duplicate responses. */ 1144 diff = ipath_cmp24(psn, qp->s_last_psn); 1145 if (unlikely(diff <= 0)) { 1146 /* Update credits for "ghost" ACKs */ 1147 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { 1148 if (!header_in_data) 1149 aeth = be32_to_cpu(ohdr->u.aeth); 1150 else { 1151 aeth = be32_to_cpu(((__be32 *) data)[0]); 1152 data += sizeof(__be32); 1153 } 1154 if ((aeth >> 29) == 0) 1155 ipath_get_credit(qp, aeth); 1156 } 1157 goto ack_done; 1158 } 1159 1160 if (unlikely(qp->s_last == qp->s_tail)) 1161 goto ack_done; 1162 wqe = get_swqe_ptr(qp, qp->s_last); 1163 status = IB_WC_SUCCESS; 1164 1165 switch (opcode) { 1166 case OP(ACKNOWLEDGE): 1167 case OP(ATOMIC_ACKNOWLEDGE): 1168 case OP(RDMA_READ_RESPONSE_FIRST): 1169 if (!header_in_data) 1170 aeth = be32_to_cpu(ohdr->u.aeth); 1171 else { 1172 aeth = be32_to_cpu(((__be32 *) data)[0]); 1173 data += sizeof(__be32); 1174 } 1175 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { 1176 if (!header_in_data) { 1177 __be32 *p = ohdr->u.at.atomic_ack_eth; 1178 1179 val = ((u64) be32_to_cpu(p[0]) << 32) | 1180 be32_to_cpu(p[1]); 1181 } else 1182 val = be64_to_cpu(((__be64 *) data)[0]); 1183 } else 1184 val = 0; 1185 if (!do_rc_ack(qp, aeth, psn, opcode, val) || 1186 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1187 goto ack_done; 1188 hdrsize += 4; 1189 wqe = get_swqe_ptr(qp, qp->s_last); 1190 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1191 goto ack_op_err; 1192 qp->r_flags &= ~IPATH_R_RDMAR_SEQ; 1193 /* 1194 * If this is a response to a resent RDMA read, we 1195 * have to be careful to copy the data to the right 1196 * location. 1197 */ 1198 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, 1199 wqe, psn, pmtu); 1200 goto read_middle; 1201 1202 case OP(RDMA_READ_RESPONSE_MIDDLE): 1203 /* no AETH, no ACK */ 1204 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1205 dev->n_rdma_seq++; 1206 if (qp->r_flags & IPATH_R_RDMAR_SEQ) 1207 goto ack_done; 1208 qp->r_flags |= IPATH_R_RDMAR_SEQ; 1209 ipath_restart_rc(qp, qp->s_last_psn + 1); 1210 goto ack_done; 1211 } 1212 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1213 goto ack_op_err; 1214 read_middle: 1215 if (unlikely(tlen != (hdrsize + pmtu + 4))) 1216 goto ack_len_err; 1217 if (unlikely(pmtu >= qp->s_rdma_read_len)) 1218 goto ack_len_err; 1219 1220 /* We got a response so update the timeout. */ 1221 spin_lock(&dev->pending_lock); 1222 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) 1223 list_move_tail(&qp->timerwait, 1224 &dev->pending[dev->pending_index]); 1225 spin_unlock(&dev->pending_lock); 1226 1227 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) 1228 qp->s_retry = qp->s_retry_cnt; 1229 1230 /* 1231 * Update the RDMA receive state but do the copy w/o 1232 * holding the locks and blocking interrupts. 
1233 */ 1234 qp->s_rdma_read_len -= pmtu; 1235 update_last_psn(qp, psn); 1236 spin_unlock_irqrestore(&qp->s_lock, flags); 1237 ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); 1238 goto bail; 1239 1240 case OP(RDMA_READ_RESPONSE_ONLY): 1241 if (!header_in_data) 1242 aeth = be32_to_cpu(ohdr->u.aeth); 1243 else 1244 aeth = be32_to_cpu(((__be32 *) data)[0]); 1245 if (!do_rc_ack(qp, aeth, psn, opcode, 0)) 1246 goto ack_done; 1247 /* Get the number of bytes the message was padded by. */ 1248 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1249 /* 1250 * Check that the data size is >= 0 && <= pmtu. 1251 * Remember to account for the AETH header (4) and 1252 * ICRC (4). 1253 */ 1254 if (unlikely(tlen < (hdrsize + pad + 8))) 1255 goto ack_len_err; 1256 /* 1257 * If this is a response to a resent RDMA read, we 1258 * have to be careful to copy the data to the right 1259 * location. 1260 */ 1261 wqe = get_swqe_ptr(qp, qp->s_last); 1262 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, 1263 wqe, psn, pmtu); 1264 goto read_last; 1265 1266 case OP(RDMA_READ_RESPONSE_LAST): 1267 /* ACKs READ req. */ 1268 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1269 dev->n_rdma_seq++; 1270 if (qp->r_flags & IPATH_R_RDMAR_SEQ) 1271 goto ack_done; 1272 qp->r_flags |= IPATH_R_RDMAR_SEQ; 1273 ipath_restart_rc(qp, qp->s_last_psn + 1); 1274 goto ack_done; 1275 } 1276 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1277 goto ack_op_err; 1278 /* Get the number of bytes the message was padded by. */ 1279 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 1280 /* 1281 * Check that the data size is >= 1 && <= pmtu. 1282 * Remember to account for the AETH header (4) and 1283 * ICRC (4). 1284 */ 1285 if (unlikely(tlen <= (hdrsize + pad + 8))) 1286 goto ack_len_err; 1287 read_last: 1288 tlen -= hdrsize + pad + 8; 1289 if (unlikely(tlen != qp->s_rdma_read_len)) 1290 goto ack_len_err; 1291 if (!header_in_data) 1292 aeth = be32_to_cpu(ohdr->u.aeth); 1293 else { 1294 aeth = be32_to_cpu(((__be32 *) data)[0]); 1295 data += sizeof(__be32); 1296 } 1297 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); 1298 (void) do_rc_ack(qp, aeth, psn, 1299 OP(RDMA_READ_RESPONSE_LAST), 0); 1300 goto ack_done; 1301 } 1302 1303ack_op_err: 1304 status = IB_WC_LOC_QP_OP_ERR; 1305 goto ack_err; 1306 1307ack_len_err: 1308 status = IB_WC_LOC_LEN_ERR; 1309ack_err: 1310 ipath_send_complete(qp, wqe, status); 1311 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1312ack_done: 1313 spin_unlock_irqrestore(&qp->s_lock, flags); 1314bail: 1315 return; 1316} 1317 1318/** 1319 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet 1320 * @dev: the device this packet came in on 1321 * @ohdr: the other headers for this packet 1322 * @data: the packet data 1323 * @qp: the QP for this packet 1324 * @opcode: the opcode for this packet 1325 * @psn: the packet sequence number for this packet 1326 * @diff: the difference between the PSN and the expected PSN 1327 * @header_in_data: true if part of the header data is in the data buffer 1328 * 1329 * This is called from ipath_rc_rcv() to process an unexpected 1330 * incoming RC packet for the given QP. 1331 * Called at interrupt level. 1332 * Return 1 if no more processing is needed; otherwise return 0 to 1333 * schedule a response to be sent. 
1334 */ 1335static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, 1336 struct ipath_other_headers *ohdr, 1337 void *data, 1338 struct ipath_qp *qp, 1339 u32 opcode, 1340 u32 psn, 1341 int diff, 1342 int header_in_data) 1343{ 1344 struct ipath_ack_entry *e; 1345 u8 i, prev; 1346 int old_req; 1347 unsigned long flags; 1348 1349 if (diff > 0) { 1350 /* 1351 * Packet sequence error. 1352 * A NAK will ACK earlier sends and RDMA writes. 1353 * Don't queue the NAK if we already sent one. 1354 */ 1355 if (!qp->r_nak_state) { 1356 qp->r_nak_state = IB_NAK_PSN_ERROR; 1357 /* Use the expected PSN. */ 1358 qp->r_ack_psn = qp->r_psn; 1359 goto send_ack; 1360 } 1361 goto done; 1362 } 1363 1364 /* 1365 * Handle a duplicate request. Don't re-execute SEND, RDMA 1366 * write or atomic op. Don't NAK errors, just silently drop 1367 * the duplicate request. Note that r_sge, r_len, and 1368 * r_rcv_len may be in use so don't modify them. 1369 * 1370 * We are supposed to ACK the earliest duplicate PSN but we 1371 * can coalesce an outstanding duplicate ACK. We have to 1372 * send the earliest so that RDMA reads can be restarted at 1373 * the requester's expected PSN. 1374 * 1375 * First, find where this duplicate PSN falls within the 1376 * ACKs previously sent. 1377 */ 1378 psn &= IPATH_PSN_MASK; 1379 e = NULL; 1380 old_req = 1; 1381 1382 spin_lock_irqsave(&qp->s_lock, flags); 1383 /* Double check we can process this now that we hold the s_lock. */ 1384 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1385 goto unlock_done; 1386 1387 for (i = qp->r_head_ack_queue; ; i = prev) { 1388 if (i == qp->s_tail_ack_queue) 1389 old_req = 0; 1390 if (i) 1391 prev = i - 1; 1392 else 1393 prev = IPATH_MAX_RDMA_ATOMIC; 1394 if (prev == qp->r_head_ack_queue) { 1395 e = NULL; 1396 break; 1397 } 1398 e = &qp->s_ack_queue[prev]; 1399 if (!e->opcode) { 1400 e = NULL; 1401 break; 1402 } 1403 if (ipath_cmp24(psn, e->psn) >= 0) { 1404 if (prev == qp->s_tail_ack_queue) 1405 old_req = 0; 1406 break; 1407 } 1408 } 1409 switch (opcode) { 1410 case OP(RDMA_READ_REQUEST): { 1411 struct ib_reth *reth; 1412 u32 offset; 1413 u32 len; 1414 1415 /* 1416 * If we didn't find the RDMA read request in the ack queue, 1417 * or the send tasklet is already backed up to send an 1418 * earlier entry, we can ignore this request. 1419 */ 1420 if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) 1421 goto unlock_done; 1422 /* RETH comes after BTH */ 1423 if (!header_in_data) 1424 reth = &ohdr->u.rc.reth; 1425 else { 1426 reth = (struct ib_reth *)data; 1427 data += sizeof(*reth); 1428 } 1429 /* 1430 * Address range must be a subset of the original 1431 * request and start on pmtu boundaries. 1432 * We reuse the old ack_queue slot since the requester 1433 * should not back up and request an earlier PSN for the 1434 * same request. 
1435 */ 1436 offset = ((psn - e->psn) & IPATH_PSN_MASK) * 1437 ib_mtu_enum_to_int(qp->path_mtu); 1438 len = be32_to_cpu(reth->length); 1439 if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) 1440 goto unlock_done; 1441 if (len != 0) { 1442 u32 rkey = be32_to_cpu(reth->rkey); 1443 u64 vaddr = be64_to_cpu(reth->vaddr); 1444 int ok; 1445 1446 ok = ipath_rkey_ok(qp, &e->rdma_sge, 1447 len, vaddr, rkey, 1448 IB_ACCESS_REMOTE_READ); 1449 if (unlikely(!ok)) 1450 goto unlock_done; 1451 } else { 1452 e->rdma_sge.sg_list = NULL; 1453 e->rdma_sge.num_sge = 0; 1454 e->rdma_sge.sge.mr = NULL; 1455 e->rdma_sge.sge.vaddr = NULL; 1456 e->rdma_sge.sge.length = 0; 1457 e->rdma_sge.sge.sge_length = 0; 1458 } 1459 e->psn = psn; 1460 qp->s_ack_state = OP(ACKNOWLEDGE); 1461 qp->s_tail_ack_queue = prev; 1462 break; 1463 } 1464 1465 case OP(COMPARE_SWAP): 1466 case OP(FETCH_ADD): { 1467 /* 1468 * If we didn't find the atomic request in the ack queue 1469 * or the send tasklet is already backed up to send an 1470 * earlier entry, we can ignore this request. 1471 */ 1472 if (!e || e->opcode != (u8) opcode || old_req) 1473 goto unlock_done; 1474 qp->s_ack_state = OP(ACKNOWLEDGE); 1475 qp->s_tail_ack_queue = prev; 1476 break; 1477 } 1478 1479 default: 1480 if (old_req) 1481 goto unlock_done; 1482 /* 1483 * Resend the most recent ACK if this request is 1484 * after all the previous RDMA reads and atomics. 1485 */ 1486 if (i == qp->r_head_ack_queue) { 1487 spin_unlock_irqrestore(&qp->s_lock, flags); 1488 qp->r_nak_state = 0; 1489 qp->r_ack_psn = qp->r_psn - 1; 1490 goto send_ack; 1491 } 1492 /* 1493 * Try to send a simple ACK to work around a Mellanox bug 1494 * which doesn't accept a RDMA read response or atomic 1495 * response as an ACK for earlier SENDs or RDMA writes. 1496 */ 1497 if (qp->r_head_ack_queue == qp->s_tail_ack_queue && 1498 !(qp->s_flags & IPATH_S_ACK_PENDING) && 1499 qp->s_ack_state == OP(ACKNOWLEDGE)) { 1500 spin_unlock_irqrestore(&qp->s_lock, flags); 1501 qp->r_nak_state = 0; 1502 qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; 1503 goto send_ack; 1504 } 1505 /* 1506 * Resend the RDMA read or atomic op which 1507 * ACKs this duplicate request. 
1508 */ 1509 qp->s_ack_state = OP(ACKNOWLEDGE); 1510 qp->s_tail_ack_queue = i; 1511 break; 1512 } 1513 qp->r_nak_state = 0; 1514 ipath_schedule_send(qp); 1515 1516unlock_done: 1517 spin_unlock_irqrestore(&qp->s_lock, flags); 1518done: 1519 return 1; 1520 1521send_ack: 1522 return 0; 1523} 1524 1525void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1526{ 1527 unsigned long flags; 1528 int lastwqe; 1529 1530 spin_lock_irqsave(&qp->s_lock, flags); 1531 lastwqe = ipath_error_qp(qp, err); 1532 spin_unlock_irqrestore(&qp->s_lock, flags); 1533 1534 if (lastwqe) { 1535 struct ib_event ev; 1536 1537 ev.device = qp->ibqp.device; 1538 ev.element.qp = &qp->ibqp; 1539 ev.event = IB_EVENT_QP_LAST_WQE_REACHED; 1540 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); 1541 } 1542} 1543 1544static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) 1545{ 1546 unsigned next; 1547 1548 next = n + 1; 1549 if (next > IPATH_MAX_RDMA_ATOMIC) 1550 next = 0; 1551 if (n == qp->s_tail_ack_queue) { 1552 qp->s_tail_ack_queue = next; 1553 qp->s_ack_state = OP(ACKNOWLEDGE); 1554 } 1555} 1556 1557/** 1558 * ipath_rc_rcv - process an incoming RC packet 1559 * @dev: the device this packet came in on 1560 * @hdr: the header of this packet 1561 * @has_grh: true if the header has a GRH 1562 * @data: the packet data 1563 * @tlen: the packet length 1564 * @qp: the QP for this packet 1565 * 1566 * This is called from ipath_qp_rcv() to process an incoming RC packet 1567 * for the given QP. 1568 * Called at interrupt level. 1569 */ 1570void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, 1571 int has_grh, void *data, u32 tlen, struct ipath_qp *qp) 1572{ 1573 struct ipath_other_headers *ohdr; 1574 u32 opcode; 1575 u32 hdrsize; 1576 u32 psn; 1577 u32 pad; 1578 struct ib_wc wc; 1579 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 1580 int diff; 1581 struct ib_reth *reth; 1582 int header_in_data; 1583 unsigned long flags; 1584 1585 /* Validate the SLID. See Ch. 9.6.1.5 */ 1586 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) 1587 goto done; 1588 1589 /* Check for GRH */ 1590 if (!has_grh) { 1591 ohdr = &hdr->u.oth; 1592 hdrsize = 8 + 12; /* LRH + BTH */ 1593 psn = be32_to_cpu(ohdr->bth[2]); 1594 header_in_data = 0; 1595 } else { 1596 ohdr = &hdr->u.l.oth; 1597 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ 1598 /* 1599 * The header with GRH is 60 bytes and the core driver sets 1600 * the eager header buffer size to 56 bytes so the last 4 1601 * bytes of the BTH header (PSN) is in the data buffer. 1602 */ 1603 header_in_data = dev->dd->ipath_rcvhdrentsize == 16; 1604 if (header_in_data) { 1605 psn = be32_to_cpu(((__be32 *) data)[0]); 1606 data += sizeof(__be32); 1607 } else 1608 psn = be32_to_cpu(ohdr->bth[2]); 1609 } 1610 1611 /* 1612 * Process responses (ACKs) before anything else. Note that the 1613 * packet sequence number will be for something in the send work 1614 * queue rather than the expected receive packet sequence number. 1615 * In other words, this QP is the requester. 1616 */ 1617 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 1618 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && 1619 opcode <= OP(ATOMIC_ACKNOWLEDGE)) { 1620 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, 1621 hdrsize, pmtu, header_in_data); 1622 goto done; 1623 } 1624 1625 /* Compute 24 bits worth of difference. 
	/* Compute 24 bits worth of difference. */
	diff = ipath_cmp24(psn, qp->r_psn);
	if (unlikely(diff)) {
		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
				       psn, diff, header_in_data))
			goto done;
		goto send_ack;
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	default:
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			goto nack_inv;
		/*
		 * Note that it is up to the requester to not send a new
		 * RDMA read or atomic operation before receiving an ACK
		 * for the previous operation.
		 */
		break;
	}

	memset(&wc, 0, sizeof wc);

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
	case OP(RDMA_WRITE_MIDDLE):
	send_middle:
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto nack_inv;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		/* consume RWQE */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		/* FALLTHROUGH */
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.ex.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.ex.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	case OP(RDMA_WRITE_LAST):
	send_last:
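		/*
		 * All of the *_LAST and *_ONLY cases converge here:
		 * validate the total length against the posted receive,
		 * copy the tail of the payload and generate a receive
		 * completion if a receive WQE was consumed.
		 */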
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto nack_inv;
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, tlen);
		qp->r_msn++;
		if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
			break;
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		else
			wc.opcode = IB_WC_RECV;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto nack_inv;
		/* consume RWQE */
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &qp->r_sge,
					   qp->r_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto nack_acc;
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_FIRST))
			goto send_middle;
		else if (opcode == OP(RDMA_WRITE_ONLY))
			goto send_last;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(RDMA_READ_REQUEST): {
		struct ipath_ack_entry *e;
		u32 len;
		u8 next;

		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto nack_inv;
		next = qp->r_head_ack_queue + 1;
		if (next > IPATH_MAX_RDMA_ATOMIC)
			next = 0;
		spin_lock_irqsave(&qp->s_lock, flags);
		/* Double check we can process this while holding the s_lock. */
		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
			goto unlock;
		if (unlikely(next == qp->s_tail_ack_queue)) {
			if (!qp->s_ack_queue[next].sent)
				goto nack_inv_unlck;
			ipath_update_ack_queue(qp, next);
		}
		e = &qp->s_ack_queue[qp->r_head_ack_queue];
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		len = be32_to_cpu(reth->length);
		if (len) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
					   rkey, IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto nack_acc_unlck;
			/*
			 * Update the next expected PSN.  We add 1 later
			 * below, so only add the remainder here.
			 */
1835 */ 1836 if (len > pmtu) 1837 qp->r_psn += (len - 1) / pmtu; 1838 } else { 1839 e->rdma_sge.sg_list = NULL; 1840 e->rdma_sge.num_sge = 0; 1841 e->rdma_sge.sge.mr = NULL; 1842 e->rdma_sge.sge.vaddr = NULL; 1843 e->rdma_sge.sge.length = 0; 1844 e->rdma_sge.sge.sge_length = 0; 1845 } 1846 e->opcode = opcode; 1847 e->sent = 0; 1848 e->psn = psn; 1849 /* 1850 * We need to increment the MSN here instead of when we 1851 * finish sending the result since a duplicate request would 1852 * increment it more than once. 1853 */ 1854 qp->r_msn++; 1855 qp->r_psn++; 1856 qp->r_state = opcode; 1857 qp->r_nak_state = 0; 1858 qp->r_head_ack_queue = next; 1859 1860 /* Schedule the send tasklet. */ 1861 ipath_schedule_send(qp); 1862 1863 goto unlock; 1864 } 1865 1866 case OP(COMPARE_SWAP): 1867 case OP(FETCH_ADD): { 1868 struct ib_atomic_eth *ateth; 1869 struct ipath_ack_entry *e; 1870 u64 vaddr; 1871 atomic64_t *maddr; 1872 u64 sdata; 1873 u32 rkey; 1874 u8 next; 1875 1876 if (unlikely(!(qp->qp_access_flags & 1877 IB_ACCESS_REMOTE_ATOMIC))) 1878 goto nack_inv; 1879 next = qp->r_head_ack_queue + 1; 1880 if (next > IPATH_MAX_RDMA_ATOMIC) 1881 next = 0; 1882 spin_lock_irqsave(&qp->s_lock, flags); 1883 /* Double check we can process this while holding the s_lock. */ 1884 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1885 goto unlock; 1886 if (unlikely(next == qp->s_tail_ack_queue)) { 1887 if (!qp->s_ack_queue[next].sent) 1888 goto nack_inv_unlck; 1889 ipath_update_ack_queue(qp, next); 1890 } 1891 if (!header_in_data) 1892 ateth = &ohdr->u.atomic_eth; 1893 else 1894 ateth = (struct ib_atomic_eth *)data; 1895 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | 1896 be32_to_cpu(ateth->vaddr[1]); 1897 if (unlikely(vaddr & (sizeof(u64) - 1))) 1898 goto nack_inv_unlck; 1899 rkey = be32_to_cpu(ateth->rkey); 1900 /* Check rkey & NAK */ 1901 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, 1902 sizeof(u64), vaddr, rkey, 1903 IB_ACCESS_REMOTE_ATOMIC))) 1904 goto nack_acc_unlck; 1905 /* Perform atomic OP and save result. */ 1906 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 1907 sdata = be64_to_cpu(ateth->swap_data); 1908 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 1909 e->atomic_data = (opcode == OP(FETCH_ADD)) ? 1910 (u64) atomic64_add_return(sdata, maddr) - sdata : 1911 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 1912 be64_to_cpu(ateth->compare_data), 1913 sdata); 1914 e->opcode = opcode; 1915 e->sent = 0; 1916 e->psn = psn & IPATH_PSN_MASK; 1917 qp->r_msn++; 1918 qp->r_psn++; 1919 qp->r_state = opcode; 1920 qp->r_nak_state = 0; 1921 qp->r_head_ack_queue = next; 1922 1923 /* Schedule the send tasklet. */ 1924 ipath_schedule_send(qp); 1925 1926 goto unlock; 1927 } 1928 1929 default: 1930 /* NAK unknown opcodes. */ 1931 goto nack_inv; 1932 } 1933 qp->r_psn++; 1934 qp->r_state = opcode; 1935 qp->r_ack_psn = psn; 1936 qp->r_nak_state = 0; 1937 /* Send an ACK if requested or required. 
	/* Send an ACK if requested or required. */
	if (psn & (1 << 31))
		goto send_ack;
	goto done;

rnr_nak:
	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
	qp->r_ack_psn = qp->r_psn;
	goto send_ack;

nack_inv_unlck:
	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
	ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
	qp->r_ack_psn = qp->r_psn;
	goto send_ack;

nack_acc_unlck:
	spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
	ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;
send_ack:
	send_rc_ack(qp);
	goto done;

unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return;
}