/*
 * IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 * post_send/recv, poll_cq, req_notify
 *
 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *          Waleri Fomin <fomin@de.ibm.com>
 *          Joachim Fenkes <fenkes@de.ibm.com>
 *          Reinhard Ernst <rernst@de.ibm.com>
 *
 * Copyright (c) 2005 IBM Corporation
 *
 * All rights reserved.
 *
 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
 * BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
#include "ehca_iverbs.h"
#include "hcp_if.h"
#include "hipz_fns.h"

/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000

static u64 replace_wr_id(u64 wr_id, u16 idx)
{
	u64 ret;

	ret = wr_id & ~QMAP_IDX_MASK;
	ret |= idx & QMAP_IDX_MASK;

	return ret;
}

static u16 get_app_wr_id(u64 wr_id)
{
	return wr_id & QMAP_IDX_MASK;
}

static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
				  struct ehca_wqe *wqe_p,
				  struct ib_recv_wr *recv_wr,
				  u32 rq_map_idx)
{
	u8 cnt_ds;
	if (unlikely((recv_wr->num_sge < 0) ||
		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
		ehca_gen_err("Invalid number of WQE SGE. "
			     "num_sge=%x max_nr_of_sg=%x",
			     recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
		return -EINVAL; /* invalid SG list length */
	}

	/* clear wqe header until sglist */
	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));

	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
	wqe_p->nr_of_data_seg = recv_wr->num_sge;

	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
			recv_wr->sg_list[cnt_ds].addr;
		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
			recv_wr->sg_list[cnt_ds].lkey;
		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
			recv_wr->sg_list[cnt_ds].length;
	}

	if (ehca_debug_level >= 3) {
		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
			     ipz_rqueue);
		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
	}

	return 0;
}

#if defined(DEBUG_GSI_SEND_WR)

/* need ib_mad struct */
#include <rdma/ib_mad.h>

static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
{
	int idx = 0;
	int j;
	while (send_wr) {
		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
		struct ib_sge *sge = send_wr->sg_list;
		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
			     "send_flags=%x opcode=%x", idx, send_wr->wr_id,
			     send_wr->num_sge, send_wr->send_flags,
			     send_wr->opcode);
		if (mad_hdr) {
			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
				     "mgmt_class=%x class_version=%x method=%x "
				     "status=%x class_specific=%x tid=%lx "
				     "attr_id=%x resv=%x attr_mod=%x",
				     idx, mad_hdr->base_version,
				     mad_hdr->mgmt_class,
				     mad_hdr->class_version, mad_hdr->method,
				     mad_hdr->status, mad_hdr->class_specific,
				     mad_hdr->tid, mad_hdr->attr_id,
				     mad_hdr->resv,
				     mad_hdr->attr_mod);
		}
		for (j = 0; j < send_wr->num_sge; j++) {
			u8 *data = __va(sge->addr);
			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
				     "lkey=%x",
				     idx, j, data, sge->length, sge->lkey);
			/* assume length is n*16 */
			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
				 idx, j);
			sge++;
		} /* eof for j */
		idx++;
		send_wr = send_wr->next;
	} /* eof while send_wr */
}

#endif /* DEBUG_GSI_SEND_WR */

/*
 * ehca_write_swqe() builds one send WQE from an ib_send_wr and records the
 * caller's wr_id in the sq_map entry selected by sq_map_idx, so that poll_cq
 * can later restore it and detect duplicate completions.  The 'hidden' flag
 * is set only for WQEs generated by the driver itself (the empty RDMA READ
 * used for unsolicited ack circumvention); such WQEs never request a
 * signaled completion.
 */
static inline int ehca_write_swqe(struct ehca_qp *qp,
				  struct ehca_wqe *wqe_p,
				  const struct ib_send_wr *send_wr,
				  u32 sq_map_idx,
				  int hidden)
{
	u32 idx;
	u64 dma_length;
	struct ehca_av *my_av;
	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];

	if (unlikely((send_wr->num_sge < 0) ||
		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
		ehca_gen_err("Invalid number of WQE SGE. "
			     "num_sge=%x max_nr_of_sg=%x",
			     send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
		return -EINVAL; /* invalid SG list length */
	}

	/* clear wqe header until sglist */
	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));

	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);

	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
	qmap_entry->reported = 0;
	qmap_entry->cqe_req = 0;

	switch (send_wr->opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		wqe_p->optype = WQE_OPTYPE_SEND;
		break;
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
		break;
	case IB_WR_RDMA_READ:
		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
		break;
	default:
		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
		return -EINVAL; /* invalid opcode */
	}

	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;

	wqe_p->wr_flag = 0;

	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
	     qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
	    && !hidden) {
		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
		qmap_entry->cqe_req = 1;
	}

	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
		/* this might not work as long as HW does not support it */
		wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
	}

	wqe_p->nr_of_data_seg = send_wr->num_sge;

	switch (qp->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		/* no break is intentional here */
	case IB_QPT_UD:
		/* IB 1.2 spec C10-15 compliance */
		if (send_wr->wr.ud.remote_qkey & 0x80000000)
			remote_qkey = qp->qkey;

		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
		wqe_p->local_ee_context_qkey = remote_qkey;
		if (unlikely(!send_wr->wr.ud.ah)) {
			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
			return -EINVAL;
		}
		if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
			ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
			return -EINVAL;
		}
		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
		wqe_p->u.ud_av.ud_av = my_av->av;

		/*
		 * omitted check of IB_SEND_INLINE
		 * since HW does not support it
		 */
		for (idx = 0; idx < send_wr->num_sge; idx++) {
			wqe_p->u.ud_av.sg_list[idx].vaddr =
				send_wr->sg_list[idx].addr;
			wqe_p->u.ud_av.sg_list[idx].lkey =
				send_wr->sg_list[idx].lkey;
			wqe_p->u.ud_av.sg_list[idx].length =
				send_wr->sg_list[idx].length;
		} /* eof for idx */
		if (qp->qp_type == IB_QPT_SMI ||
		    qp->qp_type == IB_QPT_GSI)
			wqe_p->u.ud_av.ud_av.pmtu = 1;
		if (qp->qp_type == IB_QPT_GSI) {
			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
#ifdef DEBUG_GSI_SEND_WR
			trace_send_wr_ud(send_wr);
#endif /* DEBUG_GSI_SEND_WR */
		}
		break;

	case IB_QPT_UC:
		if (send_wr->send_flags & IB_SEND_FENCE)
			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
		/* no break is intentional here */
	case IB_QPT_RC:
		/* TODO: atomic not implemented */
		wqe_p->u.nud.remote_virtual_address =
			send_wr->wr.rdma.remote_addr;
		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;

		/*
		 * omitted checking of IB_SEND_INLINE
		 * since HW does not support it
		 */
		dma_length = 0;
		for (idx = 0; idx < send_wr->num_sge; idx++) {
			wqe_p->u.nud.sg_list[idx].vaddr =
				send_wr->sg_list[idx].addr;
			wqe_p->u.nud.sg_list[idx].lkey =
				send_wr->sg_list[idx].lkey;
			wqe_p->u.nud.sg_list[idx].length =
				send_wr->sg_list[idx].length;
			dma_length += send_wr->sg_list[idx].length;
		} /* eof idx */
		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;

		/* unsolicited ack circumvention */
		if (send_wr->opcode == IB_WR_RDMA_READ) {
			/* on RDMA read, switch on and reset counters */
			qp->message_count = qp->packet_count = 0;
			qp->unsol_ack_circ = 1;
		} else
			/* else estimate #packets */
			qp->packet_count += (dma_length >> qp->mtu_shift) + 1;

		break;

	default:
		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
		return -EINVAL;
	}

	if (ehca_debug_level >= 3) {
		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
	}
	return 0;
}

/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
static inline void map_ib_wc_status(u32 cqe_status,
				    enum ib_wc_status *wc_status)
{
	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
		switch (cqe_status & 0x3F) {
		case 0x01:
		case 0x21:
			*wc_status = IB_WC_LOC_LEN_ERR;
			break;
		case 0x02:
		case 0x22:
			*wc_status = IB_WC_LOC_QP_OP_ERR;
			break;
		case 0x03:
		case 0x23:
			*wc_status = IB_WC_LOC_EEC_OP_ERR;
			break;
		case 0x04:
		case 0x24:
			*wc_status = IB_WC_LOC_PROT_ERR;
			break;
		case 0x05:
		case 0x25:
			*wc_status = IB_WC_WR_FLUSH_ERR;
			break;
		case 0x06:
			*wc_status = IB_WC_MW_BIND_ERR;
			break;
		case 0x07: /* remote error - look into bits 20:24 */
			switch ((cqe_status
				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
			case 0x0:
				/*
				 * PSN Sequence Error!
				 * couldn't find a matching status!
				 */
				*wc_status = IB_WC_GENERAL_ERR;
				break;
			case 0x1:
				*wc_status = IB_WC_REM_INV_REQ_ERR;
				break;
			case 0x2:
				*wc_status = IB_WC_REM_ACCESS_ERR;
				break;
			case 0x3:
				*wc_status = IB_WC_REM_OP_ERR;
				break;
			case 0x4:
				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
				break;
			}
			break;
		case 0x08:
			*wc_status = IB_WC_RETRY_EXC_ERR;
			break;
		case 0x09:
			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
			break;
		case 0x0A:
		case 0x2D:
			*wc_status = IB_WC_REM_ABORT_ERR;
			break;
		case 0x0B:
		case 0x2E:
			*wc_status = IB_WC_INV_EECN_ERR;
			break;
		case 0x0C:
		case 0x2F:
			*wc_status = IB_WC_INV_EEC_STATE_ERR;
			break;
		case 0x0D:
			*wc_status = IB_WC_BAD_RESP_ERR;
			break;
		case 0x10:
			/* WQE purged */
			*wc_status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			*wc_status = IB_WC_FATAL_ERR;
		}
	} else
		*wc_status = IB_WC_SUCCESS;
}

static inline int post_one_send(struct ehca_qp *my_qp,
				struct ib_send_wr *cur_send_wr,
				int hidden)
{
	struct ehca_wqe *wqe_p;
	int ret;
	u32 sq_map_idx;
	u64 start_offset = my_qp->ipz_squeue.current_q_offset;

	/* get pointer next to free WQE */
	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
	if (unlikely(!wqe_p)) {
		/* too many posted work requests: queue overflow */
		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
			 "qp_num=%x", my_qp->ib_qp.qp_num);
		return -ENOMEM;
	}

	/*
	 * Get the index of the WQE in the send queue. The same index is used
	 * for writing into the sq_map.
	 */
	sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;

	/* write a SEND WQE into the QUEUE */
	ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
	/*
	 * if something failed,
	 * reset the free entry pointer to the start value
	 */
	if (unlikely(ret)) {
		my_qp->ipz_squeue.current_q_offset = start_offset;
		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
			 "qp_num=%x", my_qp->ib_qp.qp_num);
		return -EINVAL;
	}

	return 0;
}

int ehca_post_send(struct ib_qp *qp,
		   struct ib_send_wr *send_wr,
		   struct ib_send_wr **bad_send_wr)
{
	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
	int wqe_cnt = 0;
	int ret = 0;
	unsigned long flags;

	/* Reject WR if QP is in RESET, INIT or RTR state */
	if (unlikely(my_qp->state < IB_QPS_RTS)) {
		ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
			 my_qp->state, qp->qp_num);
		ret = -EINVAL;
		goto out;
	}

	/* LOCK the QUEUE */
	spin_lock_irqsave(&my_qp->spinlock_s, flags);

	/* Send an empty extra RDMA read if:
	 *  1) there has been an RDMA read on this connection before
	 *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
	 *  3) we can be sure that any previous extra RDMA read has been
	 *     processed so we don't overflow the SQ
	 */
	if (unlikely(my_qp->unsol_ack_circ &&
		     my_qp->packet_count > ACK_CIRC_THRESHOLD &&
		     my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
		/* insert an empty RDMA READ to fix up the remote QP state */
		struct ib_send_wr circ_wr;
		memset(&circ_wr, 0, sizeof(circ_wr));
		circ_wr.opcode = IB_WR_RDMA_READ;
		post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
		wqe_cnt++;
		ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
		my_qp->message_count = my_qp->packet_count = 0;
	}

	/* loop processes list of send reqs */
	while (send_wr) {
		ret = post_one_send(my_qp, send_wr, 0);
		if (unlikely(ret)) {
			goto post_send_exit0;
		}
		wqe_cnt++;
		send_wr = send_wr->next;
	}

post_send_exit0:
	iosync(); /* serialize GAL register access */
	hipz_update_sqa(my_qp, wqe_cnt);
	if (unlikely(ret || ehca_debug_level >= 2))
		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
			 my_qp, qp->qp_num, wqe_cnt, ret);
	my_qp->message_count += wqe_cnt;
	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);

out:
	if (ret)
		*bad_send_wr = send_wr;
	return ret;
}

static int internal_post_recv(struct ehca_qp *my_qp,
			      struct ib_device *dev,
			      struct ib_recv_wr *recv_wr,
			      struct ib_recv_wr **bad_recv_wr)
{
	struct ehca_wqe *wqe_p;
	int wqe_cnt = 0;
	int ret = 0;
	u32 rq_map_idx;
	unsigned long flags;
	struct ehca_qmap_entry *qmap_entry;

	if (unlikely(!HAS_RQ(my_qp))) {
		ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
		ret = -ENODEV;
		goto out;
	}

	/* LOCK the QUEUE */
	spin_lock_irqsave(&my_qp->spinlock_r, flags);

	/* loop processes list of recv reqs */
	while (recv_wr) {
		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
		/* get pointer next to free WQE */
		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
		if (unlikely(!wqe_p)) {
			/* too many posted work requests: queue overflow */
			ret = -ENOMEM;
			ehca_err(dev, "Too many posted WQEs "
				 "qp_num=%x", my_qp->real_qp_num);
			goto post_recv_exit0;
		}
		/*
		 * Get the index of the WQE in the recv queue. The same index
		 * is used for writing into the rq_map.
		 */
		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;

		/* write a RECV WQE into the QUEUE */
		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
				      rq_map_idx);
		/*
		 * if something failed,
		 * reset the free entry pointer to the start value
		 */
		if (unlikely(ret)) {
			my_qp->ipz_rqueue.current_q_offset = start_offset;
			ret = -EINVAL;
			ehca_err(dev, "Could not write WQE "
				 "qp_num=%x", my_qp->real_qp_num);
			goto post_recv_exit0;
		}

		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
		qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
		qmap_entry->reported = 0;
		qmap_entry->cqe_req = 1;

		wqe_cnt++;
		recv_wr = recv_wr->next;
	} /* eof for recv_wr */

post_recv_exit0:
	iosync(); /* serialize GAL register access */
	hipz_update_rqa(my_qp, wqe_cnt);
	if (unlikely(ret || ehca_debug_level >= 2))
		ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
			 my_qp, my_qp->real_qp_num, wqe_cnt, ret);
	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);

out:
	if (ret)
		*bad_recv_wr = recv_wr;

	return ret;
}

int ehca_post_recv(struct ib_qp *qp,
		   struct ib_recv_wr *recv_wr,
		   struct ib_recv_wr **bad_recv_wr)
{
	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);

	/* Reject WR if QP is in RESET state */
	if (unlikely(my_qp->state == IB_QPS_RESET)) {
		ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
			 my_qp->state, qp->qp_num);
		*bad_recv_wr = recv_wr;
		return -EINVAL;
	}

	return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
}

int ehca_post_srq_recv(struct ib_srq *srq,
		       struct ib_recv_wr *recv_wr,
		       struct ib_recv_wr **bad_recv_wr)
{
	return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
				  srq->device, recv_wr, bad_recv_wr);
}
/*
 * ib_wc_opcode table converts ehca wc opcode to ib
 * Since we use zero to indicate invalid opcode, the actual ib opcode must
 * be decremented!!!
 */
static const u8 ib_wc_opcode[255] = {
	[0x01] = IB_WC_RECV+1,
	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
	[0x04] = IB_WC_BIND_MW+1,
	[0x08] = IB_WC_FETCH_ADD+1,
	[0x10] = IB_WC_COMP_SWAP+1,
	[0x20] = IB_WC_RDMA_WRITE+1,
	[0x40] = IB_WC_RDMA_READ+1,
	[0x80] = IB_WC_SEND+1
};
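/*
 * Poll path: ehca_poll_cq_one() pops the next valid CQE from the hardware
 * completion queue.  CQEs carrying the purge bit are dropped while the
 * owning QP's sqerr_purgeflag is set, so the WQE that caused a send queue
 * error is not completed twice.  The low-order bits of the work request id
 * index the QP's sq_map/rq_map; they restore the application's wr_id and
 * detect CQEs that were already reported.  An error CQE puts the QP on the
 * CQ's error lists so its remaining WQEs can be flushed in software by
 * generate_flush_cqes() below.
 */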
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
	int ret = 0, qmap_tail_idx;
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	struct ehca_cqe *cqe;
	struct ehca_qp *my_qp;
	struct ehca_qmap_entry *qmap_entry;
	struct ehca_queue_map *qmap;
	int cqe_count = 0, is_error;

repoll:
	cqe = (struct ehca_cqe *)
		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
	if (!cqe) {
		ret = -EAGAIN;
		if (ehca_debug_level >= 3)
			ehca_dbg(cq->device, "Completion queue is empty "
				 "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
		goto poll_cq_one_exit0;
	}

	/* prevents loads being reordered across this point */
	rmb();

	cqe_count++;
	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
		struct ehca_qp *qp;
		int purgeflag;
		unsigned long flags;

		qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
		if (!qp) {
			ehca_err(cq->device, "cq_num=%x qp_num=%x "
				 "could not find qp -> ignore cqe",
				 my_cq->cq_number, cqe->local_qp_number);
			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
				 my_cq->cq_number, cqe->local_qp_number);
			/* ignore this purged cqe */
			goto repoll;
		}
		spin_lock_irqsave(&qp->spinlock_s, flags);
		purgeflag = qp->sqerr_purgeflag;
		spin_unlock_irqrestore(&qp->spinlock_s, flags);

		if (purgeflag) {
			ehca_dbg(cq->device,
				 "Got CQE with purged bit qp_num=%x src_qp=%x",
				 cqe->local_qp_number, cqe->remote_qp_number);
			if (ehca_debug_level >= 2)
				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
					 cqe->local_qp_number,
					 cqe->remote_qp_number);
			/*
			 * ignore this CQE to avoid double CQEs for the bad
			 * WQE that caused the SQE, and turn off the purge flag
			 */
			qp->sqerr_purgeflag = 0;
			goto repoll;
		}
	}

	is_error = cqe->status & WC_STATUS_ERROR_BIT;

	/* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
	if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
		ehca_dbg(cq->device,
			 "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
			 is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
			 my_cq, my_cq->cq_number);
		ehca_dbg(cq->device,
			 "ehca_cq=%p cq_num=%x -------------------------",
			 my_cq, my_cq->cq_number);
	}

	read_lock(&ehca_qp_idr_lock);
	my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
	read_unlock(&ehca_qp_idr_lock);
	if (!my_qp)
		goto repoll;
	wc->qp = &my_qp->ib_qp;

	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
		/* We got a send completion. */
		qmap = &my_qp->sq_map;
	else
		/* We got a receive completion. */
		qmap = &my_qp->rq_map;

	/* advance the tail pointer */
	qmap->tail = qmap_tail_idx;

	if (is_error) {
		/*
		 * set left_to_poll to 0 because in error state, we will not
		 * get any additional CQEs
		 */
		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
							my_qp->sq_map.entries);
		my_qp->sq_map.left_to_poll = 0;
		ehca_add_to_err_list(my_qp, 1);

		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
							my_qp->rq_map.entries);
		my_qp->rq_map.left_to_poll = 0;
		if (HAS_RQ(my_qp))
			ehca_add_to_err_list(my_qp, 0);
	}

	qmap_entry = &qmap->map[qmap_tail_idx];
	if (qmap_entry->reported) {
		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
			  my_qp->real_qp_num);
		/* found a double cqe, discard it and read next one */
		goto repoll;
	}

	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
	qmap_entry->reported = 1;

	/* if left_to_poll is decremented to 0, add the QP to the error list */
	if (qmap->left_to_poll > 0) {
		qmap->left_to_poll--;
		if ((my_qp->sq_map.left_to_poll == 0) &&
		    (my_qp->rq_map.left_to_poll == 0)) {
			ehca_add_to_err_list(my_qp, 1);
			if (HAS_RQ(my_qp))
				ehca_add_to_err_list(my_qp, 0);
		}
	}

	/* eval ib_wc_opcode */
	wc->opcode = ib_wc_opcode[cqe->optype]-1;
	if (unlikely(wc->opcode == -1)) {
		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
			 "ehca_cq=%p cq_num=%x",
			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
		/* dump cqe for other infos */
		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
			 my_cq, my_cq->cq_number);
		/* update also queue adder to throw away this entry!!! */
		goto repoll;
	}

	/* eval ib_wc_status */
	if (unlikely(is_error)) {
		/* complete with errors */
		map_ib_wc_status(cqe->status, &wc->status);
		wc->vendor_err = wc->status;
	} else
		wc->status = IB_WC_SUCCESS;

	wc->byte_len = cqe->nr_bytes_transferred;
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->rlid;
	wc->dlid_path_bits = cqe->dlid;
	wc->src_qp = cqe->remote_qp_number;
	/*
	 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
	 * SW defines those in bits 1 and 0, so we can just shift and mask.
	 */
	wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
	wc->sl = cqe->service_level;

poll_cq_one_exit0:
	if (cqe_count > 0)
		hipz_update_feca(my_cq, cqe_count);

	return ret;
}
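/*
 * generate_flush_cqes() completes WQEs in software for a QP that sits on
 * one of the CQ's error lists: once the QP has entered the error state no
 * further hardware CQEs arrive for it, so every queue map entry that has
 * not been reported yet is turned into an IB_WC_WR_FLUSH_ERR work
 * completion here.  ehca_poll_cq() drains these flush CQEs before polling
 * the hardware queue.
 */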
static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
			       struct ib_wc *wc, int num_entries,
			       struct ipz_queue *ipz_queue, int on_sq)
{
	int nr = 0;
	struct ehca_wqe *wqe;
	u64 offset;
	struct ehca_queue_map *qmap;
	struct ehca_qmap_entry *qmap_entry;

	if (on_sq)
		qmap = &my_qp->sq_map;
	else
		qmap = &my_qp->rq_map;

	qmap_entry = &qmap->map[qmap->next_wqe_idx];

	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
		/* generate flush CQE */

		memset(wc, 0, sizeof(*wc));

		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
		if (!wqe) {
			ehca_err(cq->device, "Invalid wqe offset=%#llx on "
				 "qp_num=%#x", offset, my_qp->real_qp_num);
			return nr;
		}

		wc->wr_id = replace_wr_id(wqe->work_request_id,
					  qmap_entry->app_wr_id);

		if (on_sq) {
			switch (wqe->optype) {
			case WQE_OPTYPE_SEND:
				wc->opcode = IB_WC_SEND;
				break;
			case WQE_OPTYPE_RDMAWRITE:
				wc->opcode = IB_WC_RDMA_WRITE;
				break;
			case WQE_OPTYPE_RDMAREAD:
				wc->opcode = IB_WC_RDMA_READ;
				break;
			default:
				ehca_err(cq->device, "Invalid optype=%x",
					 wqe->optype);
				return nr;
			}
		} else
			wc->opcode = IB_WC_RECV;

		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
			wc->ex.imm_data = wqe->immediate_data;
			wc->wc_flags |= IB_WC_WITH_IMM;
		}

		wc->status = IB_WC_WR_FLUSH_ERR;

		wc->qp = &my_qp->ib_qp;

		/* mark as reported and advance next_wqe pointer */
		qmap_entry->reported = 1;
		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
						qmap->entries);
		qmap_entry = &qmap->map[qmap->next_wqe_idx];

		wc++; nr++;
	}

	return nr;
}

int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	int nr;
	struct ehca_qp *err_qp;
	struct ib_wc *current_wc = wc;
	int ret = 0;
	unsigned long flags;
	int entries_left = num_entries;

	if (num_entries < 1) {
		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
		ret = -EINVAL;
		goto poll_cq_exit0;
	}

	spin_lock_irqsave(&my_cq->spinlock, flags);

	/* generate flush cqes for send queues */
	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
					 &err_qp->ipz_squeue, 1);
		entries_left -= nr;
		current_wc += nr;

		if (entries_left == 0)
			break;
	}

	/* generate flush cqes for receive queues */
	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
					 &err_qp->ipz_rqueue, 0);
		entries_left -= nr;
		current_wc += nr;

		if (entries_left == 0)
			break;
	}

	for (nr = 0; nr < entries_left; nr++) {
		ret = ehca_poll_cq_one(cq, current_wc);
		if (ret)
			break;
		current_wc++;
	} /* eof for nr */
	entries_left -= nr;

	spin_unlock_irqrestore(&my_cq->spinlock, flags);
	if (ret == -EAGAIN || !ret)
		ret = num_entries - entries_left;

poll_cq_exit0:
	return ret;
}

int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
{
	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
	int ret = 0;

	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
	case IB_CQ_SOLICITED:
		hipz_set_cqx_n0(my_cq, 1);
		break;
	case IB_CQ_NEXT_COMP:
		hipz_set_cqx_n1(my_cq, 1);
		break;
	default:
		return -EINVAL;
	}

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		unsigned long spl_flags;
		spin_lock_irqsave(&my_cq->spinlock, spl_flags);
		ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
		spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
	}

	return ret;
}