1/******************************************************************************* 2 * 3 * Intel Ethernet Controller XL710 Family Linux Driver 4 * Copyright(c) 2013 - 2014 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 * The full GNU General Public License is included in this distribution in 19 * the file called "COPYING". 20 * 21 * Contact Information: 22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 24 * 25 ******************************************************************************/ 26 27#include <linux/prefetch.h> 28#include "i40e.h" 29#include "i40e_prototype.h" 30 31static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, 32 u32 td_tag) 33{ 34 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | 35 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | 36 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | 37 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | 38 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); 39} 40 41#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 42#define I40E_FD_CLEAN_DELAY 10 43/** 44 * i40e_program_fdir_filter - Program a Flow Director filter 45 * @fdir_data: Packet data that will be filter parameters 46 * @raw_packet: the pre-allocated packet buffer for FDir 47 * @pf: The pf pointer 48 * @add: True for add/update, False for remove 49 **/ 50int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, 51 struct i40e_pf *pf, bool add) 52{ 53 struct i40e_filter_program_desc *fdir_desc; 54 struct i40e_tx_buffer *tx_buf, *first; 55 struct i40e_tx_desc *tx_desc; 56 struct i40e_ring *tx_ring; 57 unsigned int fpt, dcc; 58 struct i40e_vsi *vsi; 59 struct device *dev; 60 dma_addr_t dma; 61 u32 td_cmd = 0; 62 u16 delay = 0; 63 u16 i; 64 65 /* find existing FDIR VSI */ 66 vsi = NULL; 67 for (i = 0; i < pf->num_alloc_vsi; i++) 68 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) 69 vsi = pf->vsi[i]; 70 if (!vsi) 71 return -ENOENT; 72 73 tx_ring = vsi->tx_rings[0]; 74 dev = tx_ring->dev; 75 76 /* we need two descriptors to add/del a filter and we can wait */ 77 do { 78 if (I40E_DESC_UNUSED(tx_ring) > 1) 79 break; 80 msleep_interruptible(1); 81 delay++; 82 } while (delay < I40E_FD_CLEAN_DELAY); 83 84 if (!(I40E_DESC_UNUSED(tx_ring) > 1)) 85 return -EAGAIN; 86 87 dma = dma_map_single(dev, raw_packet, 88 I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE); 89 if (dma_mapping_error(dev, dma)) 90 goto dma_fail; 91 92 /* grab the next descriptor */ 93 i = tx_ring->next_to_use; 94 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); 95 first = &tx_ring->tx_bi[i]; 96 memset(first, 0, sizeof(struct i40e_tx_buffer)); 97 98 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0; 99 100 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & 101 I40E_TXD_FLTR_QW0_QINDEX_MASK; 102 103 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & 104 I40E_TXD_FLTR_QW0_FLEXOFF_MASK; 105 106 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & 107 I40E_TXD_FLTR_QW0_PCTYPE_MASK; 108 109 /* Use LAN VSI Id if not programmed by user */ 110 if (fdir_data->dest_vsi == 0) 111 fpt |= (pf->vsi[pf->lan_vsi]->id) << 112 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; 113 else 114 fpt |= ((u32)fdir_data->dest_vsi << 115 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & 116 I40E_TXD_FLTR_QW0_DEST_VSI_MASK; 117 118 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; 119 120 if (add) 121 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << 122 I40E_TXD_FLTR_QW1_PCMD_SHIFT; 123 else 124 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << 125 I40E_TXD_FLTR_QW1_PCMD_SHIFT; 126 127 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & 128 I40E_TXD_FLTR_QW1_DEST_MASK; 129 130 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & 131 I40E_TXD_FLTR_QW1_FD_STATUS_MASK; 132 133 if (fdir_data->cnt_index != 0) { 134 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; 135 dcc |= ((u32)fdir_data->cnt_index << 136 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & 137 I40E_TXD_FLTR_QW1_CNTINDEX_MASK; 138 } 139 140 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); 141 fdir_desc->rsvd = cpu_to_le32(0); 142 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); 143 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); 144 145 /* Now program a dummy descriptor */ 146 i = tx_ring->next_to_use; 147 tx_desc = I40E_TX_DESC(tx_ring, i); 148 tx_buf = &tx_ring->tx_bi[i]; 149 150 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0; 151 152 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer)); 153 154 /* record length, and DMA address */ 155 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE); 156 dma_unmap_addr_set(tx_buf, dma, dma); 157 158 tx_desc->buffer_addr = cpu_to_le64(dma); 159 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY; 160 161 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB; 162 tx_buf->raw_buf = (void *)raw_packet; 163 164 tx_desc->cmd_type_offset_bsz = 165 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0); 166 167 /* set the timestamp */ 168 tx_buf->time_stamp = jiffies; 169 170 /* Force memory writes to complete before letting h/w 171 * know there are new descriptors to fetch. 172 */ 173 wmb(); 174 175 /* Mark the data descriptor to be watched */ 176 first->next_to_watch = tx_desc; 177 178 writel(tx_ring->next_to_use, tx_ring->tail); 179 return 0; 180 181dma_fail: 182 return -1; 183} 184 185#define IP_HEADER_OFFSET 14 186#define I40E_UDPIP_DUMMY_PACKET_LEN 42 187/** 188 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters 189 * @vsi: pointer to the targeted VSI 190 * @fd_data: the flow director data required for the FDir descriptor 191 * @add: true adds a filter, false removes it 192 * 193 * Returns 0 if the filters were successfully added or removed 194 **/ 195static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, 196 struct i40e_fdir_filter *fd_data, 197 bool add) 198{ 199 struct i40e_pf *pf = vsi->back; 200 struct udphdr *udp; 201 struct iphdr *ip; 202 bool err = false; 203 u8 *raw_packet; 204 int ret; 205 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, 206 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0, 207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 208 209 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); 210 if (!raw_packet) 211 return -ENOMEM; 212 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN); 213 214 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); 215 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET 216 + sizeof(struct iphdr)); 217 218 ip->daddr = fd_data->dst_ip[0]; 219 udp->dest = fd_data->dst_port; 220 ip->saddr = fd_data->src_ip[0]; 221 udp->source = fd_data->src_port; 222 223 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; 224 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); 225 if (ret) { 226 dev_info(&pf->pdev->dev, 227 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", 228 fd_data->pctype, fd_data->fd_id, ret); 229 err = true; 230 } else { 231 if (add) 232 dev_info(&pf->pdev->dev, 233 "Filter OK for PCTYPE %d loc = %d\n", 234 fd_data->pctype, fd_data->fd_id); 235 else 236 dev_info(&pf->pdev->dev, 237 "Filter deleted for PCTYPE %d loc = %d\n", 238 fd_data->pctype, fd_data->fd_id); 239 } 240 return err ? -EOPNOTSUPP : 0; 241} 242 243#define I40E_TCPIP_DUMMY_PACKET_LEN 54 244/** 245 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters 246 * @vsi: pointer to the targeted VSI 247 * @fd_data: the flow director data required for the FDir descriptor 248 * @add: true adds a filter, false removes it 249 * 250 * Returns 0 if the filters were successfully added or removed 251 **/ 252static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, 253 struct i40e_fdir_filter *fd_data, 254 bool add) 255{ 256 struct i40e_pf *pf = vsi->back; 257 struct tcphdr *tcp; 258 struct iphdr *ip; 259 bool err = false; 260 u8 *raw_packet; 261 int ret; 262 /* Dummy packet */ 263 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, 264 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0, 265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11, 266 0x0, 0x72, 0, 0, 0, 0}; 267 268 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); 269 if (!raw_packet) 270 return -ENOMEM; 271 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN); 272 273 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); 274 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET 275 + sizeof(struct iphdr)); 276 277 ip->daddr = fd_data->dst_ip[0]; 278 tcp->dest = fd_data->dst_port; 279 ip->saddr = fd_data->src_ip[0]; 280 tcp->source = fd_data->src_port; 281 282 if (add) { 283 pf->fd_tcp_rule++; 284 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { 285 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); 286 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; 287 } 288 } else { 289 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ? 290 (pf->fd_tcp_rule - 1) : 0; 291 if (pf->fd_tcp_rule == 0) { 292 pf->flags |= I40E_FLAG_FD_ATR_ENABLED; 293 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); 294 } 295 } 296 297 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; 298 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); 299 300 if (ret) { 301 dev_info(&pf->pdev->dev, 302 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", 303 fd_data->pctype, fd_data->fd_id, ret); 304 err = true; 305 } else { 306 if (add) 307 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n", 308 fd_data->pctype, fd_data->fd_id); 309 else 310 dev_info(&pf->pdev->dev, 311 "Filter deleted for PCTYPE %d loc = %d\n", 312 fd_data->pctype, fd_data->fd_id); 313 } 314 315 return err ? -EOPNOTSUPP : 0; 316} 317 318/** 319 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for 320 * a specific flow spec 321 * @vsi: pointer to the targeted VSI 322 * @fd_data: the flow director data required for the FDir descriptor 323 * @add: true adds a filter, false removes it 324 * 325 * Always returns -EOPNOTSUPP 326 **/ 327static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, 328 struct i40e_fdir_filter *fd_data, 329 bool add) 330{ 331 return -EOPNOTSUPP; 332} 333 334#define I40E_IP_DUMMY_PACKET_LEN 34 335/** 336 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for 337 * a specific flow spec 338 * @vsi: pointer to the targeted VSI 339 * @fd_data: the flow director data required for the FDir descriptor 340 * @add: true adds a filter, false removes it 341 * 342 * Returns 0 if the filters were successfully added or removed 343 **/ 344static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, 345 struct i40e_fdir_filter *fd_data, 346 bool add) 347{ 348 struct i40e_pf *pf = vsi->back; 349 struct iphdr *ip; 350 bool err = false; 351 u8 *raw_packet; 352 int ret; 353 int i; 354 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, 355 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0, 356 0, 0, 0, 0}; 357 358 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; 359 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) { 360 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); 361 if (!raw_packet) 362 return -ENOMEM; 363 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN); 364 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); 365 366 ip->saddr = fd_data->src_ip[0]; 367 ip->daddr = fd_data->dst_ip[0]; 368 ip->protocol = 0; 369 370 fd_data->pctype = i; 371 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); 372 373 if (ret) { 374 dev_info(&pf->pdev->dev, 375 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", 376 fd_data->pctype, fd_data->fd_id, ret); 377 err = true; 378 } else { 379 if (add) 380 dev_info(&pf->pdev->dev, 381 "Filter OK for PCTYPE %d loc = %d\n", 382 fd_data->pctype, fd_data->fd_id); 383 else 384 dev_info(&pf->pdev->dev, 385 "Filter deleted for PCTYPE %d loc = %d\n", 386 fd_data->pctype, fd_data->fd_id); 387 } 388 } 389 390 return err ? -EOPNOTSUPP : 0; 391} 392 393/** 394 * i40e_add_del_fdir - Build raw packets to add/del fdir filter 395 * @vsi: pointer to the targeted VSI 396 * @cmd: command to get or set RX flow classification rules 397 * @add: true adds a filter, false removes it 398 * 399 **/ 400int i40e_add_del_fdir(struct i40e_vsi *vsi, 401 struct i40e_fdir_filter *input, bool add) 402{ 403 struct i40e_pf *pf = vsi->back; 404 int ret; 405 406 switch (input->flow_type & ~FLOW_EXT) { 407 case TCP_V4_FLOW: 408 ret = i40e_add_del_fdir_tcpv4(vsi, input, add); 409 break; 410 case UDP_V4_FLOW: 411 ret = i40e_add_del_fdir_udpv4(vsi, input, add); 412 break; 413 case SCTP_V4_FLOW: 414 ret = i40e_add_del_fdir_sctpv4(vsi, input, add); 415 break; 416 case IPV4_FLOW: 417 ret = i40e_add_del_fdir_ipv4(vsi, input, add); 418 break; 419 case IP_USER_FLOW: 420 switch (input->ip4_proto) { 421 case IPPROTO_TCP: 422 ret = i40e_add_del_fdir_tcpv4(vsi, input, add); 423 break; 424 case IPPROTO_UDP: 425 ret = i40e_add_del_fdir_udpv4(vsi, input, add); 426 break; 427 case IPPROTO_SCTP: 428 ret = i40e_add_del_fdir_sctpv4(vsi, input, add); 429 break; 430 default: 431 ret = i40e_add_del_fdir_ipv4(vsi, input, add); 432 break; 433 } 434 break; 435 default: 436 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", 437 input->flow_type); 438 ret = -EINVAL; 439 } 440 441 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */ 442 return ret; 443} 444 445/** 446 * i40e_fd_handle_status - check the Programming Status for FD 447 * @rx_ring: the Rx ring for this descriptor 448 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor. 449 * @prog_id: the id originally used for programming 450 * 451 * This is used to verify if the FD programming or invalidation 452 * requested by SW to the HW is successful or not and take actions accordingly. 453 **/ 454static void i40e_fd_handle_status(struct i40e_ring *rx_ring, 455 union i40e_rx_desc *rx_desc, u8 prog_id) 456{ 457 struct i40e_pf *pf = rx_ring->vsi->back; 458 struct pci_dev *pdev = pf->pdev; 459 u32 fcnt_prog, fcnt_avail; 460 u32 error; 461 u64 qw; 462 463 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 464 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> 465 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; 466 467 if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { 468 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) || 469 (I40E_DEBUG_FD & pf->hw.debug_mask)) 470 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n", 471 rx_desc->wb.qword0.hi_dword.fd_id); 472 473 pf->fd_add_err++; 474 /* store the current atr filter count */ 475 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf); 476 477 /* filter programming failed most likely due to table full */ 478 fcnt_prog = i40e_get_cur_guaranteed_fd_count(pf); 479 fcnt_avail = pf->fdir_pf_filter_count; 480 /* If ATR is running fcnt_prog can quickly change, 481 * if we are very close to full, it makes sense to disable 482 * FD ATR/SB and then re-enable it when there is room. 483 */ 484 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { 485 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && 486 !(pf->auto_disable_flags & 487 I40E_FLAG_FD_SB_ENABLED)) { 488 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); 489 pf->auto_disable_flags |= 490 I40E_FLAG_FD_SB_ENABLED; 491 } 492 } else { 493 dev_info(&pdev->dev, 494 "FD filter programming failed due to incorrect filter parameters\n"); 495 } 496 } else if (error == 497 (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { 498 if (I40E_DEBUG_FD & pf->hw.debug_mask) 499 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n", 500 rx_desc->wb.qword0.hi_dword.fd_id); 501 } 502} 503 504/** 505 * i40e_unmap_and_free_tx_resource - Release a Tx buffer 506 * @ring: the ring that owns the buffer 507 * @tx_buffer: the buffer to free 508 **/ 509static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, 510 struct i40e_tx_buffer *tx_buffer) 511{ 512 if (tx_buffer->skb) { 513 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) 514 kfree(tx_buffer->raw_buf); 515 else 516 dev_kfree_skb_any(tx_buffer->skb); 517 518 if (dma_unmap_len(tx_buffer, len)) 519 dma_unmap_single(ring->dev, 520 dma_unmap_addr(tx_buffer, dma), 521 dma_unmap_len(tx_buffer, len), 522 DMA_TO_DEVICE); 523 } else if (dma_unmap_len(tx_buffer, len)) { 524 dma_unmap_page(ring->dev, 525 dma_unmap_addr(tx_buffer, dma), 526 dma_unmap_len(tx_buffer, len), 527 DMA_TO_DEVICE); 528 } 529 tx_buffer->next_to_watch = NULL; 530 tx_buffer->skb = NULL; 531 dma_unmap_len_set(tx_buffer, len, 0); 532 /* tx_buffer must be completely set up in the transmit path */ 533} 534 535/** 536 * i40e_clean_tx_ring - Free any empty Tx buffers 537 * @tx_ring: ring to be cleaned 538 **/ 539void i40e_clean_tx_ring(struct i40e_ring *tx_ring) 540{ 541 unsigned long bi_size; 542 u16 i; 543 544 /* ring already cleared, nothing to do */ 545 if (!tx_ring->tx_bi) 546 return; 547 548 /* Free all the Tx ring sk_buffs */ 549 for (i = 0; i < tx_ring->count; i++) 550 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); 551 552 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 553 memset(tx_ring->tx_bi, 0, bi_size); 554 555 /* Zero out the descriptor ring */ 556 memset(tx_ring->desc, 0, tx_ring->size); 557 558 tx_ring->next_to_use = 0; 559 tx_ring->next_to_clean = 0; 560 561 if (!tx_ring->netdev) 562 return; 563 564 /* cleanup Tx queue statistics */ 565 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 566 tx_ring->queue_index)); 567} 568 569/** 570 * i40e_free_tx_resources - Free Tx resources per queue 571 * @tx_ring: Tx descriptor ring for a specific queue 572 * 573 * Free all transmit software resources 574 **/ 575void i40e_free_tx_resources(struct i40e_ring *tx_ring) 576{ 577 i40e_clean_tx_ring(tx_ring); 578 kfree(tx_ring->tx_bi); 579 tx_ring->tx_bi = NULL; 580 581 if (tx_ring->desc) { 582 dma_free_coherent(tx_ring->dev, tx_ring->size, 583 tx_ring->desc, tx_ring->dma); 584 tx_ring->desc = NULL; 585 } 586} 587 588/** 589 * i40e_get_tx_pending - how many tx descriptors not processed 590 * @tx_ring: the ring of descriptors 591 * 592 * Since there is no access to the ring head register 593 * in XL710, we need to use our local copies 594 **/ 595static u32 i40e_get_tx_pending(struct i40e_ring *ring) 596{ 597 u32 ntu = ((ring->next_to_clean <= ring->next_to_use) 598 ? ring->next_to_use 599 : ring->next_to_use + ring->count); 600 return ntu - ring->next_to_clean; 601} 602 603/** 604 * i40e_check_tx_hang - Is there a hang in the Tx queue 605 * @tx_ring: the ring of descriptors 606 **/ 607static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) 608{ 609 u32 tx_pending = i40e_get_tx_pending(tx_ring); 610 struct i40e_pf *pf = tx_ring->vsi->back; 611 bool ret = false; 612 613 clear_check_for_tx_hang(tx_ring); 614 615 /* Check for a hung queue, but be thorough. This verifies 616 * that a transmit has been completed since the previous 617 * check AND there is at least one packet pending. The 618 * ARMED bit is set to indicate a potential hang. The 619 * bit is cleared if a pause frame is received to remove 620 * false hang detection due to PFC or 802.3x frames. By 621 * requiring this to fail twice we avoid races with 622 * PFC clearing the ARMED bit and conditions where we 623 * run the check_tx_hang logic with a transmit completion 624 * pending but without time to complete it yet. 625 */ 626 if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && 627 (tx_pending >= I40E_MIN_DESC_PENDING)) { 628 /* make sure it is true for two checks in a row */ 629 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, 630 &tx_ring->state); 631 } else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && 632 (tx_pending < I40E_MIN_DESC_PENDING) && 633 (tx_pending > 0)) { 634 if (I40E_DEBUG_FLOW & pf->hw.debug_mask) 635 dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", 636 tx_pending, tx_ring->queue_index); 637 pf->tx_sluggish_count++; 638 } else { 639 /* update completed stats and disarm the hang check */ 640 tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; 641 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); 642 } 643 644 return ret; 645} 646 647/** 648 * i40e_get_head - Retrieve head from head writeback 649 * @tx_ring: tx ring to fetch head of 650 * 651 * Returns value of Tx ring head based on value stored 652 * in head write-back location 653 **/ 654static inline u32 i40e_get_head(struct i40e_ring *tx_ring) 655{ 656 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; 657 658 return le32_to_cpu(*(volatile __le32 *)head); 659} 660 661/** 662 * i40e_clean_tx_irq - Reclaim resources after transmit completes 663 * @tx_ring: tx ring to clean 664 * @budget: how many cleans we're allowed 665 * 666 * Returns true if there's any budget left (e.g. the clean is finished) 667 **/ 668static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) 669{ 670 u16 i = tx_ring->next_to_clean; 671 struct i40e_tx_buffer *tx_buf; 672 struct i40e_tx_desc *tx_head; 673 struct i40e_tx_desc *tx_desc; 674 unsigned int total_packets = 0; 675 unsigned int total_bytes = 0; 676 677 tx_buf = &tx_ring->tx_bi[i]; 678 tx_desc = I40E_TX_DESC(tx_ring, i); 679 i -= tx_ring->count; 680 681 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); 682 683 do { 684 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; 685 686 /* if next_to_watch is not set then there is no work pending */ 687 if (!eop_desc) 688 break; 689 690 /* prevent any other reads prior to eop_desc */ 691 read_barrier_depends(); 692 693 /* we have caught up to head, no work left to do */ 694 if (tx_head == tx_desc) 695 break; 696 697 /* clear next_to_watch to prevent false hangs */ 698 tx_buf->next_to_watch = NULL; 699 700 /* update the statistics for this packet */ 701 total_bytes += tx_buf->bytecount; 702 total_packets += tx_buf->gso_segs; 703 704 /* free the skb */ 705 dev_consume_skb_any(tx_buf->skb); 706 707 /* unmap skb header data */ 708 dma_unmap_single(tx_ring->dev, 709 dma_unmap_addr(tx_buf, dma), 710 dma_unmap_len(tx_buf, len), 711 DMA_TO_DEVICE); 712 713 /* clear tx_buffer data */ 714 tx_buf->skb = NULL; 715 dma_unmap_len_set(tx_buf, len, 0); 716 717 /* unmap remaining buffers */ 718 while (tx_desc != eop_desc) { 719 720 tx_buf++; 721 tx_desc++; 722 i++; 723 if (unlikely(!i)) { 724 i -= tx_ring->count; 725 tx_buf = tx_ring->tx_bi; 726 tx_desc = I40E_TX_DESC(tx_ring, 0); 727 } 728 729 /* unmap any remaining paged data */ 730 if (dma_unmap_len(tx_buf, len)) { 731 dma_unmap_page(tx_ring->dev, 732 dma_unmap_addr(tx_buf, dma), 733 dma_unmap_len(tx_buf, len), 734 DMA_TO_DEVICE); 735 dma_unmap_len_set(tx_buf, len, 0); 736 } 737 } 738 739 /* move us one more past the eop_desc for start of next pkt */ 740 tx_buf++; 741 tx_desc++; 742 i++; 743 if (unlikely(!i)) { 744 i -= tx_ring->count; 745 tx_buf = tx_ring->tx_bi; 746 tx_desc = I40E_TX_DESC(tx_ring, 0); 747 } 748 749 /* update budget accounting */ 750 budget--; 751 } while (likely(budget)); 752 753 i += tx_ring->count; 754 tx_ring->next_to_clean = i; 755 u64_stats_update_begin(&tx_ring->syncp); 756 tx_ring->stats.bytes += total_bytes; 757 tx_ring->stats.packets += total_packets; 758 u64_stats_update_end(&tx_ring->syncp); 759 tx_ring->q_vector->tx.total_bytes += total_bytes; 760 tx_ring->q_vector->tx.total_packets += total_packets; 761 762 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { 763 /* schedule immediate reset if we believe we hung */ 764 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" 765 " VSI <%d>\n" 766 " Tx Queue <%d>\n" 767 " next_to_use <%x>\n" 768 " next_to_clean <%x>\n", 769 tx_ring->vsi->seid, 770 tx_ring->queue_index, 771 tx_ring->next_to_use, i); 772 dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n" 773 " time_stamp <%lx>\n" 774 " jiffies <%lx>\n", 775 tx_ring->tx_bi[i].time_stamp, jiffies); 776 777 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 778 779 dev_info(tx_ring->dev, 780 "tx hang detected on queue %d, resetting adapter\n", 781 tx_ring->queue_index); 782 783 tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev); 784 785 /* the adapter is about to reset, no point in enabling stuff */ 786 return true; 787 } 788 789 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, 790 tx_ring->queue_index), 791 total_packets, total_bytes); 792 793#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 794 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && 795 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { 796 /* Make sure that anybody stopping the queue after this 797 * sees the new next_to_clean. 798 */ 799 smp_mb(); 800 if (__netif_subqueue_stopped(tx_ring->netdev, 801 tx_ring->queue_index) && 802 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) { 803 netif_wake_subqueue(tx_ring->netdev, 804 tx_ring->queue_index); 805 ++tx_ring->tx_stats.restart_queue; 806 } 807 } 808 809 return budget > 0; 810} 811 812/** 813 * i40e_set_new_dynamic_itr - Find new ITR level 814 * @rc: structure containing ring performance data 815 * 816 * Stores a new ITR value based on packets and byte counts during 817 * the last interrupt. The advantage of per interrupt computation 818 * is faster updates and more accurate ITR for the current traffic 819 * pattern. Constants in this function were computed based on 820 * theoretical maximum wire speed and thresholds were set based on 821 * testing data as well as attempting to minimize response time 822 * while increasing bulk throughput. 823 **/ 824static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) 825{ 826 enum i40e_latency_range new_latency_range = rc->latency_range; 827 u32 new_itr = rc->itr; 828 int bytes_per_int; 829 830 if (rc->total_packets == 0 || !rc->itr) 831 return; 832 833 /* simple throttlerate management 834 * 0-10MB/s lowest (100000 ints/s) 835 * 10-20MB/s low (20000 ints/s) 836 * 20-1249MB/s bulk (8000 ints/s) 837 */ 838 bytes_per_int = rc->total_bytes / rc->itr; 839 switch (rc->itr) { 840 case I40E_LOWEST_LATENCY: 841 if (bytes_per_int > 10) 842 new_latency_range = I40E_LOW_LATENCY; 843 break; 844 case I40E_LOW_LATENCY: 845 if (bytes_per_int > 20) 846 new_latency_range = I40E_BULK_LATENCY; 847 else if (bytes_per_int <= 10) 848 new_latency_range = I40E_LOWEST_LATENCY; 849 break; 850 case I40E_BULK_LATENCY: 851 if (bytes_per_int <= 20) 852 rc->latency_range = I40E_LOW_LATENCY; 853 break; 854 } 855 856 switch (new_latency_range) { 857 case I40E_LOWEST_LATENCY: 858 new_itr = I40E_ITR_100K; 859 break; 860 case I40E_LOW_LATENCY: 861 new_itr = I40E_ITR_20K; 862 break; 863 case I40E_BULK_LATENCY: 864 new_itr = I40E_ITR_8K; 865 break; 866 default: 867 break; 868 } 869 870 if (new_itr != rc->itr) { 871 /* do an exponential smoothing */ 872 new_itr = (10 * new_itr * rc->itr) / 873 ((9 * new_itr) + rc->itr); 874 rc->itr = new_itr & I40E_MAX_ITR; 875 } 876 877 rc->total_bytes = 0; 878 rc->total_packets = 0; 879} 880 881/** 882 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int 883 * @q_vector: the vector to adjust 884 **/ 885static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector) 886{ 887 u16 vector = q_vector->vsi->base_vector + q_vector->v_idx; 888 struct i40e_hw *hw = &q_vector->vsi->back->hw; 889 u32 reg_addr; 890 u16 old_itr; 891 892 reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1); 893 old_itr = q_vector->rx.itr; 894 i40e_set_new_dynamic_itr(&q_vector->rx); 895 if (old_itr != q_vector->rx.itr) 896 wr32(hw, reg_addr, q_vector->rx.itr); 897 898 reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1); 899 old_itr = q_vector->tx.itr; 900 i40e_set_new_dynamic_itr(&q_vector->tx); 901 if (old_itr != q_vector->tx.itr) 902 wr32(hw, reg_addr, q_vector->tx.itr); 903} 904 905/** 906 * i40e_clean_programming_status - clean the programming status descriptor 907 * @rx_ring: the rx ring that has this descriptor 908 * @rx_desc: the rx descriptor written back by HW 909 * 910 * Flow director should handle FD_FILTER_STATUS to check its filter programming 911 * status being successful or not and take actions accordingly. FCoE should 912 * handle its context/filter programming/invalidation status and take actions. 913 * 914 **/ 915static void i40e_clean_programming_status(struct i40e_ring *rx_ring, 916 union i40e_rx_desc *rx_desc) 917{ 918 u64 qw; 919 u8 id; 920 921 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 922 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> 923 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; 924 925 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS) 926 i40e_fd_handle_status(rx_ring, rx_desc, id); 927#ifdef I40E_FCOE 928 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) || 929 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS)) 930 i40e_fcoe_handle_status(rx_ring, rx_desc, id); 931#endif 932} 933 934/** 935 * i40e_setup_tx_descriptors - Allocate the Tx descriptors 936 * @tx_ring: the tx ring to set up 937 * 938 * Return 0 on success, negative on error 939 **/ 940int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) 941{ 942 struct device *dev = tx_ring->dev; 943 int bi_size; 944 945 if (!dev) 946 return -ENOMEM; 947 948 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 949 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); 950 if (!tx_ring->tx_bi) 951 goto err; 952 953 /* round up to nearest 4K */ 954 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); 955 /* add u32 for head writeback, align after this takes care of 956 * guaranteeing this is at least one cache line in size 957 */ 958 tx_ring->size += sizeof(u32); 959 tx_ring->size = ALIGN(tx_ring->size, 4096); 960 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 961 &tx_ring->dma, GFP_KERNEL); 962 if (!tx_ring->desc) { 963 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", 964 tx_ring->size); 965 goto err; 966 } 967 968 tx_ring->next_to_use = 0; 969 tx_ring->next_to_clean = 0; 970 return 0; 971 972err: 973 kfree(tx_ring->tx_bi); 974 tx_ring->tx_bi = NULL; 975 return -ENOMEM; 976} 977 978/** 979 * i40e_clean_rx_ring - Free Rx buffers 980 * @rx_ring: ring to be cleaned 981 **/ 982void i40e_clean_rx_ring(struct i40e_ring *rx_ring) 983{ 984 struct device *dev = rx_ring->dev; 985 struct i40e_rx_buffer *rx_bi; 986 unsigned long bi_size; 987 u16 i; 988 989 /* ring already cleared, nothing to do */ 990 if (!rx_ring->rx_bi) 991 return; 992 993 /* Free all the Rx ring sk_buffs */ 994 for (i = 0; i < rx_ring->count; i++) { 995 rx_bi = &rx_ring->rx_bi[i]; 996 if (rx_bi->dma) { 997 dma_unmap_single(dev, 998 rx_bi->dma, 999 rx_ring->rx_buf_len, 1000 DMA_FROM_DEVICE); 1001 rx_bi->dma = 0; 1002 } 1003 if (rx_bi->skb) { 1004 dev_kfree_skb(rx_bi->skb); 1005 rx_bi->skb = NULL; 1006 } 1007 if (rx_bi->page) { 1008 if (rx_bi->page_dma) { 1009 dma_unmap_page(dev, 1010 rx_bi->page_dma, 1011 PAGE_SIZE / 2, 1012 DMA_FROM_DEVICE); 1013 rx_bi->page_dma = 0; 1014 } 1015 __free_page(rx_bi->page); 1016 rx_bi->page = NULL; 1017 rx_bi->page_offset = 0; 1018 } 1019 } 1020 1021 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 1022 memset(rx_ring->rx_bi, 0, bi_size); 1023 1024 /* Zero out the descriptor ring */ 1025 memset(rx_ring->desc, 0, rx_ring->size); 1026 1027 rx_ring->next_to_clean = 0; 1028 rx_ring->next_to_use = 0; 1029} 1030 1031/** 1032 * i40e_free_rx_resources - Free Rx resources 1033 * @rx_ring: ring to clean the resources from 1034 * 1035 * Free all receive software resources 1036 **/ 1037void i40e_free_rx_resources(struct i40e_ring *rx_ring) 1038{ 1039 i40e_clean_rx_ring(rx_ring); 1040 kfree(rx_ring->rx_bi); 1041 rx_ring->rx_bi = NULL; 1042 1043 if (rx_ring->desc) { 1044 dma_free_coherent(rx_ring->dev, rx_ring->size, 1045 rx_ring->desc, rx_ring->dma); 1046 rx_ring->desc = NULL; 1047 } 1048} 1049 1050/** 1051 * i40e_setup_rx_descriptors - Allocate Rx descriptors 1052 * @rx_ring: Rx descriptor ring (for a specific queue) to setup 1053 * 1054 * Returns 0 on success, negative on failure 1055 **/ 1056int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) 1057{ 1058 struct device *dev = rx_ring->dev; 1059 int bi_size; 1060 1061 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 1062 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); 1063 if (!rx_ring->rx_bi) 1064 goto err; 1065 1066 /* Round up to nearest 4K */ 1067 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring) 1068 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc) 1069 : rx_ring->count * sizeof(union i40e_32byte_rx_desc); 1070 rx_ring->size = ALIGN(rx_ring->size, 4096); 1071 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 1072 &rx_ring->dma, GFP_KERNEL); 1073 1074 if (!rx_ring->desc) { 1075 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", 1076 rx_ring->size); 1077 goto err; 1078 } 1079 1080 rx_ring->next_to_clean = 0; 1081 rx_ring->next_to_use = 0; 1082 1083 return 0; 1084err: 1085 kfree(rx_ring->rx_bi); 1086 rx_ring->rx_bi = NULL; 1087 return -ENOMEM; 1088} 1089 1090/** 1091 * i40e_release_rx_desc - Store the new tail and head values 1092 * @rx_ring: ring to bump 1093 * @val: new head index 1094 **/ 1095static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) 1096{ 1097 rx_ring->next_to_use = val; 1098 /* Force memory writes to complete before letting h/w 1099 * know there are new descriptors to fetch. (Only 1100 * applicable for weak-ordered memory model archs, 1101 * such as IA-64). 1102 */ 1103 wmb(); 1104 writel(val, rx_ring->tail); 1105} 1106 1107/** 1108 * i40e_alloc_rx_buffers - Replace used receive buffers; packet split 1109 * @rx_ring: ring to place buffers on 1110 * @cleaned_count: number of buffers to replace 1111 **/ 1112void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) 1113{ 1114 u16 i = rx_ring->next_to_use; 1115 union i40e_rx_desc *rx_desc; 1116 struct i40e_rx_buffer *bi; 1117 struct sk_buff *skb; 1118 1119 /* do nothing if no valid netdev defined */ 1120 if (!rx_ring->netdev || !cleaned_count) 1121 return; 1122 1123 while (cleaned_count--) { 1124 rx_desc = I40E_RX_DESC(rx_ring, i); 1125 bi = &rx_ring->rx_bi[i]; 1126 skb = bi->skb; 1127 1128 if (!skb) { 1129 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 1130 rx_ring->rx_buf_len); 1131 if (!skb) { 1132 rx_ring->rx_stats.alloc_buff_failed++; 1133 goto no_buffers; 1134 } 1135 /* initialize queue mapping */ 1136 skb_record_rx_queue(skb, rx_ring->queue_index); 1137 bi->skb = skb; 1138 } 1139 1140 if (!bi->dma) { 1141 bi->dma = dma_map_single(rx_ring->dev, 1142 skb->data, 1143 rx_ring->rx_buf_len, 1144 DMA_FROM_DEVICE); 1145 if (dma_mapping_error(rx_ring->dev, bi->dma)) { 1146 rx_ring->rx_stats.alloc_buff_failed++; 1147 bi->dma = 0; 1148 goto no_buffers; 1149 } 1150 } 1151 1152 if (ring_is_ps_enabled(rx_ring)) { 1153 if (!bi->page) { 1154 bi->page = alloc_page(GFP_ATOMIC); 1155 if (!bi->page) { 1156 rx_ring->rx_stats.alloc_page_failed++; 1157 goto no_buffers; 1158 } 1159 } 1160 1161 if (!bi->page_dma) { 1162 /* use a half page if we're re-using */ 1163 bi->page_offset ^= PAGE_SIZE / 2; 1164 bi->page_dma = dma_map_page(rx_ring->dev, 1165 bi->page, 1166 bi->page_offset, 1167 PAGE_SIZE / 2, 1168 DMA_FROM_DEVICE); 1169 if (dma_mapping_error(rx_ring->dev, 1170 bi->page_dma)) { 1171 rx_ring->rx_stats.alloc_page_failed++; 1172 bi->page_dma = 0; 1173 goto no_buffers; 1174 } 1175 } 1176 1177 /* Refresh the desc even if buffer_addrs didn't change 1178 * because each write-back erases this info. 1179 */ 1180 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 1181 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 1182 } else { 1183 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); 1184 rx_desc->read.hdr_addr = 0; 1185 } 1186 i++; 1187 if (i == rx_ring->count) 1188 i = 0; 1189 } 1190 1191no_buffers: 1192 if (rx_ring->next_to_use != i) 1193 i40e_release_rx_desc(rx_ring, i); 1194} 1195 1196/** 1197 * i40e_receive_skb - Send a completed packet up the stack 1198 * @rx_ring: rx ring in play 1199 * @skb: packet to send up 1200 * @vlan_tag: vlan tag for packet 1201 **/ 1202static void i40e_receive_skb(struct i40e_ring *rx_ring, 1203 struct sk_buff *skb, u16 vlan_tag) 1204{ 1205 struct i40e_q_vector *q_vector = rx_ring->q_vector; 1206 struct i40e_vsi *vsi = rx_ring->vsi; 1207 u64 flags = vsi->back->flags; 1208 1209 if (vlan_tag & VLAN_VID_MASK) 1210 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); 1211 1212 if (flags & I40E_FLAG_IN_NETPOLL) 1213 netif_rx(skb); 1214 else 1215 napi_gro_receive(&q_vector->napi, skb); 1216} 1217 1218/** 1219 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum 1220 * @vsi: the VSI we care about 1221 * @skb: skb currently being received and modified 1222 * @rx_status: status value of last descriptor in packet 1223 * @rx_error: error value of last descriptor in packet 1224 * @rx_ptype: ptype value of last descriptor in packet 1225 **/ 1226static inline void i40e_rx_checksum(struct i40e_vsi *vsi, 1227 struct sk_buff *skb, 1228 u32 rx_status, 1229 u32 rx_error, 1230 u16 rx_ptype) 1231{ 1232 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); 1233 bool ipv4 = false, ipv6 = false; 1234 bool ipv4_tunnel, ipv6_tunnel; 1235 __wsum rx_udp_csum; 1236 struct iphdr *iph; 1237 __sum16 csum; 1238 1239 ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && 1240 (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); 1241 ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && 1242 (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); 1243 1244 skb->ip_summed = CHECKSUM_NONE; 1245 1246 /* Rx csum enabled and ip headers found? */ 1247 if (!(vsi->netdev->features & NETIF_F_RXCSUM)) 1248 return; 1249 1250 /* did the hardware decode the packet and checksum? */ 1251 if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) 1252 return; 1253 1254 /* both known and outer_ip must be set for the below code to work */ 1255 if (!(decoded.known && decoded.outer_ip)) 1256 return; 1257 1258 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1259 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) 1260 ipv4 = true; 1261 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1262 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1263 ipv6 = true; 1264 1265 if (ipv4 && 1266 (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1267 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) 1268 goto checksum_fail; 1269 1270 /* likely incorrect csum if alternate IP extension headers found */ 1271 if (ipv6 && 1272 rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) 1273 /* don't increment checksum err here, non-fatal err */ 1274 return; 1275 1276 /* there was some L4 error, count error and punt packet to the stack */ 1277 if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) 1278 goto checksum_fail; 1279 1280 /* handle packets that were not able to be checksummed due 1281 * to arrival speed, in this case the stack can compute 1282 * the csum. 1283 */ 1284 if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) 1285 return; 1286 1287 /* If VXLAN traffic has an outer UDPv4 checksum we need to check 1288 * it in the driver, hardware does not do it for us. 1289 * Since L3L4P bit was set we assume a valid IHL value (>=5) 1290 * so the total length of IPv4 header is IHL*4 bytes 1291 * The UDP_0 bit *may* bet set if the *inner* header is UDP 1292 */ 1293 if (ipv4_tunnel && 1294 (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) && 1295 !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) { 1296 skb->transport_header = skb->mac_header + 1297 sizeof(struct ethhdr) + 1298 (ip_hdr(skb)->ihl * 4); 1299 1300 /* Add 4 bytes for VLAN tagged packets */ 1301 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || 1302 skb->protocol == htons(ETH_P_8021AD)) 1303 ? VLAN_HLEN : 0; 1304 1305 rx_udp_csum = udp_csum(skb); 1306 iph = ip_hdr(skb); 1307 csum = csum_tcpudp_magic( 1308 iph->saddr, iph->daddr, 1309 (skb->len - skb_transport_offset(skb)), 1310 IPPROTO_UDP, rx_udp_csum); 1311 1312 if (udp_hdr(skb)->check != csum) 1313 goto checksum_fail; 1314 } 1315 1316 skb->ip_summed = CHECKSUM_UNNECESSARY; 1317 skb->csum_level = ipv4_tunnel || ipv6_tunnel; 1318 1319 return; 1320 1321checksum_fail: 1322 vsi->back->hw_csum_rx_error++; 1323} 1324 1325/** 1326 * i40e_rx_hash - returns the hash value from the Rx descriptor 1327 * @ring: descriptor ring 1328 * @rx_desc: specific descriptor 1329 **/ 1330static inline u32 i40e_rx_hash(struct i40e_ring *ring, 1331 union i40e_rx_desc *rx_desc) 1332{ 1333 const __le64 rss_mask = 1334 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << 1335 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); 1336 1337 if ((ring->netdev->features & NETIF_F_RXHASH) && 1338 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) 1339 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); 1340 else 1341 return 0; 1342} 1343 1344/** 1345 * i40e_ptype_to_hash - get a hash type 1346 * @ptype: the ptype value from the descriptor 1347 * 1348 * Returns a hash type to be used by skb_set_hash 1349 **/ 1350static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype) 1351{ 1352 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); 1353 1354 if (!decoded.known) 1355 return PKT_HASH_TYPE_NONE; 1356 1357 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1358 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) 1359 return PKT_HASH_TYPE_L4; 1360 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1361 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) 1362 return PKT_HASH_TYPE_L3; 1363 else 1364 return PKT_HASH_TYPE_L2; 1365} 1366 1367/** 1368 * i40e_clean_rx_irq - Reclaim resources after receive completes 1369 * @rx_ring: rx ring to clean 1370 * @budget: how many cleans we're allowed 1371 * 1372 * Returns true if there's any budget left (e.g. the clean is finished) 1373 **/ 1374static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) 1375{ 1376 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 1377 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; 1378 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 1379 const int current_node = numa_node_id(); 1380 struct i40e_vsi *vsi = rx_ring->vsi; 1381 u16 i = rx_ring->next_to_clean; 1382 union i40e_rx_desc *rx_desc; 1383 u32 rx_error, rx_status; 1384 u8 rx_ptype; 1385 u64 qword; 1386 1387 if (budget <= 0) 1388 return 0; 1389 1390 rx_desc = I40E_RX_DESC(rx_ring, i); 1391 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1392 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1393 I40E_RXD_QW1_STATUS_SHIFT; 1394 1395 while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) { 1396 union i40e_rx_desc *next_rxd; 1397 struct i40e_rx_buffer *rx_bi; 1398 struct sk_buff *skb; 1399 u16 vlan_tag; 1400 if (i40e_rx_is_programming_status(qword)) { 1401 i40e_clean_programming_status(rx_ring, rx_desc); 1402 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd); 1403 goto next_desc; 1404 } 1405 rx_bi = &rx_ring->rx_bi[i]; 1406 skb = rx_bi->skb; 1407 prefetch(skb->data); 1408 1409 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 1410 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1411 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> 1412 I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1413 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> 1414 I40E_RXD_QW1_LENGTH_SPH_SHIFT; 1415 1416 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 1417 I40E_RXD_QW1_ERROR_SHIFT; 1418 rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 1419 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 1420 1421 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 1422 I40E_RXD_QW1_PTYPE_SHIFT; 1423 rx_bi->skb = NULL; 1424 1425 /* This memory barrier is needed to keep us from reading 1426 * any other fields out of the rx_desc until we know the 1427 * STATUS_DD bit is set 1428 */ 1429 rmb(); 1430 1431 /* Get the header and possibly the whole packet 1432 * If this is an skb from previous receive dma will be 0 1433 */ 1434 if (rx_bi->dma) { 1435 u16 len; 1436 1437 if (rx_hbo) 1438 len = I40E_RX_HDR_SIZE; 1439 else if (rx_sph) 1440 len = rx_header_len; 1441 else if (rx_packet_len) 1442 len = rx_packet_len; /* 1buf/no split found */ 1443 else 1444 len = rx_header_len; /* split always mode */ 1445 1446 skb_put(skb, len); 1447 dma_unmap_single(rx_ring->dev, 1448 rx_bi->dma, 1449 rx_ring->rx_buf_len, 1450 DMA_FROM_DEVICE); 1451 rx_bi->dma = 0; 1452 } 1453 1454 /* Get the rest of the data if this was a header split */ 1455 if (ring_is_ps_enabled(rx_ring) && rx_packet_len) { 1456 1457 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 1458 rx_bi->page, 1459 rx_bi->page_offset, 1460 rx_packet_len); 1461 1462 skb->len += rx_packet_len; 1463 skb->data_len += rx_packet_len; 1464 skb->truesize += rx_packet_len; 1465 1466 if ((page_count(rx_bi->page) == 1) && 1467 (page_to_nid(rx_bi->page) == current_node)) 1468 get_page(rx_bi->page); 1469 else 1470 rx_bi->page = NULL; 1471 1472 dma_unmap_page(rx_ring->dev, 1473 rx_bi->page_dma, 1474 PAGE_SIZE / 2, 1475 DMA_FROM_DEVICE); 1476 rx_bi->page_dma = 0; 1477 } 1478 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd); 1479 1480 if (unlikely( 1481 !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 1482 struct i40e_rx_buffer *next_buffer; 1483 1484 next_buffer = &rx_ring->rx_bi[i]; 1485 1486 if (ring_is_ps_enabled(rx_ring)) { 1487 rx_bi->skb = next_buffer->skb; 1488 rx_bi->dma = next_buffer->dma; 1489 next_buffer->skb = skb; 1490 next_buffer->dma = 0; 1491 } 1492 rx_ring->rx_stats.non_eop_descs++; 1493 goto next_desc; 1494 } 1495 1496 /* ERR_MASK will only have valid bits if EOP set */ 1497 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1498 dev_kfree_skb_any(skb); 1499 /* TODO: shouldn't we increment a counter indicating the 1500 * drop? 1501 */ 1502 goto next_desc; 1503 } 1504 1505 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1506 i40e_ptype_to_hash(rx_ptype)); 1507 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) { 1508 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status & 1509 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> 1510 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT); 1511 rx_ring->last_rx_timestamp = jiffies; 1512 } 1513 1514 /* probably a little skewed due to removing CRC */ 1515 total_rx_bytes += skb->len; 1516 total_rx_packets++; 1517 1518 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1519 1520 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1521 1522 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1523 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1524 : 0; 1525#ifdef I40E_FCOE 1526 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { 1527 dev_kfree_skb_any(skb); 1528 goto next_desc; 1529 } 1530#endif 1531 i40e_receive_skb(rx_ring, skb, vlan_tag); 1532 1533 rx_ring->netdev->last_rx = jiffies; 1534 budget--; 1535next_desc: 1536 rx_desc->wb.qword1.status_error_len = 0; 1537 if (!budget) 1538 break; 1539 1540 cleaned_count++; 1541 /* return some buffers to hardware, one at a time is too slow */ 1542 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 1543 i40e_alloc_rx_buffers(rx_ring, cleaned_count); 1544 cleaned_count = 0; 1545 } 1546 1547 /* use prefetched values */ 1548 rx_desc = next_rxd; 1549 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1550 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1551 I40E_RXD_QW1_STATUS_SHIFT; 1552 } 1553 1554 rx_ring->next_to_clean = i; 1555 u64_stats_update_begin(&rx_ring->syncp); 1556 rx_ring->stats.packets += total_rx_packets; 1557 rx_ring->stats.bytes += total_rx_bytes; 1558 u64_stats_update_end(&rx_ring->syncp); 1559 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1560 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1561 1562 if (cleaned_count) 1563 i40e_alloc_rx_buffers(rx_ring, cleaned_count); 1564 1565 return budget > 0; 1566} 1567 1568/** 1569 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine 1570 * @napi: napi struct with our devices info in it 1571 * @budget: amount of work driver is allowed to do this pass, in packets 1572 * 1573 * This function will clean all queues associated with a q_vector. 1574 * 1575 * Returns the amount of work done 1576 **/ 1577int i40e_napi_poll(struct napi_struct *napi, int budget) 1578{ 1579 struct i40e_q_vector *q_vector = 1580 container_of(napi, struct i40e_q_vector, napi); 1581 struct i40e_vsi *vsi = q_vector->vsi; 1582 struct i40e_ring *ring; 1583 bool clean_complete = true; 1584 int budget_per_ring; 1585 1586 if (test_bit(__I40E_DOWN, &vsi->state)) { 1587 napi_complete(napi); 1588 return 0; 1589 } 1590 1591 /* Since the actual Tx work is minimal, we can give the Tx a larger 1592 * budget and be more aggressive about cleaning up the Tx descriptors. 1593 */ 1594 i40e_for_each_ring(ring, q_vector->tx) 1595 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); 1596 1597 /* We attempt to distribute budget to each Rx queue fairly, but don't 1598 * allow the budget to go below 1 because that would exit polling early. 1599 */ 1600 budget_per_ring = max(budget/q_vector->num_ringpairs, 1); 1601 1602 i40e_for_each_ring(ring, q_vector->rx) 1603 clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring); 1604 1605 /* If work not completed, return budget and polling will return */ 1606 if (!clean_complete) 1607 return budget; 1608 1609 /* Work is done so exit the polling mode and re-enable the interrupt */ 1610 napi_complete(napi); 1611 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) || 1612 ITR_IS_DYNAMIC(vsi->tx_itr_setting)) 1613 i40e_update_dynamic_itr(q_vector); 1614 1615 if (!test_bit(__I40E_DOWN, &vsi->state)) { 1616 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { 1617 i40e_irq_dynamic_enable(vsi, 1618 q_vector->v_idx + vsi->base_vector); 1619 } else { 1620 struct i40e_hw *hw = &vsi->back->hw; 1621 /* We re-enable the queue 0 cause, but 1622 * don't worry about dynamic_enable 1623 * because we left it on for the other 1624 * possible interrupts during napi 1625 */ 1626 u32 qval = rd32(hw, I40E_QINT_RQCTL(0)); 1627 qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 1628 wr32(hw, I40E_QINT_RQCTL(0), qval); 1629 1630 qval = rd32(hw, I40E_QINT_TQCTL(0)); 1631 qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; 1632 wr32(hw, I40E_QINT_TQCTL(0), qval); 1633 1634 i40e_irq_dynamic_enable_icr0(vsi->back); 1635 } 1636 } 1637 1638 return 0; 1639} 1640 1641/** 1642 * i40e_atr - Add a Flow Director ATR filter 1643 * @tx_ring: ring to add programming descriptor to 1644 * @skb: send buffer 1645 * @flags: send flags 1646 * @protocol: wire protocol 1647 **/ 1648static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, 1649 u32 flags, __be16 protocol) 1650{ 1651 struct i40e_filter_program_desc *fdir_desc; 1652 struct i40e_pf *pf = tx_ring->vsi->back; 1653 union { 1654 unsigned char *network; 1655 struct iphdr *ipv4; 1656 struct ipv6hdr *ipv6; 1657 } hdr; 1658 struct tcphdr *th; 1659 unsigned int hlen; 1660 u32 flex_ptype, dtype_cmd; 1661 u16 i; 1662 1663 /* make sure ATR is enabled */ 1664 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) 1665 return; 1666 1667 /* if sampling is disabled do nothing */ 1668 if (!tx_ring->atr_sample_rate) 1669 return; 1670 1671 /* snag network header to get L4 type and address */ 1672 hdr.network = skb_network_header(skb); 1673 1674 /* Currently only IPv4/IPv6 with TCP is supported */ 1675 if (protocol == htons(ETH_P_IP)) { 1676 if (hdr.ipv4->protocol != IPPROTO_TCP) 1677 return; 1678 1679 /* access ihl as a u8 to avoid unaligned access on ia64 */ 1680 hlen = (hdr.network[0] & 0x0F) << 2; 1681 } else if (protocol == htons(ETH_P_IPV6)) { 1682 if (hdr.ipv6->nexthdr != IPPROTO_TCP) 1683 return; 1684 1685 hlen = sizeof(struct ipv6hdr); 1686 } else { 1687 return; 1688 } 1689 1690 th = (struct tcphdr *)(hdr.network + hlen); 1691 1692 /* Due to lack of space, no more new filters can be programmed */ 1693 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) 1694 return; 1695 1696 tx_ring->atr_count++; 1697 1698 /* sample on all syn/fin/rst packets or once every atr sample rate */ 1699 if (!th->fin && 1700 !th->syn && 1701 !th->rst && 1702 (tx_ring->atr_count < tx_ring->atr_sample_rate)) 1703 return; 1704 1705 tx_ring->atr_count = 0; 1706 1707 /* grab the next descriptor */ 1708 i = tx_ring->next_to_use; 1709 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); 1710 1711 i++; 1712 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1713 1714 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & 1715 I40E_TXD_FLTR_QW0_QINDEX_MASK; 1716 flex_ptype |= (protocol == htons(ETH_P_IP)) ? 1717 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << 1718 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : 1719 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << 1720 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); 1721 1722 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; 1723 1724 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; 1725 1726 dtype_cmd |= (th->fin || th->rst) ? 1727 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << 1728 I40E_TXD_FLTR_QW1_PCMD_SHIFT) : 1729 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << 1730 I40E_TXD_FLTR_QW1_PCMD_SHIFT); 1731 1732 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX << 1733 I40E_TXD_FLTR_QW1_DEST_SHIFT; 1734 1735 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << 1736 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; 1737 1738 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; 1739 dtype_cmd |= 1740 ((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & 1741 I40E_TXD_FLTR_QW1_CNTINDEX_MASK; 1742 1743 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); 1744 fdir_desc->rsvd = cpu_to_le32(0); 1745 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); 1746 fdir_desc->fd_id = cpu_to_le32(0); 1747} 1748 1749/** 1750 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW 1751 * @skb: send buffer 1752 * @tx_ring: ring to send buffer on 1753 * @flags: the tx flags to be set 1754 * 1755 * Checks the skb and set up correspondingly several generic transmit flags 1756 * related to VLAN tagging for the HW, such as VLAN, DCB, etc. 1757 * 1758 * Returns error code indicate the frame should be dropped upon error and the 1759 * otherwise returns 0 to indicate the flags has been set properly. 1760 **/ 1761#ifdef I40E_FCOE 1762int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 1763 struct i40e_ring *tx_ring, 1764 u32 *flags) 1765#else 1766static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 1767 struct i40e_ring *tx_ring, 1768 u32 *flags) 1769#endif 1770{ 1771 __be16 protocol = skb->protocol; 1772 u32 tx_flags = 0; 1773 1774 /* if we have a HW VLAN tag being added, default to the HW one */ 1775 if (vlan_tx_tag_present(skb)) { 1776 tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 1777 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 1778 /* else if it is a SW VLAN, check the next protocol and store the tag */ 1779 } else if (protocol == htons(ETH_P_8021Q)) { 1780 struct vlan_hdr *vhdr, _vhdr; 1781 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 1782 if (!vhdr) 1783 return -EINVAL; 1784 1785 protocol = vhdr->h_vlan_encapsulated_proto; 1786 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 1787 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 1788 } 1789 1790 /* Insert 802.1p priority into VLAN header */ 1791 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) || 1792 (skb->priority != TC_PRIO_CONTROL)) { 1793 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK; 1794 tx_flags |= (skb->priority & 0x7) << 1795 I40E_TX_FLAGS_VLAN_PRIO_SHIFT; 1796 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) { 1797 struct vlan_ethhdr *vhdr; 1798 int rc; 1799 1800 rc = skb_cow_head(skb, 0); 1801 if (rc < 0) 1802 return rc; 1803 vhdr = (struct vlan_ethhdr *)skb->data; 1804 vhdr->h_vlan_TCI = htons(tx_flags >> 1805 I40E_TX_FLAGS_VLAN_SHIFT); 1806 } else { 1807 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 1808 } 1809 } 1810 *flags = tx_flags; 1811 return 0; 1812} 1813 1814/** 1815 * i40e_tso - set up the tso context descriptor 1816 * @tx_ring: ptr to the ring to send 1817 * @skb: ptr to the skb we're sending 1818 * @tx_flags: the collected send information 1819 * @protocol: the send protocol 1820 * @hdr_len: ptr to the size of the packet header 1821 * @cd_tunneling: ptr to context descriptor bits 1822 * 1823 * Returns 0 if no TSO can happen, 1 if tso is going, or error 1824 **/ 1825static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, 1826 u32 tx_flags, __be16 protocol, u8 *hdr_len, 1827 u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) 1828{ 1829 u32 cd_cmd, cd_tso_len, cd_mss; 1830 struct ipv6hdr *ipv6h; 1831 struct tcphdr *tcph; 1832 struct iphdr *iph; 1833 u32 l4len; 1834 int err; 1835 1836 if (!skb_is_gso(skb)) 1837 return 0; 1838 1839 err = skb_cow_head(skb, 0); 1840 if (err < 0) 1841 return err; 1842 1843 if (protocol == htons(ETH_P_IP)) { 1844 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 1845 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1846 iph->tot_len = 0; 1847 iph->check = 0; 1848 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 1849 0, IPPROTO_TCP, 0); 1850 } else if (skb_is_gso_v6(skb)) { 1851 1852 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) 1853 : ipv6_hdr(skb); 1854 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1855 ipv6h->payload_len = 0; 1856 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 1857 0, IPPROTO_TCP, 0); 1858 } 1859 1860 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); 1861 *hdr_len = (skb->encapsulation 1862 ? (skb_inner_transport_header(skb) - skb->data) 1863 : skb_transport_offset(skb)) + l4len; 1864 1865 /* find the field values */ 1866 cd_cmd = I40E_TX_CTX_DESC_TSO; 1867 cd_tso_len = skb->len - *hdr_len; 1868 cd_mss = skb_shinfo(skb)->gso_size; 1869 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 1870 ((u64)cd_tso_len << 1871 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 1872 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 1873 return 1; 1874} 1875 1876/** 1877 * i40e_tsyn - set up the tsyn context descriptor 1878 * @tx_ring: ptr to the ring to send 1879 * @skb: ptr to the skb we're sending 1880 * @tx_flags: the collected send information 1881 * 1882 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen 1883 **/ 1884static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, 1885 u32 tx_flags, u64 *cd_type_cmd_tso_mss) 1886{ 1887 struct i40e_pf *pf; 1888 1889 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) 1890 return 0; 1891 1892 /* Tx timestamps cannot be sampled when doing TSO */ 1893 if (tx_flags & I40E_TX_FLAGS_TSO) 1894 return 0; 1895 1896 /* only timestamp the outbound packet if the user has requested it and 1897 * we are not already transmitting a packet to be timestamped 1898 */ 1899 pf = i40e_netdev_to_pf(tx_ring->netdev); 1900 if (pf->ptp_tx && 1901 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) { 1902 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1903 pf->ptp_tx_skb = skb_get(skb); 1904 } else { 1905 return 0; 1906 } 1907 1908 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN << 1909 I40E_TXD_CTX_QW1_CMD_SHIFT; 1910 1911 return 1; 1912} 1913 1914/** 1915 * i40e_tx_enable_csum - Enable Tx checksum offloads 1916 * @skb: send buffer 1917 * @tx_flags: Tx flags currently set 1918 * @td_cmd: Tx descriptor command bits to set 1919 * @td_offset: Tx descriptor header offsets to set 1920 * @cd_tunneling: ptr to context desc bits 1921 **/ 1922static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, 1923 u32 *td_cmd, u32 *td_offset, 1924 struct i40e_ring *tx_ring, 1925 u32 *cd_tunneling) 1926{ 1927 struct ipv6hdr *this_ipv6_hdr; 1928 unsigned int this_tcp_hdrlen; 1929 struct iphdr *this_ip_hdr; 1930 u32 network_hdr_len; 1931 u8 l4_hdr = 0; 1932 1933 if (skb->encapsulation) { 1934 network_hdr_len = skb_inner_network_header_len(skb); 1935 this_ip_hdr = inner_ip_hdr(skb); 1936 this_ipv6_hdr = inner_ipv6_hdr(skb); 1937 this_tcp_hdrlen = inner_tcp_hdrlen(skb); 1938 1939 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1940 1941 if (tx_flags & I40E_TX_FLAGS_TSO) { 1942 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; 1943 ip_hdr(skb)->check = 0; 1944 } else { 1945 *cd_tunneling |= 1946 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1947 } 1948 } else if (tx_flags & I40E_TX_FLAGS_IPV6) { 1949 if (tx_flags & I40E_TX_FLAGS_TSO) { 1950 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; 1951 ip_hdr(skb)->check = 0; 1952 } else { 1953 *cd_tunneling |= 1954 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1955 } 1956 } 1957 1958 /* Now set the ctx descriptor fields */ 1959 *cd_tunneling |= (skb_network_header_len(skb) >> 2) << 1960 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | 1961 I40E_TXD_CTX_UDP_TUNNELING | 1962 ((skb_inner_network_offset(skb) - 1963 skb_transport_offset(skb)) >> 1) << 1964 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 1965 1966 } else { 1967 network_hdr_len = skb_network_header_len(skb); 1968 this_ip_hdr = ip_hdr(skb); 1969 this_ipv6_hdr = ipv6_hdr(skb); 1970 this_tcp_hdrlen = tcp_hdrlen(skb); 1971 } 1972 1973 /* Enable IP checksum offloads */ 1974 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1975 l4_hdr = this_ip_hdr->protocol; 1976 /* the stack computes the IP header already, the only time we 1977 * need the hardware to recompute it is in the case of TSO. 1978 */ 1979 if (tx_flags & I40E_TX_FLAGS_TSO) { 1980 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 1981 this_ip_hdr->check = 0; 1982 } else { 1983 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 1984 } 1985 /* Now set the td_offset for IP header length */ 1986 *td_offset = (network_hdr_len >> 2) << 1987 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1988 } else if (tx_flags & I40E_TX_FLAGS_IPV6) { 1989 l4_hdr = this_ipv6_hdr->nexthdr; 1990 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 1991 /* Now set the td_offset for IP header length */ 1992 *td_offset = (network_hdr_len >> 2) << 1993 I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1994 } 1995 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ 1996 *td_offset |= (skb_network_offset(skb) >> 1) << 1997 I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 1998 1999 /* Enable L4 checksum offloads */ 2000 switch (l4_hdr) { 2001 case IPPROTO_TCP: 2002 /* enable checksum offloads */ 2003 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 2004 *td_offset |= (this_tcp_hdrlen >> 2) << 2005 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2006 break; 2007 case IPPROTO_SCTP: 2008 /* enable SCTP checksum offload */ 2009 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 2010 *td_offset |= (sizeof(struct sctphdr) >> 2) << 2011 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2012 break; 2013 case IPPROTO_UDP: 2014 /* enable UDP checksum offload */ 2015 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 2016 *td_offset |= (sizeof(struct udphdr) >> 2) << 2017 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 2018 break; 2019 default: 2020 break; 2021 } 2022} 2023 2024/** 2025 * i40e_create_tx_ctx Build the Tx context descriptor 2026 * @tx_ring: ring to create the descriptor on 2027 * @cd_type_cmd_tso_mss: Quad Word 1 2028 * @cd_tunneling: Quad Word 0 - bits 0-31 2029 * @cd_l2tag2: Quad Word 0 - bits 32-63 2030 **/ 2031static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, 2032 const u64 cd_type_cmd_tso_mss, 2033 const u32 cd_tunneling, const u32 cd_l2tag2) 2034{ 2035 struct i40e_tx_context_desc *context_desc; 2036 int i = tx_ring->next_to_use; 2037 2038 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && 2039 !cd_tunneling && !cd_l2tag2) 2040 return; 2041 2042 /* grab the next descriptor */ 2043 context_desc = I40E_TX_CTXTDESC(tx_ring, i); 2044 2045 i++; 2046 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 2047 2048 /* cpu_to_le32 and assign to struct fields */ 2049 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 2050 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 2051 context_desc->rsvd = cpu_to_le16(0); 2052 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 2053} 2054 2055/** 2056 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions 2057 * @tx_ring: the ring to be checked 2058 * @size: the size buffer we want to assure is available 2059 * 2060 * Returns -EBUSY if a stop is needed, else 0 2061 **/ 2062static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 2063{ 2064 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 2065 /* Memory barrier before checking head and tail */ 2066 smp_mb(); 2067 2068 /* Check again in a case another CPU has just made room available. */ 2069 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 2070 return -EBUSY; 2071 2072 /* A reprieve! - use start_queue because it doesn't call schedule */ 2073 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 2074 ++tx_ring->tx_stats.restart_queue; 2075 return 0; 2076} 2077 2078/** 2079 * i40e_maybe_stop_tx - 1st level check for tx stop conditions 2080 * @tx_ring: the ring to be checked 2081 * @size: the size buffer we want to assure is available 2082 * 2083 * Returns 0 if stop is not needed 2084 **/ 2085#ifdef I40E_FCOE 2086int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 2087#else 2088static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 2089#endif 2090{ 2091 if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) 2092 return 0; 2093 return __i40e_maybe_stop_tx(tx_ring, size); 2094} 2095 2096/** 2097 * i40e_tx_map - Build the Tx descriptor 2098 * @tx_ring: ring to send buffer on 2099 * @skb: send buffer 2100 * @first: first buffer info buffer to use 2101 * @tx_flags: collected send information 2102 * @hdr_len: size of the packet header 2103 * @td_cmd: the command field in the descriptor 2104 * @td_offset: offset for checksum or crc 2105 **/ 2106#ifdef I40E_FCOE 2107void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 2108 struct i40e_tx_buffer *first, u32 tx_flags, 2109 const u8 hdr_len, u32 td_cmd, u32 td_offset) 2110#else 2111static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 2112 struct i40e_tx_buffer *first, u32 tx_flags, 2113 const u8 hdr_len, u32 td_cmd, u32 td_offset) 2114#endif 2115{ 2116 unsigned int data_len = skb->data_len; 2117 unsigned int size = skb_headlen(skb); 2118 struct skb_frag_struct *frag; 2119 struct i40e_tx_buffer *tx_bi; 2120 struct i40e_tx_desc *tx_desc; 2121 u16 i = tx_ring->next_to_use; 2122 u32 td_tag = 0; 2123 dma_addr_t dma; 2124 u16 gso_segs; 2125 2126 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { 2127 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; 2128 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> 2129 I40E_TX_FLAGS_VLAN_SHIFT; 2130 } 2131 2132 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) 2133 gso_segs = skb_shinfo(skb)->gso_segs; 2134 else 2135 gso_segs = 1; 2136 2137 /* multiply data chunks by size of headers */ 2138 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); 2139 first->gso_segs = gso_segs; 2140 first->skb = skb; 2141 first->tx_flags = tx_flags; 2142 2143 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 2144 2145 tx_desc = I40E_TX_DESC(tx_ring, i); 2146 tx_bi = first; 2147 2148 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 2149 if (dma_mapping_error(tx_ring->dev, dma)) 2150 goto dma_error; 2151 2152 /* record length, and DMA address */ 2153 dma_unmap_len_set(tx_bi, len, size); 2154 dma_unmap_addr_set(tx_bi, dma, dma); 2155 2156 tx_desc->buffer_addr = cpu_to_le64(dma); 2157 2158 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 2159 tx_desc->cmd_type_offset_bsz = 2160 build_ctob(td_cmd, td_offset, 2161 I40E_MAX_DATA_PER_TXD, td_tag); 2162 2163 tx_desc++; 2164 i++; 2165 if (i == tx_ring->count) { 2166 tx_desc = I40E_TX_DESC(tx_ring, 0); 2167 i = 0; 2168 } 2169 2170 dma += I40E_MAX_DATA_PER_TXD; 2171 size -= I40E_MAX_DATA_PER_TXD; 2172 2173 tx_desc->buffer_addr = cpu_to_le64(dma); 2174 } 2175 2176 if (likely(!data_len)) 2177 break; 2178 2179 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 2180 size, td_tag); 2181 2182 tx_desc++; 2183 i++; 2184 if (i == tx_ring->count) { 2185 tx_desc = I40E_TX_DESC(tx_ring, 0); 2186 i = 0; 2187 } 2188 2189 size = skb_frag_size(frag); 2190 data_len -= size; 2191 2192 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 2193 DMA_TO_DEVICE); 2194 2195 tx_bi = &tx_ring->tx_bi[i]; 2196 } 2197 2198 /* Place RS bit on last descriptor of any packet that spans across the 2199 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. 2200 */ 2201#define WB_STRIDE 0x3 2202 if (((i & WB_STRIDE) != WB_STRIDE) && 2203 (first <= &tx_ring->tx_bi[i]) && 2204 (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { 2205 tx_desc->cmd_type_offset_bsz = 2206 build_ctob(td_cmd, td_offset, size, td_tag) | 2207 cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << 2208 I40E_TXD_QW1_CMD_SHIFT); 2209 } else { 2210 tx_desc->cmd_type_offset_bsz = 2211 build_ctob(td_cmd, td_offset, size, td_tag) | 2212 cpu_to_le64((u64)I40E_TXD_CMD << 2213 I40E_TXD_QW1_CMD_SHIFT); 2214 } 2215 2216 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, 2217 tx_ring->queue_index), 2218 first->bytecount); 2219 2220 /* set the timestamp */ 2221 first->time_stamp = jiffies; 2222 2223 /* Force memory writes to complete before letting h/w 2224 * know there are new descriptors to fetch. (Only 2225 * applicable for weak-ordered memory model archs, 2226 * such as IA-64). 2227 */ 2228 wmb(); 2229 2230 /* set next_to_watch value indicating a packet is present */ 2231 first->next_to_watch = tx_desc; 2232 2233 i++; 2234 if (i == tx_ring->count) 2235 i = 0; 2236 2237 tx_ring->next_to_use = i; 2238 2239 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); 2240 /* notify HW of packet */ 2241 if (!skb->xmit_more || 2242 netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, 2243 tx_ring->queue_index))) 2244 writel(i, tx_ring->tail); 2245 2246 return; 2247 2248dma_error: 2249 dev_info(tx_ring->dev, "TX DMA map failed\n"); 2250 2251 /* clear dma mappings for failed tx_bi map */ 2252 for (;;) { 2253 tx_bi = &tx_ring->tx_bi[i]; 2254 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); 2255 if (tx_bi == first) 2256 break; 2257 if (i == 0) 2258 i = tx_ring->count; 2259 i--; 2260 } 2261 2262 tx_ring->next_to_use = i; 2263} 2264 2265/** 2266 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed 2267 * @skb: send buffer 2268 * @tx_ring: ring to send buffer on 2269 * 2270 * Returns number of data descriptors needed for this skb. Returns 0 to indicate 2271 * there is not enough descriptors available in this ring since we need at least 2272 * one descriptor. 2273 **/ 2274#ifdef I40E_FCOE 2275int i40e_xmit_descriptor_count(struct sk_buff *skb, 2276 struct i40e_ring *tx_ring) 2277#else 2278static int i40e_xmit_descriptor_count(struct sk_buff *skb, 2279 struct i40e_ring *tx_ring) 2280#endif 2281{ 2282 unsigned int f; 2283 int count = 0; 2284 2285 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 2286 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 2287 * + 4 desc gap to avoid the cache line where head is, 2288 * + 1 desc for context descriptor, 2289 * otherwise try next time 2290 */ 2291 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 2292 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); 2293 2294 count += TXD_USE_COUNT(skb_headlen(skb)); 2295 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { 2296 tx_ring->tx_stats.tx_busy++; 2297 return 0; 2298 } 2299 return count; 2300} 2301 2302/** 2303 * i40e_xmit_frame_ring - Sends buffer on Tx ring 2304 * @skb: send buffer 2305 * @tx_ring: ring to send buffer on 2306 * 2307 * Returns NETDEV_TX_OK if sent, else an error code 2308 **/ 2309static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 2310 struct i40e_ring *tx_ring) 2311{ 2312 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 2313 u32 cd_tunneling = 0, cd_l2tag2 = 0; 2314 struct i40e_tx_buffer *first; 2315 u32 td_offset = 0; 2316 u32 tx_flags = 0; 2317 __be16 protocol; 2318 u32 td_cmd = 0; 2319 u8 hdr_len = 0; 2320 int tsyn; 2321 int tso; 2322 if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) 2323 return NETDEV_TX_BUSY; 2324 2325 /* prepare the xmit flags */ 2326 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 2327 goto out_drop; 2328 2329 /* obtain protocol of skb */ 2330 protocol = vlan_get_protocol(skb); 2331 2332 /* record the location of the first descriptor for this packet */ 2333 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 2334 2335 /* setup IPv4/IPv6 offloads */ 2336 if (protocol == htons(ETH_P_IP)) 2337 tx_flags |= I40E_TX_FLAGS_IPV4; 2338 else if (protocol == htons(ETH_P_IPV6)) 2339 tx_flags |= I40E_TX_FLAGS_IPV6; 2340 2341 tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, 2342 &cd_type_cmd_tso_mss, &cd_tunneling); 2343 2344 if (tso < 0) 2345 goto out_drop; 2346 else if (tso) 2347 tx_flags |= I40E_TX_FLAGS_TSO; 2348 2349 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); 2350 2351 if (tsyn) 2352 tx_flags |= I40E_TX_FLAGS_TSYN; 2353 2354 skb_tx_timestamp(skb); 2355 2356 /* always enable CRC insertion offload */ 2357 td_cmd |= I40E_TX_DESC_CMD_ICRC; 2358 2359 /* Always offload the checksum, since it's in the data descriptor */ 2360 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2361 tx_flags |= I40E_TX_FLAGS_CSUM; 2362 2363 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, 2364 tx_ring, &cd_tunneling); 2365 } 2366 2367 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 2368 cd_tunneling, cd_l2tag2); 2369 2370 /* Add Flow Director ATR if it's enabled. 2371 * 2372 * NOTE: this must always be directly before the data descriptor. 2373 */ 2374 i40e_atr(tx_ring, skb, tx_flags, protocol); 2375 2376 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 2377 td_cmd, td_offset); 2378 2379 return NETDEV_TX_OK; 2380 2381out_drop: 2382 dev_kfree_skb_any(skb); 2383 return NETDEV_TX_OK; 2384} 2385 2386/** 2387 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer 2388 * @skb: send buffer 2389 * @netdev: network interface device structure 2390 * 2391 * Returns NETDEV_TX_OK if sent, else an error code 2392 **/ 2393netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 2394{ 2395 struct i40e_netdev_priv *np = netdev_priv(netdev); 2396 struct i40e_vsi *vsi = np->vsi; 2397 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; 2398 2399 /* hardware can't handle really short frames, hardware padding works 2400 * beyond this point 2401 */ 2402 if (unlikely(skb->len < I40E_MIN_TX_LEN)) { 2403 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) 2404 return NETDEV_TX_OK; 2405 skb->len = I40E_MIN_TX_LEN; 2406 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); 2407 } 2408 2409 return i40e_xmit_frame_ring(skb, tx_ring); 2410} 2411