1/******************************************************************************* 2 * 3 * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver 4 * Copyright(c) 2013 - 2014 Intel Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 * The full GNU General Public License is included in this distribution in 19 * the file called "COPYING". 20 * 21 * Contact Information: 22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 24 * 25 ******************************************************************************/ 26 27#include <linux/prefetch.h> 28 29#include "i40evf.h" 30#include "i40e_prototype.h" 31 32static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, 33 u32 td_tag) 34{ 35 return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | 36 ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | 37 ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | 38 ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | 39 ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); 40} 41 42#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 43 44/** 45 * i40e_unmap_and_free_tx_resource - Release a Tx buffer 46 * @ring: the ring that owns the buffer 47 * @tx_buffer: the buffer to free 48 **/ 49static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, 50 struct i40e_tx_buffer *tx_buffer) 51{ 52 if (tx_buffer->skb) { 53 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) 54 kfree(tx_buffer->raw_buf); 55 else 56 dev_kfree_skb_any(tx_buffer->skb); 57 58 if (dma_unmap_len(tx_buffer, len)) 59 dma_unmap_single(ring->dev, 60 dma_unmap_addr(tx_buffer, dma), 61 dma_unmap_len(tx_buffer, len), 62 DMA_TO_DEVICE); 63 } else if (dma_unmap_len(tx_buffer, len)) { 64 dma_unmap_page(ring->dev, 65 dma_unmap_addr(tx_buffer, dma), 66 dma_unmap_len(tx_buffer, len), 67 DMA_TO_DEVICE); 68 } 69 tx_buffer->next_to_watch = NULL; 70 tx_buffer->skb = NULL; 71 dma_unmap_len_set(tx_buffer, len, 0); 72 /* tx_buffer must be completely set up in the transmit path */ 73} 74 75/** 76 * i40evf_clean_tx_ring - Free any empty Tx buffers 77 * @tx_ring: ring to be cleaned 78 **/ 79void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) 80{ 81 unsigned long bi_size; 82 u16 i; 83 84 /* ring already cleared, nothing to do */ 85 if (!tx_ring->tx_bi) 86 return; 87 88 /* Free all the Tx ring sk_buffs */ 89 for (i = 0; i < tx_ring->count; i++) 90 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); 91 92 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 93 memset(tx_ring->tx_bi, 0, bi_size); 94 95 /* Zero out the descriptor ring */ 96 memset(tx_ring->desc, 0, tx_ring->size); 97 98 tx_ring->next_to_use = 0; 99 tx_ring->next_to_clean = 0; 100 101 if (!tx_ring->netdev) 102 return; 103 104 /* cleanup Tx queue statistics */ 105 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 106 tx_ring->queue_index)); 107} 108 109/** 110 * i40evf_free_tx_resources - Free Tx resources per queue 
111 * @tx_ring: Tx descriptor ring for a specific queue 112 * 113 * Free all transmit software resources 114 **/ 115void i40evf_free_tx_resources(struct i40e_ring *tx_ring) 116{ 117 i40evf_clean_tx_ring(tx_ring); 118 kfree(tx_ring->tx_bi); 119 tx_ring->tx_bi = NULL; 120 121 if (tx_ring->desc) { 122 dma_free_coherent(tx_ring->dev, tx_ring->size, 123 tx_ring->desc, tx_ring->dma); 124 tx_ring->desc = NULL; 125 } 126} 127 128/** 129 * i40e_get_tx_pending - how many tx descriptors not processed 130 * @tx_ring: the ring of descriptors 131 * 132 * Since there is no access to the ring head register 133 * in XL710, we need to use our local copies 134 **/ 135static u32 i40e_get_tx_pending(struct i40e_ring *ring) 136{ 137 u32 ntu = ((ring->next_to_clean <= ring->next_to_use) 138 ? ring->next_to_use 139 : ring->next_to_use + ring->count); 140 return ntu - ring->next_to_clean; 141} 142 143/** 144 * i40e_check_tx_hang - Is there a hang in the Tx queue 145 * @tx_ring: the ring of descriptors 146 **/ 147static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) 148{ 149 u32 tx_pending = i40e_get_tx_pending(tx_ring); 150 bool ret = false; 151 152 clear_check_for_tx_hang(tx_ring); 153 154 /* Check for a hung queue, but be thorough. This verifies 155 * that a transmit has been completed since the previous 156 * check AND there is at least one packet pending. The 157 * ARMED bit is set to indicate a potential hang. The 158 * bit is cleared if a pause frame is received to remove 159 * false hang detection due to PFC or 802.3x frames. By 160 * requiring this to fail twice we avoid races with 161 * PFC clearing the ARMED bit and conditions where we 162 * run the check_tx_hang logic with a transmit completion 163 * pending but without time to complete it yet. 164 */ 165 if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && 166 (tx_pending >= I40E_MIN_DESC_PENDING)) { 167 /* make sure it is true for two checks in a row */ 168 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, 169 &tx_ring->state); 170 } else if (!(tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) || 171 !(tx_pending < I40E_MIN_DESC_PENDING) || 172 !(tx_pending > 0)) { 173 /* update completed stats and disarm the hang check */ 174 tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; 175 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); 176 } 177 178 return ret; 179} 180 181/** 182 * i40e_get_head - Retrieve head from head writeback 183 * @tx_ring: tx ring to fetch head of 184 * 185 * Returns value of Tx ring head based on value stored 186 * in head write-back location 187 **/ 188static inline u32 i40e_get_head(struct i40e_ring *tx_ring) 189{ 190 void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; 191 192 return le32_to_cpu(*(volatile __le32 *)head); 193} 194 195/** 196 * i40e_clean_tx_irq - Reclaim resources after transmit completes 197 * @tx_ring: tx ring to clean 198 * @budget: how many cleans we're allowed 199 * 200 * Returns true if there's any budget left (e.g. 
the clean is finished) 201 **/ 202static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) 203{ 204 u16 i = tx_ring->next_to_clean; 205 struct i40e_tx_buffer *tx_buf; 206 struct i40e_tx_desc *tx_head; 207 struct i40e_tx_desc *tx_desc; 208 unsigned int total_packets = 0; 209 unsigned int total_bytes = 0; 210 211 tx_buf = &tx_ring->tx_bi[i]; 212 tx_desc = I40E_TX_DESC(tx_ring, i); 213 i -= tx_ring->count; 214 215 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); 216 217 do { 218 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; 219 220 /* if next_to_watch is not set then there is no work pending */ 221 if (!eop_desc) 222 break; 223 224 /* prevent any other reads prior to eop_desc */ 225 read_barrier_depends(); 226 227 /* we have caught up to head, no work left to do */ 228 if (tx_head == tx_desc) 229 break; 230 231 /* clear next_to_watch to prevent false hangs */ 232 tx_buf->next_to_watch = NULL; 233 234 /* update the statistics for this packet */ 235 total_bytes += tx_buf->bytecount; 236 total_packets += tx_buf->gso_segs; 237 238 /* free the skb */ 239 dev_kfree_skb_any(tx_buf->skb); 240 241 /* unmap skb header data */ 242 dma_unmap_single(tx_ring->dev, 243 dma_unmap_addr(tx_buf, dma), 244 dma_unmap_len(tx_buf, len), 245 DMA_TO_DEVICE); 246 247 /* clear tx_buffer data */ 248 tx_buf->skb = NULL; 249 dma_unmap_len_set(tx_buf, len, 0); 250 251 /* unmap remaining buffers */ 252 while (tx_desc != eop_desc) { 253 254 tx_buf++; 255 tx_desc++; 256 i++; 257 if (unlikely(!i)) { 258 i -= tx_ring->count; 259 tx_buf = tx_ring->tx_bi; 260 tx_desc = I40E_TX_DESC(tx_ring, 0); 261 } 262 263 /* unmap any remaining paged data */ 264 if (dma_unmap_len(tx_buf, len)) { 265 dma_unmap_page(tx_ring->dev, 266 dma_unmap_addr(tx_buf, dma), 267 dma_unmap_len(tx_buf, len), 268 DMA_TO_DEVICE); 269 dma_unmap_len_set(tx_buf, len, 0); 270 } 271 } 272 273 /* move us one more past the eop_desc for start of next pkt */ 274 tx_buf++; 275 tx_desc++; 276 i++; 277 if (unlikely(!i)) { 278 i -= tx_ring->count; 279 tx_buf = tx_ring->tx_bi; 280 tx_desc = I40E_TX_DESC(tx_ring, 0); 281 } 282 283 /* update budget accounting */ 284 budget--; 285 } while (likely(budget)); 286 287 i += tx_ring->count; 288 tx_ring->next_to_clean = i; 289 u64_stats_update_begin(&tx_ring->syncp); 290 tx_ring->stats.bytes += total_bytes; 291 tx_ring->stats.packets += total_packets; 292 u64_stats_update_end(&tx_ring->syncp); 293 tx_ring->q_vector->tx.total_bytes += total_bytes; 294 tx_ring->q_vector->tx.total_packets += total_packets; 295 296 if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { 297 /* schedule immediate reset if we believe we hung */ 298 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" 299 " VSI <%d>\n" 300 " Tx Queue <%d>\n" 301 " next_to_use <%x>\n" 302 " next_to_clean <%x>\n", 303 tx_ring->vsi->seid, 304 tx_ring->queue_index, 305 tx_ring->next_to_use, i); 306 dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n" 307 " time_stamp <%lx>\n" 308 " jiffies <%lx>\n", 309 tx_ring->tx_bi[i].time_stamp, jiffies); 310 311 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 312 313 dev_info(tx_ring->dev, 314 "tx hang detected on queue %d, resetting adapter\n", 315 tx_ring->queue_index); 316 317 tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev); 318 319 /* the adapter is about to reset, no point in enabling stuff */ 320 return true; 321 } 322 323 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, 324 tx_ring->queue_index), 325 total_packets, total_bytes); 326 327#define TX_WAKE_THRESHOLD 
(DESC_NEEDED * 2)
        if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
                     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
                /* Make sure that anybody stopping the queue after this
                 * sees the new next_to_clean.
                 */
                smp_mb();
                if (__netif_subqueue_stopped(tx_ring->netdev,
                                             tx_ring->queue_index) &&
                    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
                        netif_wake_subqueue(tx_ring->netdev,
                                            tx_ring->queue_index);
                        ++tx_ring->tx_stats.restart_queue;
                }
        }

        return budget > 0;
}

/**
 * i40e_set_new_dynamic_itr - Find new ITR level
 * @rc: structure containing ring performance data
 *
 * Stores a new ITR value based on packets and byte counts during
 * the last interrupt. The advantage of per interrupt computation
 * is faster updates and more accurate ITR for the current traffic
 * pattern. Constants in this function were computed based on
 * theoretical maximum wire speed and thresholds were set based on
 * testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 **/
static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
        enum i40e_latency_range new_latency_range = rc->latency_range;
        u32 new_itr = rc->itr;
        int bytes_per_int;

        if (rc->total_packets == 0 || !rc->itr)
                return;

        /* simple throttlerate management
         *   0-10MB/s   lowest (100000 ints/s)
         *  10-20MB/s   low    (20000 ints/s)
         *  20-1249MB/s bulk   (8000 ints/s)
         */
        bytes_per_int = rc->total_bytes / rc->itr;
        switch (new_latency_range) {
        case I40E_LOWEST_LATENCY:
                if (bytes_per_int > 10)
                        new_latency_range = I40E_LOW_LATENCY;
                break;
        case I40E_LOW_LATENCY:
                if (bytes_per_int > 20)
                        new_latency_range = I40E_BULK_LATENCY;
                else if (bytes_per_int <= 10)
                        new_latency_range = I40E_LOWEST_LATENCY;
                break;
        case I40E_BULK_LATENCY:
                if (bytes_per_int <= 20)
                        new_latency_range = I40E_LOW_LATENCY;
                break;
        }

        switch (new_latency_range) {
        case I40E_LOWEST_LATENCY:
                new_itr = I40E_ITR_100K;
                break;
        case I40E_LOW_LATENCY:
                new_itr = I40E_ITR_20K;
                break;
        case I40E_BULK_LATENCY:
                new_itr = I40E_ITR_8K;
                break;
        default:
                break;
        }

        if (new_itr != rc->itr) {
                /* do an exponential smoothing */
                new_itr = (10 * new_itr * rc->itr) /
                          ((9 * new_itr) + rc->itr);
                rc->itr = new_itr & I40E_MAX_ITR;
        }

        rc->total_bytes = 0;
        rc->total_packets = 0;
}

/**
 * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
 * @q_vector: the vector to adjust
 **/
static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
{
        u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
        struct i40e_hw *hw = &q_vector->vsi->back->hw;
        u32 reg_addr;
        u16 old_itr;

        reg_addr = I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1);
        old_itr = q_vector->rx.itr;
        i40e_set_new_dynamic_itr(&q_vector->rx);
        if (old_itr != q_vector->rx.itr)
                wr32(hw, reg_addr, q_vector->rx.itr);

        reg_addr = I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1);
        old_itr = q_vector->tx.itr;
        i40e_set_new_dynamic_itr(&q_vector->tx);
        if (old_itr != q_vector->tx.itr)
                wr32(hw, reg_addr, q_vector->tx.itr);
}

/**
 * i40evf_setup_tx_descriptors - Allocate the Tx descriptors
 * @tx_ring: the tx ring to set up
 *
 * Return 0 on success, negative on error
 **/
int i40evf_setup_tx_descriptors(struct i40e_ring
*tx_ring) 446{ 447 struct device *dev = tx_ring->dev; 448 int bi_size; 449 450 if (!dev) 451 return -ENOMEM; 452 453 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; 454 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); 455 if (!tx_ring->tx_bi) 456 goto err; 457 458 /* round up to nearest 4K */ 459 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); 460 /* add u32 for head writeback, align after this takes care of 461 * guaranteeing this is at least one cache line in size 462 */ 463 tx_ring->size += sizeof(u32); 464 tx_ring->size = ALIGN(tx_ring->size, 4096); 465 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 466 &tx_ring->dma, GFP_KERNEL); 467 if (!tx_ring->desc) { 468 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", 469 tx_ring->size); 470 goto err; 471 } 472 473 tx_ring->next_to_use = 0; 474 tx_ring->next_to_clean = 0; 475 return 0; 476 477err: 478 kfree(tx_ring->tx_bi); 479 tx_ring->tx_bi = NULL; 480 return -ENOMEM; 481} 482 483/** 484 * i40evf_clean_rx_ring - Free Rx buffers 485 * @rx_ring: ring to be cleaned 486 **/ 487void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) 488{ 489 struct device *dev = rx_ring->dev; 490 struct i40e_rx_buffer *rx_bi; 491 unsigned long bi_size; 492 u16 i; 493 494 /* ring already cleared, nothing to do */ 495 if (!rx_ring->rx_bi) 496 return; 497 498 /* Free all the Rx ring sk_buffs */ 499 for (i = 0; i < rx_ring->count; i++) { 500 rx_bi = &rx_ring->rx_bi[i]; 501 if (rx_bi->dma) { 502 dma_unmap_single(dev, 503 rx_bi->dma, 504 rx_ring->rx_buf_len, 505 DMA_FROM_DEVICE); 506 rx_bi->dma = 0; 507 } 508 if (rx_bi->skb) { 509 dev_kfree_skb(rx_bi->skb); 510 rx_bi->skb = NULL; 511 } 512 if (rx_bi->page) { 513 if (rx_bi->page_dma) { 514 dma_unmap_page(dev, 515 rx_bi->page_dma, 516 PAGE_SIZE / 2, 517 DMA_FROM_DEVICE); 518 rx_bi->page_dma = 0; 519 } 520 __free_page(rx_bi->page); 521 rx_bi->page = NULL; 522 rx_bi->page_offset = 0; 523 } 524 } 525 526 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 527 memset(rx_ring->rx_bi, 0, bi_size); 528 529 /* Zero out the descriptor ring */ 530 memset(rx_ring->desc, 0, rx_ring->size); 531 532 rx_ring->next_to_clean = 0; 533 rx_ring->next_to_use = 0; 534} 535 536/** 537 * i40evf_free_rx_resources - Free Rx resources 538 * @rx_ring: ring to clean the resources from 539 * 540 * Free all receive software resources 541 **/ 542void i40evf_free_rx_resources(struct i40e_ring *rx_ring) 543{ 544 i40evf_clean_rx_ring(rx_ring); 545 kfree(rx_ring->rx_bi); 546 rx_ring->rx_bi = NULL; 547 548 if (rx_ring->desc) { 549 dma_free_coherent(rx_ring->dev, rx_ring->size, 550 rx_ring->desc, rx_ring->dma); 551 rx_ring->desc = NULL; 552 } 553} 554 555/** 556 * i40evf_setup_rx_descriptors - Allocate Rx descriptors 557 * @rx_ring: Rx descriptor ring (for a specific queue) to setup 558 * 559 * Returns 0 on success, negative on failure 560 **/ 561int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring) 562{ 563 struct device *dev = rx_ring->dev; 564 int bi_size; 565 566 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; 567 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); 568 if (!rx_ring->rx_bi) 569 goto err; 570 571 /* Round up to nearest 4K */ 572 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring) 573 ? 
rx_ring->count * sizeof(union i40e_16byte_rx_desc) 574 : rx_ring->count * sizeof(union i40e_32byte_rx_desc); 575 rx_ring->size = ALIGN(rx_ring->size, 4096); 576 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 577 &rx_ring->dma, GFP_KERNEL); 578 579 if (!rx_ring->desc) { 580 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", 581 rx_ring->size); 582 goto err; 583 } 584 585 rx_ring->next_to_clean = 0; 586 rx_ring->next_to_use = 0; 587 588 return 0; 589err: 590 kfree(rx_ring->rx_bi); 591 rx_ring->rx_bi = NULL; 592 return -ENOMEM; 593} 594 595/** 596 * i40e_release_rx_desc - Store the new tail and head values 597 * @rx_ring: ring to bump 598 * @val: new head index 599 **/ 600static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) 601{ 602 rx_ring->next_to_use = val; 603 /* Force memory writes to complete before letting h/w 604 * know there are new descriptors to fetch. (Only 605 * applicable for weak-ordered memory model archs, 606 * such as IA-64). 607 */ 608 wmb(); 609 writel(val, rx_ring->tail); 610} 611 612/** 613 * i40evf_alloc_rx_buffers - Replace used receive buffers; packet split 614 * @rx_ring: ring to place buffers on 615 * @cleaned_count: number of buffers to replace 616 **/ 617void i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) 618{ 619 u16 i = rx_ring->next_to_use; 620 union i40e_rx_desc *rx_desc; 621 struct i40e_rx_buffer *bi; 622 struct sk_buff *skb; 623 624 /* do nothing if no valid netdev defined */ 625 if (!rx_ring->netdev || !cleaned_count) 626 return; 627 628 while (cleaned_count--) { 629 rx_desc = I40E_RX_DESC(rx_ring, i); 630 bi = &rx_ring->rx_bi[i]; 631 skb = bi->skb; 632 633 if (!skb) { 634 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 635 rx_ring->rx_buf_len); 636 if (!skb) { 637 rx_ring->rx_stats.alloc_buff_failed++; 638 goto no_buffers; 639 } 640 /* initialize queue mapping */ 641 skb_record_rx_queue(skb, rx_ring->queue_index); 642 bi->skb = skb; 643 } 644 645 if (!bi->dma) { 646 bi->dma = dma_map_single(rx_ring->dev, 647 skb->data, 648 rx_ring->rx_buf_len, 649 DMA_FROM_DEVICE); 650 if (dma_mapping_error(rx_ring->dev, bi->dma)) { 651 rx_ring->rx_stats.alloc_buff_failed++; 652 bi->dma = 0; 653 goto no_buffers; 654 } 655 } 656 657 if (ring_is_ps_enabled(rx_ring)) { 658 if (!bi->page) { 659 bi->page = alloc_page(GFP_ATOMIC); 660 if (!bi->page) { 661 rx_ring->rx_stats.alloc_page_failed++; 662 goto no_buffers; 663 } 664 } 665 666 if (!bi->page_dma) { 667 /* use a half page if we're re-using */ 668 bi->page_offset ^= PAGE_SIZE / 2; 669 bi->page_dma = dma_map_page(rx_ring->dev, 670 bi->page, 671 bi->page_offset, 672 PAGE_SIZE / 2, 673 DMA_FROM_DEVICE); 674 if (dma_mapping_error(rx_ring->dev, 675 bi->page_dma)) { 676 rx_ring->rx_stats.alloc_page_failed++; 677 bi->page_dma = 0; 678 goto no_buffers; 679 } 680 } 681 682 /* Refresh the desc even if buffer_addrs didn't change 683 * because each write-back erases this info. 
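                         * (In packet-split mode the half-page data buffer is
                         * handed to the hardware through pkt_addr and the
                         * header buffer through hdr_addr, while the
                         * single-buffer path in the else branch below
                         * programs only pkt_addr and clears hdr_addr.)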
684 */ 685 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 686 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 687 } else { 688 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); 689 rx_desc->read.hdr_addr = 0; 690 } 691 i++; 692 if (i == rx_ring->count) 693 i = 0; 694 } 695 696no_buffers: 697 if (rx_ring->next_to_use != i) 698 i40e_release_rx_desc(rx_ring, i); 699} 700 701/** 702 * i40e_receive_skb - Send a completed packet up the stack 703 * @rx_ring: rx ring in play 704 * @skb: packet to send up 705 * @vlan_tag: vlan tag for packet 706 **/ 707static void i40e_receive_skb(struct i40e_ring *rx_ring, 708 struct sk_buff *skb, u16 vlan_tag) 709{ 710 struct i40e_q_vector *q_vector = rx_ring->q_vector; 711 struct i40e_vsi *vsi = rx_ring->vsi; 712 u64 flags = vsi->back->flags; 713 714 if (vlan_tag & VLAN_VID_MASK) 715 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); 716 717 if (flags & I40E_FLAG_IN_NETPOLL) 718 netif_rx(skb); 719 else 720 napi_gro_receive(&q_vector->napi, skb); 721} 722 723/** 724 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum 725 * @vsi: the VSI we care about 726 * @skb: skb currently being received and modified 727 * @rx_status: status value of last descriptor in packet 728 * @rx_error: error value of last descriptor in packet 729 * @rx_ptype: ptype value of last descriptor in packet 730 **/ 731static inline void i40e_rx_checksum(struct i40e_vsi *vsi, 732 struct sk_buff *skb, 733 u32 rx_status, 734 u32 rx_error, 735 u16 rx_ptype) 736{ 737 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); 738 bool ipv4 = false, ipv6 = false; 739 bool ipv4_tunnel, ipv6_tunnel; 740 __wsum rx_udp_csum; 741 struct iphdr *iph; 742 __sum16 csum; 743 744 ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && 745 (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); 746 ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && 747 (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); 748 749 skb->ip_summed = CHECKSUM_NONE; 750 751 /* Rx csum enabled and ip headers found? */ 752 if (!(vsi->netdev->features & NETIF_F_RXCSUM)) 753 return; 754 755 /* did the hardware decode the packet and checksum? */ 756 if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) 757 return; 758 759 /* both known and outer_ip must be set for the below code to work */ 760 if (!(decoded.known && decoded.outer_ip)) 761 return; 762 763 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 764 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) 765 ipv4 = true; 766 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 767 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 768 ipv6 = true; 769 770 if (ipv4 && 771 (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 772 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) 773 goto checksum_fail; 774 775 /* likely incorrect csum if alternate IP extension headers found */ 776 if (ipv6 && 777 rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) 778 /* don't increment checksum err here, non-fatal err */ 779 return; 780 781 /* there was some L4 error, count error and punt packet to the stack */ 782 if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) 783 goto checksum_fail; 784 785 /* handle packets that were not able to be checksummed due 786 * to arrival speed, in this case the stack can compute 787 * the csum. 788 */ 789 if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) 790 return; 791 792 /* If VXLAN traffic has an outer UDPv4 checksum we need to check 793 * it in the driver, hardware does not do it for us. 
         * Since L3L4P bit was set we assume a valid IHL value (>=5)
         * so the total length of IPv4 header is IHL*4 bytes
         * The UDP_0 bit *may* be set if the *inner* header is UDP
         */
        if (ipv4_tunnel &&
            (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
            !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
                skb->transport_header = skb->mac_header +
                                        sizeof(struct ethhdr) +
                                        (ip_hdr(skb)->ihl * 4);

                /* Add 4 bytes for VLAN tagged packets */
                skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
                                          skb->protocol == htons(ETH_P_8021AD))
                                          ? VLAN_HLEN : 0;

                rx_udp_csum = udp_csum(skb);
                iph = ip_hdr(skb);
                csum = csum_tcpudp_magic(
                                iph->saddr, iph->daddr,
                                (skb->len - skb_transport_offset(skb)),
                                IPPROTO_UDP, rx_udp_csum);

                if (udp_hdr(skb)->check != csum)
                        goto checksum_fail;
        }

        skb->ip_summed = CHECKSUM_UNNECESSARY;
        skb->csum_level = ipv4_tunnel || ipv6_tunnel;

        return;

checksum_fail:
        vsi->back->hw_csum_rx_error++;
}

/**
 * i40e_rx_hash - returns the hash value from the Rx descriptor
 * @ring: descriptor ring
 * @rx_desc: specific descriptor
 **/
static inline u32 i40e_rx_hash(struct i40e_ring *ring,
                               union i40e_rx_desc *rx_desc)
{
        const __le64 rss_mask =
                cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
                            I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);

        if ((ring->netdev->features & NETIF_F_RXHASH) &&
            (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
                return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
        else
                return 0;
}

/**
 * i40e_ptype_to_hash - get a hash type
 * @ptype: the ptype value from the descriptor
 *
 * Returns a hash type to be used by skb_set_hash
 **/
static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
{
        struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);

        if (!decoded.known)
                return PKT_HASH_TYPE_NONE;

        if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
            decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
                return PKT_HASH_TYPE_L4;
        else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
                 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
                return PKT_HASH_TYPE_L3;
        else
                return PKT_HASH_TYPE_L2;
}

/**
 * i40e_clean_rx_irq - Reclaim resources after receive completes
 * @rx_ring: rx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns true if there's any budget left (e.g.
the clean is finished) 878 **/ 879static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) 880{ 881 unsigned int total_rx_bytes = 0, total_rx_packets = 0; 882 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; 883 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 884 const int current_node = numa_node_id(); 885 struct i40e_vsi *vsi = rx_ring->vsi; 886 u16 i = rx_ring->next_to_clean; 887 union i40e_rx_desc *rx_desc; 888 u32 rx_error, rx_status; 889 u8 rx_ptype; 890 u64 qword; 891 892 rx_desc = I40E_RX_DESC(rx_ring, i); 893 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 894 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 895 I40E_RXD_QW1_STATUS_SHIFT; 896 897 while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) { 898 union i40e_rx_desc *next_rxd; 899 struct i40e_rx_buffer *rx_bi; 900 struct sk_buff *skb; 901 u16 vlan_tag; 902 rx_bi = &rx_ring->rx_bi[i]; 903 skb = rx_bi->skb; 904 prefetch(skb->data); 905 906 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 907 I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 908 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> 909 I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 910 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> 911 I40E_RXD_QW1_LENGTH_SPH_SHIFT; 912 913 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> 914 I40E_RXD_QW1_ERROR_SHIFT; 915 rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 916 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); 917 918 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> 919 I40E_RXD_QW1_PTYPE_SHIFT; 920 rx_bi->skb = NULL; 921 922 /* This memory barrier is needed to keep us from reading 923 * any other fields out of the rx_desc until we know the 924 * STATUS_DD bit is set 925 */ 926 rmb(); 927 928 /* Get the header and possibly the whole packet 929 * If this is an skb from previous receive dma will be 0 930 */ 931 if (rx_bi->dma) { 932 u16 len; 933 934 if (rx_hbo) 935 len = I40E_RX_HDR_SIZE; 936 else if (rx_sph) 937 len = rx_header_len; 938 else if (rx_packet_len) 939 len = rx_packet_len; /* 1buf/no split found */ 940 else 941 len = rx_header_len; /* split always mode */ 942 943 skb_put(skb, len); 944 dma_unmap_single(rx_ring->dev, 945 rx_bi->dma, 946 rx_ring->rx_buf_len, 947 DMA_FROM_DEVICE); 948 rx_bi->dma = 0; 949 } 950 951 /* Get the rest of the data if this was a header split */ 952 if (ring_is_ps_enabled(rx_ring) && rx_packet_len) { 953 954 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 955 rx_bi->page, 956 rx_bi->page_offset, 957 rx_packet_len); 958 959 skb->len += rx_packet_len; 960 skb->data_len += rx_packet_len; 961 skb->truesize += rx_packet_len; 962 963 if ((page_count(rx_bi->page) == 1) && 964 (page_to_nid(rx_bi->page) == current_node)) 965 get_page(rx_bi->page); 966 else 967 rx_bi->page = NULL; 968 969 dma_unmap_page(rx_ring->dev, 970 rx_bi->page_dma, 971 PAGE_SIZE / 2, 972 DMA_FROM_DEVICE); 973 rx_bi->page_dma = 0; 974 } 975 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd); 976 977 if (unlikely( 978 !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) { 979 struct i40e_rx_buffer *next_buffer; 980 981 next_buffer = &rx_ring->rx_bi[i]; 982 983 if (ring_is_ps_enabled(rx_ring)) { 984 rx_bi->skb = next_buffer->skb; 985 rx_bi->dma = next_buffer->dma; 986 next_buffer->skb = skb; 987 next_buffer->dma = 0; 988 } 989 rx_ring->rx_stats.non_eop_descs++; 990 goto next_desc; 991 } 992 993 /* ERR_MASK will only have valid bits if EOP set */ 994 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 995 dev_kfree_skb_any(skb); 996 /* TODO: shouldn't we increment a counter indicating the 
997 * drop? 998 */ 999 goto next_desc; 1000 } 1001 1002 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), 1003 i40e_ptype_to_hash(rx_ptype)); 1004 /* probably a little skewed due to removing CRC */ 1005 total_rx_bytes += skb->len; 1006 total_rx_packets++; 1007 1008 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1009 1010 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); 1011 1012 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) 1013 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) 1014 : 0; 1015 i40e_receive_skb(rx_ring, skb, vlan_tag); 1016 1017 rx_ring->netdev->last_rx = jiffies; 1018 budget--; 1019next_desc: 1020 rx_desc->wb.qword1.status_error_len = 0; 1021 if (!budget) 1022 break; 1023 1024 cleaned_count++; 1025 /* return some buffers to hardware, one at a time is too slow */ 1026 if (cleaned_count >= I40E_RX_BUFFER_WRITE) { 1027 i40evf_alloc_rx_buffers(rx_ring, cleaned_count); 1028 cleaned_count = 0; 1029 } 1030 1031 /* use prefetched values */ 1032 rx_desc = next_rxd; 1033 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); 1034 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> 1035 I40E_RXD_QW1_STATUS_SHIFT; 1036 } 1037 1038 rx_ring->next_to_clean = i; 1039 u64_stats_update_begin(&rx_ring->syncp); 1040 rx_ring->stats.packets += total_rx_packets; 1041 rx_ring->stats.bytes += total_rx_bytes; 1042 u64_stats_update_end(&rx_ring->syncp); 1043 rx_ring->q_vector->rx.total_packets += total_rx_packets; 1044 rx_ring->q_vector->rx.total_bytes += total_rx_bytes; 1045 1046 if (cleaned_count) 1047 i40evf_alloc_rx_buffers(rx_ring, cleaned_count); 1048 1049 return budget > 0; 1050} 1051 1052/** 1053 * i40evf_napi_poll - NAPI polling Rx/Tx cleanup routine 1054 * @napi: napi struct with our devices info in it 1055 * @budget: amount of work driver is allowed to do this pass, in packets 1056 * 1057 * This function will clean all queues associated with a q_vector. 1058 * 1059 * Returns the amount of work done 1060 **/ 1061int i40evf_napi_poll(struct napi_struct *napi, int budget) 1062{ 1063 struct i40e_q_vector *q_vector = 1064 container_of(napi, struct i40e_q_vector, napi); 1065 struct i40e_vsi *vsi = q_vector->vsi; 1066 struct i40e_ring *ring; 1067 bool clean_complete = true; 1068 int budget_per_ring; 1069 1070 if (test_bit(__I40E_DOWN, &vsi->state)) { 1071 napi_complete(napi); 1072 return 0; 1073 } 1074 1075 /* Since the actual Tx work is minimal, we can give the Tx a larger 1076 * budget and be more aggressive about cleaning up the Tx descriptors. 1077 */ 1078 i40e_for_each_ring(ring, q_vector->tx) 1079 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); 1080 1081 /* We attempt to distribute budget to each Rx queue fairly, but don't 1082 * allow the budget to go below 1 because that would exit polling early. 
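         * (For example, a poll budget of 64 spread over 4 ring pairs gives
         * each Rx ring a per-ring budget of max(64 / 4, 1) = 16 below.)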
1083 */ 1084 budget_per_ring = max(budget/q_vector->num_ringpairs, 1); 1085 1086 i40e_for_each_ring(ring, q_vector->rx) 1087 clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring); 1088 1089 /* If work not completed, return budget and polling will return */ 1090 if (!clean_complete) 1091 return budget; 1092 1093 /* Work is done so exit the polling mode and re-enable the interrupt */ 1094 napi_complete(napi); 1095 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) || 1096 ITR_IS_DYNAMIC(vsi->tx_itr_setting)) 1097 i40e_update_dynamic_itr(q_vector); 1098 1099 if (!test_bit(__I40E_DOWN, &vsi->state)) 1100 i40evf_irq_enable_queues(vsi->back, 1 << q_vector->v_idx); 1101 1102 return 0; 1103} 1104 1105/** 1106 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW 1107 * @skb: send buffer 1108 * @tx_ring: ring to send buffer on 1109 * @flags: the tx flags to be set 1110 * 1111 * Checks the skb and set up correspondingly several generic transmit flags 1112 * related to VLAN tagging for the HW, such as VLAN, DCB, etc. 1113 * 1114 * Returns error code indicate the frame should be dropped upon error and the 1115 * otherwise returns 0 to indicate the flags has been set properly. 1116 **/ 1117static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, 1118 struct i40e_ring *tx_ring, 1119 u32 *flags) 1120{ 1121 __be16 protocol = skb->protocol; 1122 u32 tx_flags = 0; 1123 1124 /* if we have a HW VLAN tag being added, default to the HW one */ 1125 if (vlan_tx_tag_present(skb)) { 1126 tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; 1127 tx_flags |= I40E_TX_FLAGS_HW_VLAN; 1128 /* else if it is a SW VLAN, check the next protocol and store the tag */ 1129 } else if (protocol == htons(ETH_P_8021Q)) { 1130 struct vlan_hdr *vhdr, _vhdr; 1131 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); 1132 if (!vhdr) 1133 return -EINVAL; 1134 1135 protocol = vhdr->h_vlan_encapsulated_proto; 1136 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT; 1137 tx_flags |= I40E_TX_FLAGS_SW_VLAN; 1138 } 1139 1140 *flags = tx_flags; 1141 return 0; 1142} 1143 1144/** 1145 * i40e_tso - set up the tso context descriptor 1146 * @tx_ring: ptr to the ring to send 1147 * @skb: ptr to the skb we're sending 1148 * @tx_flags: the collected send information 1149 * @protocol: the send protocol 1150 * @hdr_len: ptr to the size of the packet header 1151 * @cd_tunneling: ptr to context descriptor bits 1152 * 1153 * Returns 0 if no TSO can happen, 1 if tso is going, or error 1154 **/ 1155static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, 1156 u32 tx_flags, __be16 protocol, u8 *hdr_len, 1157 u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) 1158{ 1159 u32 cd_cmd, cd_tso_len, cd_mss; 1160 struct ipv6hdr *ipv6h; 1161 struct tcphdr *tcph; 1162 struct iphdr *iph; 1163 u32 l4len; 1164 int err; 1165 1166 if (!skb_is_gso(skb)) 1167 return 0; 1168 1169 err = skb_cow_head(skb, 0); 1170 if (err < 0) 1171 return err; 1172 1173 if (protocol == htons(ETH_P_IP)) { 1174 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 1175 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); 1176 iph->tot_len = 0; 1177 iph->check = 0; 1178 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 1179 0, IPPROTO_TCP, 0); 1180 } else if (skb_is_gso_v6(skb)) { 1181 1182 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) 1183 : ipv6_hdr(skb); 1184 tcph = skb->encapsulation ? 
inner_tcp_hdr(skb) : tcp_hdr(skb); 1185 ipv6h->payload_len = 0; 1186 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 1187 0, IPPROTO_TCP, 0); 1188 } 1189 1190 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); 1191 *hdr_len = (skb->encapsulation 1192 ? (skb_inner_transport_header(skb) - skb->data) 1193 : skb_transport_offset(skb)) + l4len; 1194 1195 /* find the field values */ 1196 cd_cmd = I40E_TX_CTX_DESC_TSO; 1197 cd_tso_len = skb->len - *hdr_len; 1198 cd_mss = skb_shinfo(skb)->gso_size; 1199 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 1200 ((u64)cd_tso_len << 1201 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 1202 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 1203 return 1; 1204} 1205 1206/** 1207 * i40e_tx_enable_csum - Enable Tx checksum offloads 1208 * @skb: send buffer 1209 * @tx_flags: Tx flags currently set 1210 * @td_cmd: Tx descriptor command bits to set 1211 * @td_offset: Tx descriptor header offsets to set 1212 * @cd_tunneling: ptr to context desc bits 1213 **/ 1214static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, 1215 u32 *td_cmd, u32 *td_offset, 1216 struct i40e_ring *tx_ring, 1217 u32 *cd_tunneling) 1218{ 1219 struct ipv6hdr *this_ipv6_hdr; 1220 unsigned int this_tcp_hdrlen; 1221 struct iphdr *this_ip_hdr; 1222 u32 network_hdr_len; 1223 u8 l4_hdr = 0; 1224 1225 if (skb->encapsulation) { 1226 network_hdr_len = skb_inner_network_header_len(skb); 1227 this_ip_hdr = inner_ip_hdr(skb); 1228 this_ipv6_hdr = inner_ipv6_hdr(skb); 1229 this_tcp_hdrlen = inner_tcp_hdrlen(skb); 1230 1231 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1232 1233 if (tx_flags & I40E_TX_FLAGS_TSO) { 1234 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; 1235 ip_hdr(skb)->check = 0; 1236 } else { 1237 *cd_tunneling |= 1238 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1239 } 1240 } else if (tx_flags & I40E_TX_FLAGS_IPV6) { 1241 if (tx_flags & I40E_TX_FLAGS_TSO) { 1242 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; 1243 ip_hdr(skb)->check = 0; 1244 } else { 1245 *cd_tunneling |= 1246 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; 1247 } 1248 } 1249 1250 /* Now set the ctx descriptor fields */ 1251 *cd_tunneling |= (skb_network_header_len(skb) >> 2) << 1252 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | 1253 I40E_TXD_CTX_UDP_TUNNELING | 1254 ((skb_inner_network_offset(skb) - 1255 skb_transport_offset(skb)) >> 1) << 1256 I40E_TXD_CTX_QW0_NATLEN_SHIFT; 1257 1258 } else { 1259 network_hdr_len = skb_network_header_len(skb); 1260 this_ip_hdr = ip_hdr(skb); 1261 this_ipv6_hdr = ipv6_hdr(skb); 1262 this_tcp_hdrlen = tcp_hdrlen(skb); 1263 } 1264 1265 /* Enable IP checksum offloads */ 1266 if (tx_flags & I40E_TX_FLAGS_IPV4) { 1267 l4_hdr = this_ip_hdr->protocol; 1268 /* the stack computes the IP header already, the only time we 1269 * need the hardware to recompute it is in the case of TSO. 
                 */
                if (tx_flags & I40E_TX_FLAGS_TSO) {
                        *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
                        this_ip_hdr->check = 0;
                } else {
                        *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
                }
                /* Now set the td_offset for IP header length */
                *td_offset = (network_hdr_len >> 2) <<
                              I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
                l4_hdr = this_ipv6_hdr->nexthdr;
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
                /* Now set the td_offset for IP header length */
                *td_offset = (network_hdr_len >> 2) <<
                              I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        }
        /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
        *td_offset |= (skb_network_offset(skb) >> 1) <<
                       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

        /* Enable L4 checksum offloads */
        switch (l4_hdr) {
        case IPPROTO_TCP:
                /* enable checksum offloads */
                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
                *td_offset |= (this_tcp_hdrlen >> 2) <<
                               I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        case IPPROTO_SCTP:
                /* enable SCTP checksum offload */
                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
                *td_offset |= (sizeof(struct sctphdr) >> 2) <<
                               I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        case IPPROTO_UDP:
                /* enable UDP checksum offload */
                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
                *td_offset |= (sizeof(struct udphdr) >> 2) <<
                               I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        default:
                break;
        }
}

/**
 * i40e_create_tx_ctx - Build the Tx context descriptor
 * @tx_ring: ring to create the descriptor on
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: Quad Word 0 - bits 0-31
 * @cd_l2tag2: Quad Word 0 - bits 32-63
 **/
static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
                               const u64 cd_type_cmd_tso_mss,
                               const u32 cd_tunneling, const u32 cd_l2tag2)
{
        struct i40e_tx_context_desc *context_desc;
        int i = tx_ring->next_to_use;

        if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
            !cd_tunneling && !cd_l2tag2)
                return;

        /* grab the next descriptor */
        context_desc = I40E_TX_CTXTDESC(tx_ring, i);

        i++;
        tx_ring->next_to_use = (i < tx_ring->count) ?
i : 0; 1339 1340 /* cpu_to_le32 and assign to struct fields */ 1341 context_desc->tunneling_params = cpu_to_le32(cd_tunneling); 1342 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2); 1343 context_desc->rsvd = cpu_to_le16(0); 1344 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); 1345} 1346 1347/** 1348 * i40e_tx_map - Build the Tx descriptor 1349 * @tx_ring: ring to send buffer on 1350 * @skb: send buffer 1351 * @first: first buffer info buffer to use 1352 * @tx_flags: collected send information 1353 * @hdr_len: size of the packet header 1354 * @td_cmd: the command field in the descriptor 1355 * @td_offset: offset for checksum or crc 1356 **/ 1357static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, 1358 struct i40e_tx_buffer *first, u32 tx_flags, 1359 const u8 hdr_len, u32 td_cmd, u32 td_offset) 1360{ 1361 unsigned int data_len = skb->data_len; 1362 unsigned int size = skb_headlen(skb); 1363 struct skb_frag_struct *frag; 1364 struct i40e_tx_buffer *tx_bi; 1365 struct i40e_tx_desc *tx_desc; 1366 u16 i = tx_ring->next_to_use; 1367 u32 td_tag = 0; 1368 dma_addr_t dma; 1369 u16 gso_segs; 1370 1371 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { 1372 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; 1373 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> 1374 I40E_TX_FLAGS_VLAN_SHIFT; 1375 } 1376 1377 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) 1378 gso_segs = skb_shinfo(skb)->gso_segs; 1379 else 1380 gso_segs = 1; 1381 1382 /* multiply data chunks by size of headers */ 1383 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); 1384 first->gso_segs = gso_segs; 1385 first->skb = skb; 1386 first->tx_flags = tx_flags; 1387 1388 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1389 1390 tx_desc = I40E_TX_DESC(tx_ring, i); 1391 tx_bi = first; 1392 1393 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1394 if (dma_mapping_error(tx_ring->dev, dma)) 1395 goto dma_error; 1396 1397 /* record length, and DMA address */ 1398 dma_unmap_len_set(tx_bi, len, size); 1399 dma_unmap_addr_set(tx_bi, dma, dma); 1400 1401 tx_desc->buffer_addr = cpu_to_le64(dma); 1402 1403 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { 1404 tx_desc->cmd_type_offset_bsz = 1405 build_ctob(td_cmd, td_offset, 1406 I40E_MAX_DATA_PER_TXD, td_tag); 1407 1408 tx_desc++; 1409 i++; 1410 if (i == tx_ring->count) { 1411 tx_desc = I40E_TX_DESC(tx_ring, 0); 1412 i = 0; 1413 } 1414 1415 dma += I40E_MAX_DATA_PER_TXD; 1416 size -= I40E_MAX_DATA_PER_TXD; 1417 1418 tx_desc->buffer_addr = cpu_to_le64(dma); 1419 } 1420 1421 if (likely(!data_len)) 1422 break; 1423 1424 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, 1425 size, td_tag); 1426 1427 tx_desc++; 1428 i++; 1429 if (i == tx_ring->count) { 1430 tx_desc = I40E_TX_DESC(tx_ring, 0); 1431 i = 0; 1432 } 1433 1434 size = skb_frag_size(frag); 1435 data_len -= size; 1436 1437 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, 1438 DMA_TO_DEVICE); 1439 1440 tx_bi = &tx_ring->tx_bi[i]; 1441 } 1442 1443 /* Place RS bit on last descriptor of any packet that spans across the 1444 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. 
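         * (Each Tx descriptor is 16 bytes, so WB_STRIDE of 0x3 groups four
         * descriptors per 64-byte cache line; the test below decides whether
         * this packet's last descriptor is written with EOP alone or with
         * EOP plus RS.)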
1445 */ 1446#define WB_STRIDE 0x3 1447 if (((i & WB_STRIDE) != WB_STRIDE) && 1448 (first <= &tx_ring->tx_bi[i]) && 1449 (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { 1450 tx_desc->cmd_type_offset_bsz = 1451 build_ctob(td_cmd, td_offset, size, td_tag) | 1452 cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << 1453 I40E_TXD_QW1_CMD_SHIFT); 1454 } else { 1455 tx_desc->cmd_type_offset_bsz = 1456 build_ctob(td_cmd, td_offset, size, td_tag) | 1457 cpu_to_le64((u64)I40E_TXD_CMD << 1458 I40E_TXD_QW1_CMD_SHIFT); 1459 } 1460 1461 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, 1462 tx_ring->queue_index), 1463 first->bytecount); 1464 1465 /* set the timestamp */ 1466 first->time_stamp = jiffies; 1467 1468 /* Force memory writes to complete before letting h/w 1469 * know there are new descriptors to fetch. (Only 1470 * applicable for weak-ordered memory model archs, 1471 * such as IA-64). 1472 */ 1473 wmb(); 1474 1475 /* set next_to_watch value indicating a packet is present */ 1476 first->next_to_watch = tx_desc; 1477 1478 i++; 1479 if (i == tx_ring->count) 1480 i = 0; 1481 1482 tx_ring->next_to_use = i; 1483 1484 /* notify HW of packet */ 1485 writel(i, tx_ring->tail); 1486 1487 return; 1488 1489dma_error: 1490 dev_info(tx_ring->dev, "TX DMA map failed\n"); 1491 1492 /* clear dma mappings for failed tx_bi map */ 1493 for (;;) { 1494 tx_bi = &tx_ring->tx_bi[i]; 1495 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); 1496 if (tx_bi == first) 1497 break; 1498 if (i == 0) 1499 i = tx_ring->count; 1500 i--; 1501 } 1502 1503 tx_ring->next_to_use = i; 1504} 1505 1506/** 1507 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions 1508 * @tx_ring: the ring to be checked 1509 * @size: the size buffer we want to assure is available 1510 * 1511 * Returns -EBUSY if a stop is needed, else 0 1512 **/ 1513static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1514{ 1515 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); 1516 /* Memory barrier before checking head and tail */ 1517 smp_mb(); 1518 1519 /* Check again in a case another CPU has just made room available. */ 1520 if (likely(I40E_DESC_UNUSED(tx_ring) < size)) 1521 return -EBUSY; 1522 1523 /* A reprieve! - use start_queue because it doesn't call schedule */ 1524 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); 1525 ++tx_ring->tx_stats.restart_queue; 1526 return 0; 1527} 1528 1529/** 1530 * i40e_maybe_stop_tx - 1st level check for tx stop conditions 1531 * @tx_ring: the ring to be checked 1532 * @size: the size buffer we want to assure is available 1533 * 1534 * Returns 0 if stop is not needed 1535 **/ 1536static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) 1537{ 1538 if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) 1539 return 0; 1540 return __i40e_maybe_stop_tx(tx_ring, size); 1541} 1542 1543/** 1544 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed 1545 * @skb: send buffer 1546 * @tx_ring: ring to send buffer on 1547 * 1548 * Returns number of data descriptors needed for this skb. Returns 0 to indicate 1549 * there is not enough descriptors available in this ring since we need at least 1550 * one descriptor. 
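 * (As an illustration: a non-TSO skb whose linear area and three page
 * fragments each fit in a single descriptor yields count = 4, and
 * i40e_maybe_stop_tx() is then asked for 4 + 4 + 1 free descriptors
 * before the frame is accepted.)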
1551 **/ 1552static int i40e_xmit_descriptor_count(struct sk_buff *skb, 1553 struct i40e_ring *tx_ring) 1554{ 1555 unsigned int f; 1556 int count = 0; 1557 1558 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, 1559 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, 1560 * + 4 desc gap to avoid the cache line where head is, 1561 * + 1 desc for context descriptor, 1562 * otherwise try next time 1563 */ 1564 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1565 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); 1566 1567 count += TXD_USE_COUNT(skb_headlen(skb)); 1568 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { 1569 tx_ring->tx_stats.tx_busy++; 1570 return 0; 1571 } 1572 return count; 1573} 1574 1575/** 1576 * i40e_xmit_frame_ring - Sends buffer on Tx ring 1577 * @skb: send buffer 1578 * @tx_ring: ring to send buffer on 1579 * 1580 * Returns NETDEV_TX_OK if sent, else an error code 1581 **/ 1582static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, 1583 struct i40e_ring *tx_ring) 1584{ 1585 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT; 1586 u32 cd_tunneling = 0, cd_l2tag2 = 0; 1587 struct i40e_tx_buffer *first; 1588 u32 td_offset = 0; 1589 u32 tx_flags = 0; 1590 __be16 protocol; 1591 u32 td_cmd = 0; 1592 u8 hdr_len = 0; 1593 int tso; 1594 if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) 1595 return NETDEV_TX_BUSY; 1596 1597 /* prepare the xmit flags */ 1598 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) 1599 goto out_drop; 1600 1601 /* obtain protocol of skb */ 1602 protocol = vlan_get_protocol(skb); 1603 1604 /* record the location of the first descriptor for this packet */ 1605 first = &tx_ring->tx_bi[tx_ring->next_to_use]; 1606 1607 /* setup IPv4/IPv6 offloads */ 1608 if (protocol == htons(ETH_P_IP)) 1609 tx_flags |= I40E_TX_FLAGS_IPV4; 1610 else if (protocol == htons(ETH_P_IPV6)) 1611 tx_flags |= I40E_TX_FLAGS_IPV6; 1612 1613 tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, 1614 &cd_type_cmd_tso_mss, &cd_tunneling); 1615 1616 if (tso < 0) 1617 goto out_drop; 1618 else if (tso) 1619 tx_flags |= I40E_TX_FLAGS_TSO; 1620 1621 skb_tx_timestamp(skb); 1622 1623 /* always enable CRC insertion offload */ 1624 td_cmd |= I40E_TX_DESC_CMD_ICRC; 1625 1626 /* Always offload the checksum, since it's in the data descriptor */ 1627 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1628 tx_flags |= I40E_TX_FLAGS_CSUM; 1629 1630 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, 1631 tx_ring, &cd_tunneling); 1632 } 1633 1634 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, 1635 cd_tunneling, cd_l2tag2); 1636 1637 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, 1638 td_cmd, td_offset); 1639 1640 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); 1641 1642 return NETDEV_TX_OK; 1643 1644out_drop: 1645 dev_kfree_skb_any(skb); 1646 return NETDEV_TX_OK; 1647} 1648 1649/** 1650 * i40evf_xmit_frame - Selects the correct VSI and Tx queue to send buffer 1651 * @skb: send buffer 1652 * @netdev: network interface device structure 1653 * 1654 * Returns NETDEV_TX_OK if sent, else an error code 1655 **/ 1656netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) 1657{ 1658 struct i40evf_adapter *adapter = netdev_priv(netdev); 1659 struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; 1660 1661 /* hardware can't handle really short frames, hardware padding works 1662 * beyond this point 1663 */ 1664 if (unlikely(skb->len < I40E_MIN_TX_LEN)) { 1665 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) 1666 return NETDEV_TX_OK; 1667 skb->len = 
I40E_MIN_TX_LEN; 1668 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); 1669 } 1670 1671 return i40e_xmit_frame_ring(skb, tx_ring); 1672} 1673
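
/* Illustrative sketch only, not part of the driver: one plausible per-queue
 * use of the setup/teardown helpers exported by this file, with error
 * handling and locking omitted. "adapter", "q", "rx_rings" and the goto
 * labels are placeholders here, not the driver's real control flow.
 *
 *	struct i40e_ring *tx_ring = adapter->tx_rings[q];
 *	struct i40e_ring *rx_ring = adapter->rx_rings[q];
 *
 *	if (i40evf_setup_tx_descriptors(tx_ring))
 *		goto err;
 *	if (i40evf_setup_rx_descriptors(rx_ring))
 *		goto err_free_tx;
 *	i40evf_alloc_rx_buffers(rx_ring, I40E_DESC_UNUSED(rx_ring));
 *
 *	// traffic flows; i40evf_napi_poll() reclaims Tx and Rx work
 *
 *	i40evf_free_rx_resources(rx_ring);
 * err_free_tx:
 *	i40evf_free_tx_resources(tx_ring);
 * err:
 *	return;
 */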