1/* A network driver using virtio.
2 *
3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18//#define DEBUG
19#include <linux/netdevice.h>
20#include <linux/etherdevice.h>
21#include <linux/ethtool.h>
22#include <linux/module.h>
23#include <linux/virtio.h>
24#include <linux/virtio_net.h>
25#include <linux/scatterlist.h>
26#include <linux/if_vlan.h>
27#include <linux/slab.h>
28#include <linux/cpu.h>
29#include <linux/average.h>
30#include <net/busy_poll.h>
31
32static int napi_weight = NAPI_POLL_WEIGHT;
33module_param(napi_weight, int, 0444);
34
35static bool csum = true, gso = true;
36module_param(csum, bool, 0444);
37module_param(gso, bool, 0444);
38
39/* FIXME: MTU in config. */
40#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
41#define GOOD_COPY_LEN	128
42
43/* Weight used for the RX packet size EWMA. The average packet size is used to
44 * determine the packet buffer size when refilling RX rings. As the entire RX
45 * ring may be refilled at once, the weight is chosen so that the EWMA will be
46 * insensitive to short-term, transient changes in packet size.
47 */
48#define RECEIVE_AVG_WEIGHT 64
49
50/* Minimum alignment for mergeable packet buffers. */
51#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
52
53#define VIRTNET_DRIVER_VERSION "1.0.0"
54
55struct virtnet_stats {
56	struct u64_stats_sync tx_syncp;
57	struct u64_stats_sync rx_syncp;
58	u64 tx_bytes;
59	u64 tx_packets;
60
61	u64 rx_bytes;
62	u64 rx_packets;
63};
64
65/* Internal representation of a send virtqueue */
66struct send_queue {
67	/* Virtqueue associated with this send_queue */
68	struct virtqueue *vq;
69
70	/* TX: fragments + linear part + virtio header */
71	struct scatterlist sg[MAX_SKB_FRAGS + 2];
72
73	/* Name of the send queue: output.$index */
74	char name[40];
75};
76
77/* Internal representation of a receive virtqueue */
78struct receive_queue {
79	/* Virtqueue associated with this receive_queue */
80	struct virtqueue *vq;
81
82	struct napi_struct napi;
83
84	/* Chain pages by the private ptr. */
85	struct page *pages;
86
87	/* Average packet length for mergeable receive buffers. */
88	struct ewma mrg_avg_pkt_len;
89
90	/* Page frag for packet buffer allocation. */
91	struct page_frag alloc_frag;
92
93	/* RX: fragments + linear part + virtio header */
94	struct scatterlist sg[MAX_SKB_FRAGS + 2];
95
96	/* Name of this receive queue: input.$index */
97	char name[40];
98};
99
100struct virtnet_info {
101	struct virtio_device *vdev;
102	struct virtqueue *cvq;
103	struct net_device *dev;
104	struct send_queue *sq;
105	struct receive_queue *rq;
106	unsigned int status;
107
108	/* Max # of queue pairs supported by the device */
109	u16 max_queue_pairs;
110
111	/* # of queue pairs currently used by the driver */
112	u16 curr_queue_pairs;
113
114	/* I like... big packets and I cannot lie! */
115	bool big_packets;
116
117	/* Host will merge rx buffers for big packets (shake it! shake it!) */
118	bool mergeable_rx_bufs;
119
120	/* Has control virtqueue */
121	bool has_cvq;
122
123	/* Host can handle any s/g split between our header and packet data */
124	bool any_header_sg;
125
126	/* Active statistics */
127	struct virtnet_stats __percpu *stats;
128
129	/* Work struct for refilling if we run low on memory. */
130	struct delayed_work refill;
131
132	/* Work struct for config space updates */
133	struct work_struct config_work;
134
135	/* Does the affinity hint is set for virtqueues? */
136	bool affinity_hint_set;
137
138	/* CPU hot plug notifier */
139	struct notifier_block nb;
140};
141
142struct skb_vnet_hdr {
143	union {
144		struct virtio_net_hdr hdr;
145		struct virtio_net_hdr_mrg_rxbuf mhdr;
146	};
147};
148
149struct padded_vnet_hdr {
150	struct virtio_net_hdr hdr;
151	/*
152	 * virtio_net_hdr should be in a separate sg buffer because of a QEMU
153	 * bug, and the data sg buffer shares the same page with this header sg.
154	 * This padding makes the next sg 16 byte aligned after virtio_net_hdr.
155	 */
156	char padding[6];
157};
158
159/* Converting between virtqueue no. and kernel tx/rx queue no.
160 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
161 */
162static int vq2txq(struct virtqueue *vq)
163{
164	return (vq->index - 1) / 2;
165}
166
167static int txq2vq(int txq)
168{
169	return txq * 2 + 1;
170}
171
172static int vq2rxq(struct virtqueue *vq)
173{
174	return vq->index / 2;
175}
176
177static int rxq2vq(int rxq)
178{
179	return rxq * 2;
180}
181
182static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
183{
184	return (struct skb_vnet_hdr *)skb->cb;
185}
186
187/*
188 * The page private field is used to chain pages for big packets; put the
189 * whole most recently used list at the beginning for reuse
190 */
191static void give_pages(struct receive_queue *rq, struct page *page)
192{
193	struct page *end;
194
195	/* Find end of list, sew whole thing into vi->rq.pages. */
196	for (end = page; end->private; end = (struct page *)end->private);
197	end->private = (unsigned long)rq->pages;
198	rq->pages = page;
199}
200
201static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
202{
203	struct page *p = rq->pages;
204
205	if (p) {
206		rq->pages = (struct page *)p->private;
207		/* clear private here, it is used to chain pages */
208		p->private = 0;
209	} else
210		p = alloc_page(gfp_mask);
211	return p;
212}
213
214static void skb_xmit_done(struct virtqueue *vq)
215{
216	struct virtnet_info *vi = vq->vdev->priv;
217
218	/* Suppress further interrupts. */
219	virtqueue_disable_cb(vq);
220
221	/* We were probably waiting for more output buffers. */
222	netif_wake_subqueue(vi->dev, vq2txq(vq));
223}
224
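/* Mergeable buffer contexts pack a buffer address and its truesize into one
 * unsigned long: the address is MERGEABLE_BUFFER_ALIGN aligned, so the low
 * bits hold (truesize / MERGEABLE_BUFFER_ALIGN) - 1.
 */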
225static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
226{
227	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
228	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
229}
230
231static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
232{
233	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
234
235}
236
237static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
238{
239	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
240	return (unsigned long)buf | (size - 1);
241}
242
243/* Called from bottom half context */
244static struct sk_buff *page_to_skb(struct receive_queue *rq,
245				   struct page *page, unsigned int offset,
246				   unsigned int len, unsigned int truesize)
247{
248	struct virtnet_info *vi = rq->vq->vdev->priv;
249	struct sk_buff *skb;
250	struct skb_vnet_hdr *hdr;
251	unsigned int copy, hdr_len, hdr_padded_len;
252	char *p;
253
254	p = page_address(page) + offset;
255
256	/* copy small packet so we can reuse these pages for small data */
257	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
258	if (unlikely(!skb))
259		return NULL;
260
261	hdr = skb_vnet_hdr(skb);
262
263	if (vi->mergeable_rx_bufs) {
264		hdr_len = sizeof hdr->mhdr;
265		hdr_padded_len = sizeof hdr->mhdr;
266	} else {
267		hdr_len = sizeof hdr->hdr;
268		hdr_padded_len = sizeof(struct padded_vnet_hdr);
269	}
270
271	memcpy(hdr, p, hdr_len);
272
273	len -= hdr_len;
274	offset += hdr_padded_len;
275	p += hdr_padded_len;
276
277	copy = len;
278	if (copy > skb_tailroom(skb))
279		copy = skb_tailroom(skb);
280	memcpy(skb_put(skb, copy), p, copy);
281
282	len -= copy;
283	offset += copy;
284
285	if (vi->mergeable_rx_bufs) {
286		if (len)
287			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
288		else
289			put_page(page);
290		return skb;
291	}
292
293	/*
294	 * Verify that we can indeed put this data into a skb.
295	 * This is here to handle cases when the device erroneously
296	 * tries to receive more than is possible. This is usually
297	 * the case of a broken device.
298	 */
299	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
300		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
301		dev_kfree_skb(skb);
302		return NULL;
303	}
304	BUG_ON(offset >= PAGE_SIZE);
305	while (len) {
306		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
307		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
308				frag_size, truesize);
309		len -= frag_size;
310		page = (struct page *)page->private;
311		offset = 0;
312	}
313
314	if (page)
315		give_pages(rq, page);
316
317	return skb;
318}
319
320static struct sk_buff *receive_small(void *buf, unsigned int len)
321{
322	struct sk_buff *skb = buf;
323
324	len -= sizeof(struct virtio_net_hdr);
325	skb_trim(skb, len);
326
327	return skb;
328}
329
330static struct sk_buff *receive_big(struct net_device *dev,
331				   struct receive_queue *rq,
332				   void *buf,
333				   unsigned int len)
334{
335	struct page *page = buf;
336	struct sk_buff *skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
337
338	if (unlikely(!skb))
339		goto err;
340
341	return skb;
342
343err:
344	dev->stats.rx_dropped++;
345	give_pages(rq, page);
346	return NULL;
347}
348
349static struct sk_buff *receive_mergeable(struct net_device *dev,
350					 struct receive_queue *rq,
351					 unsigned long ctx,
352					 unsigned int len)
353{
354	void *buf = mergeable_ctx_to_buf_address(ctx);
355	struct skb_vnet_hdr *hdr = buf;
356	int num_buf = hdr->mhdr.num_buffers;
357	struct page *page = virt_to_head_page(buf);
358	int offset = buf - page_address(page);
359	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
360
361	struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
362	struct sk_buff *curr_skb = head_skb;
363
364	if (unlikely(!curr_skb))
365		goto err_skb;
366	while (--num_buf) {
367		int num_skb_frags;
368
369		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
370		if (unlikely(!ctx)) {
371			pr_debug("%s: rx error: %d buffers out of %d missing\n",
372				 dev->name, num_buf, hdr->mhdr.num_buffers);
373			dev->stats.rx_length_errors++;
374			goto err_buf;
375		}
376
377		buf = mergeable_ctx_to_buf_address(ctx);
378		page = virt_to_head_page(buf);
379
380		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
381		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
382			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
383
384			if (unlikely(!nskb))
385				goto err_skb;
386			if (curr_skb == head_skb)
387				skb_shinfo(curr_skb)->frag_list = nskb;
388			else
389				curr_skb->next = nskb;
390			curr_skb = nskb;
391			head_skb->truesize += nskb->truesize;
392			num_skb_frags = 0;
393		}
394		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
395		if (curr_skb != head_skb) {
396			head_skb->data_len += len;
397			head_skb->len += len;
398			head_skb->truesize += truesize;
399		}
400		offset = buf - page_address(page);
401		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
402			put_page(page);
403			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
404					     len, truesize);
405		} else {
406			skb_add_rx_frag(curr_skb, num_skb_frags, page,
407					offset, len, truesize);
408		}
409	}
410
411	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
412	return head_skb;
413
414err_skb:
415	put_page(page);
416	while (--num_buf) {
417		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
418		if (unlikely(!ctx)) {
419			pr_debug("%s: rx error: %d buffers missing\n",
420				 dev->name, num_buf);
421			dev->stats.rx_length_errors++;
422			break;
423		}
424		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
425		put_page(page);
426	}
427err_buf:
428	dev->stats.rx_dropped++;
429	dev_kfree_skb(head_skb);
430	return NULL;
431}
432
433static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
434{
435	struct virtnet_info *vi = rq->vq->vdev->priv;
436	struct net_device *dev = vi->dev;
437	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
438	struct sk_buff *skb;
439	struct skb_vnet_hdr *hdr;
440
441	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
442		pr_debug("%s: short packet %i\n", dev->name, len);
443		dev->stats.rx_length_errors++;
444		if (vi->mergeable_rx_bufs) {
445			unsigned long ctx = (unsigned long)buf;
446			void *base = mergeable_ctx_to_buf_address(ctx);
447			put_page(virt_to_head_page(base));
448		} else if (vi->big_packets) {
449			give_pages(rq, buf);
450		} else {
451			dev_kfree_skb(buf);
452		}
453		return;
454	}
455
456	if (vi->mergeable_rx_bufs)
457		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
458	else if (vi->big_packets)
459		skb = receive_big(dev, rq, buf, len);
460	else
461		skb = receive_small(buf, len);
462
463	if (unlikely(!skb))
464		return;
465
466	hdr = skb_vnet_hdr(skb);
467
468	u64_stats_update_begin(&stats->rx_syncp);
469	stats->rx_bytes += skb->len;
470	stats->rx_packets++;
471	u64_stats_update_end(&stats->rx_syncp);
472
473	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
474		pr_debug("Needs csum!\n");
475		if (!skb_partial_csum_set(skb,
476					  hdr->hdr.csum_start,
477					  hdr->hdr.csum_offset))
478			goto frame_err;
479	} else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
480		skb->ip_summed = CHECKSUM_UNNECESSARY;
481	}
482
483	skb->protocol = eth_type_trans(skb, dev);
484	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
485		 ntohs(skb->protocol), skb->len, skb->pkt_type);
486
487	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
488		pr_debug("GSO!\n");
489		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
490		case VIRTIO_NET_HDR_GSO_TCPV4:
491			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
492			break;
493		case VIRTIO_NET_HDR_GSO_UDP:
494		{
495			static bool warned;
496
497			if (!warned) {
498				warned = true;
499				netdev_warn(dev,
500					    "host using disabled UFO feature; please fix it\n");
501			}
502			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
503			break;
504		}
505		case VIRTIO_NET_HDR_GSO_TCPV6:
506			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
507			break;
508		default:
509			net_warn_ratelimited("%s: bad gso type %u.\n",
510					     dev->name, hdr->hdr.gso_type);
511			goto frame_err;
512		}
513
514		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
515			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
516
517		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
518		if (skb_shinfo(skb)->gso_size == 0) {
519			net_warn_ratelimited("%s: zero gso size.\n", dev->name);
520			goto frame_err;
521		}
522
523		/* Header must be checked, and gso_segs computed. */
524		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
525		skb_shinfo(skb)->gso_segs = 0;
526	}
527
528	skb_mark_napi_id(skb, &rq->napi);
529
530	netif_receive_skb(skb);
531	return;
532
533frame_err:
534	dev->stats.rx_frame_errors++;
535	dev_kfree_skb(skb);
536}
537
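/* Post a single linear skb of GOOD_PACKET_LEN bytes as a receive buffer:
 * the virtio header goes in sg[0] and the skb data in sg[1].
 */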
538static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
539{
540	struct virtnet_info *vi = rq->vq->vdev->priv;
541	struct sk_buff *skb;
542	struct skb_vnet_hdr *hdr;
543	int err;
544
545	skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp);
546	if (unlikely(!skb))
547		return -ENOMEM;
548
549	skb_put(skb, GOOD_PACKET_LEN);
550
551	hdr = skb_vnet_hdr(skb);
552	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
553	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
554	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
555
556	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
557	if (err < 0)
558		dev_kfree_skb(skb);
559
560	return err;
561}
562
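/* Post a chain of pages as one big receive buffer: sg[0] holds the virtio
 * header, sg[1] the rest of the first page, and the remaining entries one
 * whole page each.
 */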
563static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
564{
565	struct page *first, *list = NULL;
566	char *p;
567	int i, err, offset;
568
569	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
570
571	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
572	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
573		first = get_a_page(rq, gfp);
574		if (!first) {
575			if (list)
576				give_pages(rq, list);
577			return -ENOMEM;
578		}
579		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
580
581		/* chain new page in list head to match sg */
582		first->private = (unsigned long)list;
583		list = first;
584	}
585
586	first = get_a_page(rq, gfp);
587	if (!first) {
588		give_pages(rq, list);
589		return -ENOMEM;
590	}
591	p = page_address(first);
592
593	/* rq->sg[0], rq->sg[1] share the same page */
594	/* a separate rq->sg[0] for virtio_net_hdr only, due to a QEMU bug */
595	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
596
597	/* rq->sg[1] for data packet, from offset */
598	offset = sizeof(struct padded_vnet_hdr);
599	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
600
601	/* chain first in list head */
602	first->private = (unsigned long)list;
603	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
604				  first, gfp);
605	if (err < 0)
606		give_pages(rq, first);
607
608	return err;
609}
610
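/* Buffer length for mergeable receive buffers: header plus the EWMA of
 * recent packet sizes, clamped to [GOOD_PACKET_LEN, PAGE_SIZE - hdr_len]
 * and rounded up to MERGEABLE_BUFFER_ALIGN.
 */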
611static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
612{
613	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
614	unsigned int len;
615
616	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
617			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
618	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
619}
620
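/* Carve an EWMA-sized buffer out of the per-queue page frag and post it,
 * with the buffer's truesize encoded in the context token.
 */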
621static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
622{
623	struct page_frag *alloc_frag = &rq->alloc_frag;
624	char *buf;
625	unsigned long ctx;
626	int err;
627	unsigned int len, hole;
628
629	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
630	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
631		return -ENOMEM;
632
633	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
634	ctx = mergeable_buf_to_ctx(buf, len);
635	get_page(alloc_frag->page);
636	alloc_frag->offset += len;
637	hole = alloc_frag->size - alloc_frag->offset;
638	if (hole < len) {
639		/* To avoid internal fragmentation, if there is very likely not
640		 * enough space for another buffer, add the remaining space to
641		 * the current buffer. This extra space is not included in
642		 * the truesize stored in ctx.
643		 */
644		len += hole;
645		alloc_frag->offset += hole;
646	}
647
648	sg_init_one(rq->sg, buf, len);
649	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
650	if (err < 0)
651		put_page(virt_to_head_page(buf));
652
653	return err;
654}
655
656/*
657 * Returns false if we couldn't fill entirely (OOM).
658 *
659 * Normally run in the receive path, but can also be run from ndo_open
660 * before we're receiving packets, or from refill_work which is
661 * careful to disable receiving (using napi_disable).
662 */
663static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
664{
665	struct virtnet_info *vi = rq->vq->vdev->priv;
666	int err;
667	bool oom;
668
669	gfp |= __GFP_COLD;
670	do {
671		if (vi->mergeable_rx_bufs)
672			err = add_recvbuf_mergeable(rq, gfp);
673		else if (vi->big_packets)
674			err = add_recvbuf_big(rq, gfp);
675		else
676			err = add_recvbuf_small(rq, gfp);
677
678		oom = err == -ENOMEM;
679		if (err)
680			break;
681	} while (rq->vq->num_free);
682	virtqueue_kick(rq->vq);
683	return !oom;
684}
685
686static void skb_recv_done(struct virtqueue *rvq)
687{
688	struct virtnet_info *vi = rvq->vdev->priv;
689	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
690
691	/* Schedule NAPI; suppress further interrupts if successful. */
692	if (napi_schedule_prep(&rq->napi)) {
693		virtqueue_disable_cb(rvq);
694		__napi_schedule(&rq->napi);
695	}
696}
697
698static void virtnet_napi_enable(struct receive_queue *rq)
699{
700	napi_enable(&rq->napi);
701
702	/* If all buffers were filled by the other side before we enabled napi,
703	 * we won't get another interrupt, so process any outstanding packets
704	 * now.  virtnet_poll wants to re-enable the queue, so we disable it here.
705	 * We synchronize against interrupts via NAPI_STATE_SCHED */
706	if (napi_schedule_prep(&rq->napi)) {
707		virtqueue_disable_cb(rq->vq);
708		local_bh_disable();
709		__napi_schedule(&rq->napi);
710		local_bh_enable();
711	}
712}
713
714static void refill_work(struct work_struct *work)
715{
716	struct virtnet_info *vi =
717		container_of(work, struct virtnet_info, refill.work);
718	bool still_empty;
719	int i;
720
721	for (i = 0; i < vi->curr_queue_pairs; i++) {
722		struct receive_queue *rq = &vi->rq[i];
723
724		napi_disable(&rq->napi);
725		still_empty = !try_fill_recv(rq, GFP_KERNEL);
726		virtnet_napi_enable(rq);
727
728		/* In theory, this can happen: if we don't get any buffers in,
729		 * we will *never* try to fill again.
730		 */
731		if (still_empty)
732			schedule_delayed_work(&vi->refill, HZ/2);
733	}
734}
735
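/* Drain up to @budget completed buffers from the receive virtqueue and
 * refill it (or defer to refill_work) if it is running low.
 */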
736static int virtnet_receive(struct receive_queue *rq, int budget)
737{
738	struct virtnet_info *vi = rq->vq->vdev->priv;
739	unsigned int len, received = 0;
740	void *buf;
741
742	while (received < budget &&
743	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
744		receive_buf(rq, buf, len);
745		received++;
746	}
747
748	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
749		if (!try_fill_recv(rq, GFP_ATOMIC))
750			schedule_delayed_work(&vi->refill, 0);
751	}
752
753	return received;
754}
755
756static int virtnet_poll(struct napi_struct *napi, int budget)
757{
758	struct receive_queue *rq =
759		container_of(napi, struct receive_queue, napi);
760	unsigned int r, received = 0;
761
762again:
763	received += virtnet_receive(rq, budget - received);
764
765	/* Out of packets? */
766	if (received < budget) {
767		r = virtqueue_enable_cb_prepare(rq->vq);
768		napi_complete(napi);
769		if (unlikely(virtqueue_poll(rq->vq, r)) &&
770		    napi_schedule_prep(napi)) {
771			virtqueue_disable_cb(rq->vq);
772			__napi_schedule(napi);
773			goto again;
774		}
775	}
776
777	return received;
778}
779
780#ifdef CONFIG_NET_RX_BUSY_POLL
781/* must be called with local_bh_disable()d */
782static int virtnet_busy_poll(struct napi_struct *napi)
783{
784	struct receive_queue *rq =
785		container_of(napi, struct receive_queue, napi);
786	struct virtnet_info *vi = rq->vq->vdev->priv;
787	int r, received = 0, budget = 4;
788
789	if (!(vi->status & VIRTIO_NET_S_LINK_UP))
790		return LL_FLUSH_FAILED;
791
792	if (!napi_schedule_prep(napi))
793		return LL_FLUSH_BUSY;
794
795	virtqueue_disable_cb(rq->vq);
796
797again:
798	received += virtnet_receive(rq, budget);
799
800	r = virtqueue_enable_cb_prepare(rq->vq);
801	clear_bit(NAPI_STATE_SCHED, &napi->state);
802	if (unlikely(virtqueue_poll(rq->vq, r)) &&
803	    napi_schedule_prep(napi)) {
804		virtqueue_disable_cb(rq->vq);
805		if (received < budget) {
806			budget -= received;
807			goto again;
808		} else {
809			__napi_schedule(napi);
810		}
811	}
812
813	return received;
814}
815#endif	/* CONFIG_NET_RX_BUSY_POLL */
816
817static int virtnet_open(struct net_device *dev)
818{
819	struct virtnet_info *vi = netdev_priv(dev);
820	int i;
821
822	for (i = 0; i < vi->max_queue_pairs; i++) {
823		if (i < vi->curr_queue_pairs)
824			/* Make sure we have some buffers: if OOM, use the workqueue. */
825			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
826				schedule_delayed_work(&vi->refill, 0);
827		virtnet_napi_enable(&vi->rq[i]);
828	}
829
830	return 0;
831}
832
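/* Reclaim skbs that the host has finished transmitting and update the
 * per-cpu TX byte/packet counters.
 */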
833static void free_old_xmit_skbs(struct send_queue *sq)
834{
835	struct sk_buff *skb;
836	unsigned int len;
837	struct virtnet_info *vi = sq->vq->vdev->priv;
838	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
839
840	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
841		pr_debug("Sent skb %p\n", skb);
842
843		u64_stats_update_begin(&stats->tx_syncp);
844		stats->tx_bytes += skb->len;
845		stats->tx_packets++;
846		u64_stats_update_end(&stats->tx_syncp);
847
848		dev_kfree_skb_any(skb);
849	}
850}
851
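/* Fill in the virtio header for @skb (pushing it into the headroom when the
 * device allows any layout), build the scatterlist and add it to the send
 * virtqueue.
 */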
852static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
853{
854	struct skb_vnet_hdr *hdr;
855	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
856	struct virtnet_info *vi = sq->vq->vdev->priv;
857	unsigned num_sg;
858	unsigned hdr_len;
859	bool can_push;
860
861	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
862	if (vi->mergeable_rx_bufs)
863		hdr_len = sizeof hdr->mhdr;
864	else
865		hdr_len = sizeof hdr->hdr;
866
867	can_push = vi->any_header_sg &&
868		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
869		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
870	/* Even if we can, don't push here yet as this would skew
871	 * csum_start offset below. */
872	if (can_push)
873		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
874	else
875		hdr = skb_vnet_hdr(skb);
876
877	if (skb->ip_summed == CHECKSUM_PARTIAL) {
878		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
879		hdr->hdr.csum_start = skb_checksum_start_offset(skb);
880		hdr->hdr.csum_offset = skb->csum_offset;
881	} else {
882		hdr->hdr.flags = 0;
883		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
884	}
885
886	if (skb_is_gso(skb)) {
887		hdr->hdr.hdr_len = skb_headlen(skb);
888		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
889		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
890			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
891		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
892			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
893		else
894			BUG();
895		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
896			hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
897	} else {
898		hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
899		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
900	}
901
902	if (vi->mergeable_rx_bufs)
903		hdr->mhdr.num_buffers = 0;
904
905	sg_init_table(sq->sg, MAX_SKB_FRAGS + 2);
906	if (can_push) {
907		__skb_push(skb, hdr_len);
908		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
909		/* Pull header back to avoid skew in tx bytes calculations. */
910		__skb_pull(skb, hdr_len);
911	} else {
912		sg_set_buf(sq->sg, hdr, hdr_len);
913		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
914	}
915	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
916}
917
918static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
919{
920	struct virtnet_info *vi = netdev_priv(dev);
921	int qnum = skb_get_queue_mapping(skb);
922	struct send_queue *sq = &vi->sq[qnum];
923	int err;
924	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
925	bool kick = !skb->xmit_more;
926
927	/* Free up any pending old buffers before queueing new ones. */
928	free_old_xmit_skbs(sq);
929
930	/* Try to transmit */
931	err = xmit_skb(sq, skb);
932
933	/* This should not happen! */
934	if (unlikely(err)) {
935		dev->stats.tx_fifo_errors++;
936		if (net_ratelimit())
937			dev_warn(&dev->dev,
938				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
939		dev->stats.tx_dropped++;
940		dev_kfree_skb_any(skb);
941		return NETDEV_TX_OK;
942	}
943
944	/* Don't wait up for transmitted skbs to be freed. */
945	skb_orphan(skb);
946	nf_reset(skb);
947
948	/* Apparently nice girls don't return TX_BUSY; stop the queue
949	 * before it gets out of hand.  Naturally, this wastes entries. */
950	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
951		netif_stop_subqueue(dev, qnum);
952		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
953			/* More just got used, free them then recheck. */
954			free_old_xmit_skbs(sq);
955			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
956				netif_start_subqueue(dev, qnum);
957				virtqueue_disable_cb(sq->vq);
958			}
959		}
960	}
961
962	if (kick || netif_xmit_stopped(txq))
963		virtqueue_kick(sq->vq);
964
965	return NETDEV_TX_OK;
966}
967
968/*
969 * Send command via the control virtqueue and check status.  Commands
970 * supported by the hypervisor, as indicated by feature bits, should
971 * never fail unless improperly formatted.
972 */
973static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
974				 struct scatterlist *out)
975{
976	struct scatterlist *sgs[4], hdr, stat;
977	struct virtio_net_ctrl_hdr ctrl;
978	virtio_net_ctrl_ack status = ~0;
979	unsigned out_num = 0, tmp;
980
981	/* Caller should know better */
982	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
983
984	ctrl.class = class;
985	ctrl.cmd = cmd;
986	/* Add header */
987	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
988	sgs[out_num++] = &hdr;
989
990	if (out)
991		sgs[out_num++] = out;
992
993	/* Add return status. */
994	sg_init_one(&stat, &status, sizeof(status));
995	sgs[out_num] = &stat;
996
997	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
998	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
999
1000	if (unlikely(!virtqueue_kick(vi->cvq)))
1001		return status == VIRTIO_NET_OK;
1002
1003	/* Spin for a response; the kick causes an ioport write, trapping
1004	 * into the hypervisor, so the request should be handled immediately.
1005	 */
1006	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1007	       !virtqueue_is_broken(vi->cvq))
1008		cpu_relax();
1009
1010	return status == VIRTIO_NET_OK;
1011}
1012
1013static int virtnet_set_mac_address(struct net_device *dev, void *p)
1014{
1015	struct virtnet_info *vi = netdev_priv(dev);
1016	struct virtio_device *vdev = vi->vdev;
1017	int ret;
1018	struct sockaddr *addr = p;
1019	struct scatterlist sg;
1020
1021	ret = eth_prepare_mac_addr_change(dev, p);
1022	if (ret)
1023		return ret;
1024
1025	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1026		sg_init_one(&sg, addr->sa_data, dev->addr_len);
1027		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1028					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
1029			dev_warn(&vdev->dev,
1030				 "Failed to set mac address by vq command.\n");
1031			return -EINVAL;
1032		}
1033	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
1034		unsigned int i;
1035
1036		/* Naturally, this has an atomicity problem. */
1037		for (i = 0; i < dev->addr_len; i++)
1038			virtio_cwrite8(vdev,
1039				       offsetof(struct virtio_net_config, mac) +
1040				       i, addr->sa_data[i]);
1041	}
1042
1043	eth_commit_mac_addr_change(dev, p);
1044
1045	return 0;
1046}
1047
1048static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
1049					       struct rtnl_link_stats64 *tot)
1050{
1051	struct virtnet_info *vi = netdev_priv(dev);
1052	int cpu;
1053	unsigned int start;
1054
1055	for_each_possible_cpu(cpu) {
1056		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
1057		u64 tpackets, tbytes, rpackets, rbytes;
1058
1059		do {
1060			start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
1061			tpackets = stats->tx_packets;
1062			tbytes   = stats->tx_bytes;
1063		} while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
1064
1065		do {
1066			start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
1067			rpackets = stats->rx_packets;
1068			rbytes   = stats->rx_bytes;
1069		} while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
1070
1071		tot->rx_packets += rpackets;
1072		tot->tx_packets += tpackets;
1073		tot->rx_bytes   += rbytes;
1074		tot->tx_bytes   += tbytes;
1075	}
1076
1077	tot->tx_dropped = dev->stats.tx_dropped;
1078	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
1079	tot->rx_dropped = dev->stats.rx_dropped;
1080	tot->rx_length_errors = dev->stats.rx_length_errors;
1081	tot->rx_frame_errors = dev->stats.rx_frame_errors;
1082
1083	return tot;
1084}
1085
1086#ifdef CONFIG_NET_POLL_CONTROLLER
1087static void virtnet_netpoll(struct net_device *dev)
1088{
1089	struct virtnet_info *vi = netdev_priv(dev);
1090	int i;
1091
1092	for (i = 0; i < vi->curr_queue_pairs; i++)
1093		napi_schedule(&vi->rq[i].napi);
1094}
1095#endif
1096
1097static void virtnet_ack_link_announce(struct virtnet_info *vi)
1098{
1099	rtnl_lock();
1100	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
1101				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
1102		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1103	rtnl_unlock();
1104}
1105
1106static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1107{
1108	struct scatterlist sg;
1109	struct virtio_net_ctrl_mq s;
1110	struct net_device *dev = vi->dev;
1111
1112	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1113		return 0;
1114
1115	s.virtqueue_pairs = queue_pairs;
1116	sg_init_one(&sg, &s, sizeof(s));
1117
1118	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
1119				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
1120		dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
1121			 queue_pairs);
1122		return -EINVAL;
1123	} else {
1124		vi->curr_queue_pairs = queue_pairs;
1125		/* virtnet_open() will refill when the device is brought up. */
1126		if (dev->flags & IFF_UP)
1127			schedule_delayed_work(&vi->refill, 0);
1128	}
1129
1130	return 0;
1131}
1132
1133static int virtnet_close(struct net_device *dev)
1134{
1135	struct virtnet_info *vi = netdev_priv(dev);
1136	int i;
1137
1138	/* Make sure refill_work doesn't re-enable napi! */
1139	cancel_delayed_work_sync(&vi->refill);
1140
1141	for (i = 0; i < vi->max_queue_pairs; i++)
1142		napi_disable(&vi->rq[i].napi);
1143
1144	return 0;
1145}
1146
1147static void virtnet_set_rx_mode(struct net_device *dev)
1148{
1149	struct virtnet_info *vi = netdev_priv(dev);
1150	struct scatterlist sg[2];
1151	u8 promisc, allmulti;
1152	struct virtio_net_ctrl_mac *mac_data;
1153	struct netdev_hw_addr *ha;
1154	int uc_count;
1155	int mc_count;
1156	void *buf;
1157	int i;
1158
1159	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1160	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1161		return;
1162
1163	promisc = ((dev->flags & IFF_PROMISC) != 0);
1164	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1165
1166	sg_init_one(sg, &promisc, sizeof(promisc));
1167
1168	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1169				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
1170		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1171			 promisc ? "en" : "dis");
1172
1173	sg_init_one(sg, &allmulti, sizeof(allmulti));
1174
1175	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1176				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
1177		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1178			 allmulti ? "en" : "dis");
1179
1180	uc_count = netdev_uc_count(dev);
1181	mc_count = netdev_mc_count(dev);
1182	/* MAC filter - use one buffer for both lists */
1183	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1184		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1185	mac_data = buf;
1186	if (!buf)
1187		return;
1188
1189	sg_init_table(sg, 2);
1190
1191	/* Store the unicast list and count in the front of the buffer */
1192	mac_data->entries = uc_count;
1193	i = 0;
1194	netdev_for_each_uc_addr(ha, dev)
1195		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1196
1197	sg_set_buf(&sg[0], mac_data,
1198		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1199
1200	/* multicast list and count fill the end */
1201	mac_data = (void *)&mac_data->macs[uc_count][0];
1202
1203	mac_data->entries = mc_count;
1204	i = 0;
1205	netdev_for_each_mc_addr(ha, dev)
1206		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1207
1208	sg_set_buf(&sg[1], mac_data,
1209		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1210
1211	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1212				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
1213		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1214
1215	kfree(buf);
1216}
1217
1218static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1219				   __be16 proto, u16 vid)
1220{
1221	struct virtnet_info *vi = netdev_priv(dev);
1222	struct scatterlist sg;
1223
1224	sg_init_one(&sg, &vid, sizeof(vid));
1225
1226	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1227				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
1228		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1229	return 0;
1230}
1231
1232static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1233				    __be16 proto, u16 vid)
1234{
1235	struct virtnet_info *vi = netdev_priv(dev);
1236	struct scatterlist sg;
1237
1238	sg_init_one(&sg, &vid, sizeof(vid));
1239
1240	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1241				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
1242		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1243	return 0;
1244}
1245
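/* Drop any virtqueue affinity hints that were previously set. */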
1246static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1247{
1248	int i;
1249
1250	if (vi->affinity_hint_set) {
1251		for (i = 0; i < vi->max_queue_pairs; i++) {
1252			virtqueue_set_affinity(vi->rq[i].vq, -1);
1253			virtqueue_set_affinity(vi->sq[i].vq, -1);
1254		}
1255
1256		vi->affinity_hint_set = false;
1257	}
1258}
1259
1260static void virtnet_set_affinity(struct virtnet_info *vi)
1261{
1262	int i;
1263	int cpu;
1264
1265	/* In multiqueue mode, when the number of CPUs is equal to the number of
1266	 * queue pairs, we let each queue pair be private to one CPU by
1267	 * setting the affinity hint to eliminate contention.
1268	 */
1269	if (vi->curr_queue_pairs == 1 ||
1270	    vi->max_queue_pairs != num_online_cpus()) {
1271		virtnet_clean_affinity(vi, -1);
1272		return;
1273	}
1274
1275	i = 0;
1276	for_each_online_cpu(cpu) {
1277		virtqueue_set_affinity(vi->rq[i].vq, cpu);
1278		virtqueue_set_affinity(vi->sq[i].vq, cpu);
1279		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
1280		i++;
1281	}
1282
1283	vi->affinity_hint_set = true;
1284}
1285
1286static int virtnet_cpu_callback(struct notifier_block *nfb,
1287			        unsigned long action, void *hcpu)
1288{
1289	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
1290
1291	switch (action & ~CPU_TASKS_FROZEN) {
1292	case CPU_ONLINE:
1293	case CPU_DOWN_FAILED:
1294	case CPU_DEAD:
1295		virtnet_set_affinity(vi);
1296		break;
1297	case CPU_DOWN_PREPARE:
1298		virtnet_clean_affinity(vi, (long)hcpu);
1299		break;
1300	default:
1301		break;
1302	}
1303
1304	return NOTIFY_OK;
1305}
1306
1307static void virtnet_get_ringparam(struct net_device *dev,
1308				struct ethtool_ringparam *ring)
1309{
1310	struct virtnet_info *vi = netdev_priv(dev);
1311
1312	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1313	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1314	ring->rx_pending = ring->rx_max_pending;
1315	ring->tx_pending = ring->tx_max_pending;
1316}
1317
1318
1319static void virtnet_get_drvinfo(struct net_device *dev,
1320				struct ethtool_drvinfo *info)
1321{
1322	struct virtnet_info *vi = netdev_priv(dev);
1323	struct virtio_device *vdev = vi->vdev;
1324
1325	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1326	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1327	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1328
1329}
1330
1331/* TODO: Eliminate OOO packets during switching */
1332static int virtnet_set_channels(struct net_device *dev,
1333				struct ethtool_channels *channels)
1334{
1335	struct virtnet_info *vi = netdev_priv(dev);
1336	u16 queue_pairs = channels->combined_count;
1337	int err;
1338
1339	/* We don't support separate rx/tx channels.
1340	 * We don't allow setting 'other' channels.
1341	 */
1342	if (channels->rx_count || channels->tx_count || channels->other_count)
1343		return -EINVAL;
1344
1345	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
1346		return -EINVAL;
1347
1348	get_online_cpus();
1349	err = virtnet_set_queues(vi, queue_pairs);
1350	if (!err) {
1351		netif_set_real_num_tx_queues(dev, queue_pairs);
1352		netif_set_real_num_rx_queues(dev, queue_pairs);
1353
1354		virtnet_set_affinity(vi);
1355	}
1356	put_online_cpus();
1357
1358	return err;
1359}
1360
1361static void virtnet_get_channels(struct net_device *dev,
1362				 struct ethtool_channels *channels)
1363{
1364	struct virtnet_info *vi = netdev_priv(dev);
1365
1366	channels->combined_count = vi->curr_queue_pairs;
1367	channels->max_combined = vi->max_queue_pairs;
1368	channels->max_other = 0;
1369	channels->rx_count = 0;
1370	channels->tx_count = 0;
1371	channels->other_count = 0;
1372}
1373
1374static const struct ethtool_ops virtnet_ethtool_ops = {
1375	.get_drvinfo = virtnet_get_drvinfo,
1376	.get_link = ethtool_op_get_link,
1377	.get_ringparam = virtnet_get_ringparam,
1378	.set_channels = virtnet_set_channels,
1379	.get_channels = virtnet_get_channels,
1380};
1381
1382#define MIN_MTU 68
1383#define MAX_MTU 65535
1384
1385static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
1386{
1387	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
1388		return -EINVAL;
1389	dev->mtu = new_mtu;
1390	return 0;
1391}
1392
1393static const struct net_device_ops virtnet_netdev = {
1394	.ndo_open            = virtnet_open,
1395	.ndo_stop   	     = virtnet_close,
1396	.ndo_start_xmit      = start_xmit,
1397	.ndo_validate_addr   = eth_validate_addr,
1398	.ndo_set_mac_address = virtnet_set_mac_address,
1399	.ndo_set_rx_mode     = virtnet_set_rx_mode,
1400	.ndo_change_mtu	     = virtnet_change_mtu,
1401	.ndo_get_stats64     = virtnet_stats,
1402	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
1403	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
1404#ifdef CONFIG_NET_POLL_CONTROLLER
1405	.ndo_poll_controller = virtnet_netpoll,
1406#endif
1407#ifdef CONFIG_NET_RX_BUSY_POLL
1408	.ndo_busy_poll		= virtnet_busy_poll,
1409#endif
1410};
1411
1412static void virtnet_config_changed_work(struct work_struct *work)
1413{
1414	struct virtnet_info *vi =
1415		container_of(work, struct virtnet_info, config_work);
1416	u16 v;
1417
1418	if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
1419				 struct virtio_net_config, status, &v) < 0)
1420		return;
1421
1422	if (v & VIRTIO_NET_S_ANNOUNCE) {
1423		netdev_notify_peers(vi->dev);
1424		virtnet_ack_link_announce(vi);
1425	}
1426
1427	/* Ignore unknown (future) status bits */
1428	v &= VIRTIO_NET_S_LINK_UP;
1429
1430	if (vi->status == v)
1431		return;
1432
1433	vi->status = v;
1434
1435	if (vi->status & VIRTIO_NET_S_LINK_UP) {
1436		netif_carrier_on(vi->dev);
1437		netif_tx_wake_all_queues(vi->dev);
1438	} else {
1439		netif_carrier_off(vi->dev);
1440		netif_tx_stop_all_queues(vi->dev);
1441	}
1442}
1443
1444static void virtnet_config_changed(struct virtio_device *vdev)
1445{
1446	struct virtnet_info *vi = vdev->priv;
1447
1448	schedule_work(&vi->config_work);
1449}
1450
1451static void virtnet_free_queues(struct virtnet_info *vi)
1452{
1453	int i;
1454
1455	for (i = 0; i < vi->max_queue_pairs; i++)
1456		netif_napi_del(&vi->rq[i].napi);
1457
1458	kfree(vi->rq);
1459	kfree(vi->sq);
1460}
1461
1462static void free_receive_bufs(struct virtnet_info *vi)
1463{
1464	int i;
1465
1466	for (i = 0; i < vi->max_queue_pairs; i++) {
1467		while (vi->rq[i].pages)
1468			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
1469	}
1470}
1471
1472static void free_receive_page_frags(struct virtnet_info *vi)
1473{
1474	int i;
1475	for (i = 0; i < vi->max_queue_pairs; i++)
1476		if (vi->rq[i].alloc_frag.page)
1477			put_page(vi->rq[i].alloc_frag.page);
1478}
1479
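/* Detach and free any buffers still queued in the send and receive
 * virtqueues.
 */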
1480static void free_unused_bufs(struct virtnet_info *vi)
1481{
1482	void *buf;
1483	int i;
1484
1485	for (i = 0; i < vi->max_queue_pairs; i++) {
1486		struct virtqueue *vq = vi->sq[i].vq;
1487		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
1488			dev_kfree_skb(buf);
1489	}
1490
1491	for (i = 0; i < vi->max_queue_pairs; i++) {
1492		struct virtqueue *vq = vi->rq[i].vq;
1493
1494		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
1495			if (vi->mergeable_rx_bufs) {
1496				unsigned long ctx = (unsigned long)buf;
1497				void *base = mergeable_ctx_to_buf_address(ctx);
1498				put_page(virt_to_head_page(base));
1499			} else if (vi->big_packets) {
1500				give_pages(&vi->rq[i], buf);
1501			} else {
1502				dev_kfree_skb(buf);
1503			}
1504		}
1505	}
1506}
1507
1508static void virtnet_del_vqs(struct virtnet_info *vi)
1509{
1510	struct virtio_device *vdev = vi->vdev;
1511
1512	virtnet_clean_affinity(vi, -1);
1513
1514	vdev->config->del_vqs(vdev);
1515
1516	virtnet_free_queues(vi);
1517}
1518
1519static int virtnet_find_vqs(struct virtnet_info *vi)
1520{
1521	vq_callback_t **callbacks;
1522	struct virtqueue **vqs;
1523	int ret = -ENOMEM;
1524	int i, total_vqs;
1525	const char **names;
1526
1527	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
1528	 * possibly N-1 more RX/TX queue pairs used in multiqueue mode, followed
1529	 * by a possible control vq.
1530	 */
1531	total_vqs = vi->max_queue_pairs * 2 +
1532		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
1533
1534	/* Allocate space for find_vqs parameters */
1535	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
1536	if (!vqs)
1537		goto err_vq;
1538	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
1539	if (!callbacks)
1540		goto err_callback;
1541	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
1542	if (!names)
1543		goto err_names;
1544
1545	/* Parameters for control virtqueue, if any */
1546	if (vi->has_cvq) {
1547		callbacks[total_vqs - 1] = NULL;
1548		names[total_vqs - 1] = "control";
1549	}
1550
1551	/* Allocate/initialize parameters for send/receive virtqueues */
1552	for (i = 0; i < vi->max_queue_pairs; i++) {
1553		callbacks[rxq2vq(i)] = skb_recv_done;
1554		callbacks[txq2vq(i)] = skb_xmit_done;
1555		sprintf(vi->rq[i].name, "input.%d", i);
1556		sprintf(vi->sq[i].name, "output.%d", i);
1557		names[rxq2vq(i)] = vi->rq[i].name;
1558		names[txq2vq(i)] = vi->sq[i].name;
1559	}
1560
1561	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
1562					 names);
1563	if (ret)
1564		goto err_find;
1565
1566	if (vi->has_cvq) {
1567		vi->cvq = vqs[total_vqs - 1];
1568		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
1569			vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
1570	}
1571
1572	for (i = 0; i < vi->max_queue_pairs; i++) {
1573		vi->rq[i].vq = vqs[rxq2vq(i)];
1574		vi->sq[i].vq = vqs[txq2vq(i)];
1575	}
1576
1577	kfree(names);
1578	kfree(callbacks);
1579	kfree(vqs);
1580
1581	return 0;
1582
1583err_find:
1584	kfree(names);
1585err_names:
1586	kfree(callbacks);
1587err_callback:
1588	kfree(vqs);
1589err_vq:
1590	return ret;
1591}
1592
1593static int virtnet_alloc_queues(struct virtnet_info *vi)
1594{
1595	int i;
1596
1597	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
1598	if (!vi->sq)
1599		goto err_sq;
1600	vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
1601	if (!vi->rq)
1602		goto err_rq;
1603
1604	INIT_DELAYED_WORK(&vi->refill, refill_work);
1605	for (i = 0; i < vi->max_queue_pairs; i++) {
1606		vi->rq[i].pages = NULL;
1607		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
1608			       napi_weight);
1609		napi_hash_add(&vi->rq[i].napi);
1610
1611		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
1612		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
1613		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
1614	}
1615
1616	return 0;
1617
1618err_rq:
1619	kfree(vi->sq);
1620err_sq:
1621	return -ENOMEM;
1622}
1623
1624static int init_vqs(struct virtnet_info *vi)
1625{
1626	int ret;
1627
1628	/* Allocate send & receive queues */
1629	ret = virtnet_alloc_queues(vi);
1630	if (ret)
1631		goto err;
1632
1633	ret = virtnet_find_vqs(vi);
1634	if (ret)
1635		goto err_free;
1636
1637	get_online_cpus();
1638	virtnet_set_affinity(vi);
1639	put_online_cpus();
1640
1641	return 0;
1642
1643err_free:
1644	virtnet_free_queues(vi);
1645err:
1646	return ret;
1647}
1648
1649#ifdef CONFIG_SYSFS
1650static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
1651		struct rx_queue_attribute *attribute, char *buf)
1652{
1653	struct virtnet_info *vi = netdev_priv(queue->dev);
1654	unsigned int queue_index = get_netdev_rx_queue_index(queue);
1655	struct ewma *avg;
1656
1657	BUG_ON(queue_index >= vi->max_queue_pairs);
1658	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
1659	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
1660}
1661
1662static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
1663	__ATTR_RO(mergeable_rx_buffer_size);
1664
1665static struct attribute *virtio_net_mrg_rx_attrs[] = {
1666	&mergeable_rx_buffer_size_attribute.attr,
1667	NULL
1668};
1669
1670static const struct attribute_group virtio_net_mrg_rx_group = {
1671	.name = "virtio_net",
1672	.attrs = virtio_net_mrg_rx_attrs
1673};
1674#endif
1675
1676static bool virtnet_fail_on_feature(struct virtio_device *vdev,
1677				    unsigned int fbit,
1678				    const char *fname, const char *dname)
1679{
1680	if (!virtio_has_feature(vdev, fbit))
1681		return false;
1682
1683	dev_err(&vdev->dev, "device advertises feature %s but not %s",
1684		fname, dname);
1685
1686	return true;
1687}
1688
1689#define VIRTNET_FAIL_ON(vdev, fbit, dbit)			\
1690	virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
1691
1692static bool virtnet_validate_features(struct virtio_device *vdev)
1693{
1694	if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
1695	    (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
1696			     "VIRTIO_NET_F_CTRL_VQ") ||
1697	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
1698			     "VIRTIO_NET_F_CTRL_VQ") ||
1699	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
1700			     "VIRTIO_NET_F_CTRL_VQ") ||
1701	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
1702	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
1703			     "VIRTIO_NET_F_CTRL_VQ"))) {
1704		return false;
1705	}
1706
1707	return true;
1708}
1709
1710static int virtnet_probe(struct virtio_device *vdev)
1711{
1712	int i, err;
1713	struct net_device *dev;
1714	struct virtnet_info *vi;
1715	u16 max_queue_pairs;
1716
1717	if (!virtnet_validate_features(vdev))
1718		return -EINVAL;
1719
1720	/* Find if host supports multiqueue virtio_net device */
1721	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
1722				   struct virtio_net_config,
1723				   max_virtqueue_pairs, &max_queue_pairs);
1724
1725	/* We need at least 2 queues */
1726	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1727	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1728	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1729		max_queue_pairs = 1;
1730
1731	/* Allocate ourselves a network device with room for our info */
1732	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
1733	if (!dev)
1734		return -ENOMEM;
1735
1736	/* Set up network device as normal. */
1737	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
1738	dev->netdev_ops = &virtnet_netdev;
1739	dev->features = NETIF_F_HIGHDMA;
1740
1741	dev->ethtool_ops = &virtnet_ethtool_ops;
1742	SET_NETDEV_DEV(dev, &vdev->dev);
1743
1744	/* Do we support "hardware" checksums? */
1745	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
1746		/* This opens up the world of extra features. */
1747		dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1748		if (csum)
1749			dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1750
1751		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
1752			dev->hw_features |= NETIF_F_TSO
1753				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
1754		}
1755		/* Individual feature bits: what can host handle? */
1756		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
1757			dev->hw_features |= NETIF_F_TSO;
1758		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
1759			dev->hw_features |= NETIF_F_TSO6;
1760		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
1761			dev->hw_features |= NETIF_F_TSO_ECN;
1762
1763		if (gso)
1764			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
1765		/* The (!csum && gso) case will be fixed by register_netdev() */
1766	}
1767	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
1768		dev->features |= NETIF_F_RXCSUM;
1769
1770	dev->vlan_features = dev->features;
1771
1772	/* Configuration may specify what MAC to use.  Otherwise random. */
1773	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
1774		virtio_cread_bytes(vdev,
1775				   offsetof(struct virtio_net_config, mac),
1776				   dev->dev_addr, dev->addr_len);
1777	else
1778		eth_hw_addr_random(dev);
1779
1780	/* Set up our device-specific information */
1781	vi = netdev_priv(dev);
1782	vi->dev = dev;
1783	vi->vdev = vdev;
1784	vdev->priv = vi;
1785	vi->stats = alloc_percpu(struct virtnet_stats);
1786	err = -ENOMEM;
1787	if (vi->stats == NULL)
1788		goto free;
1789
1790	for_each_possible_cpu(i) {
1791		struct virtnet_stats *virtnet_stats;
1792		virtnet_stats = per_cpu_ptr(vi->stats, i);
1793		u64_stats_init(&virtnet_stats->tx_syncp);
1794		u64_stats_init(&virtnet_stats->rx_syncp);
1795	}
1796
1797	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
1798
1799	/* If we can receive ANY GSO packets, we must allocate large ones. */
1800	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
1801	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1802	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
1803		vi->big_packets = true;
1804
1805	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
1806		vi->mergeable_rx_bufs = true;
1807
1808	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
1809		vi->any_header_sg = true;
1810
1811	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1812		vi->has_cvq = true;
1813
1814	if (vi->any_header_sg) {
1815		if (vi->mergeable_rx_bufs)
1816			dev->needed_headroom = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1817		else
1818			dev->needed_headroom = sizeof(struct virtio_net_hdr);
1819	}
1820
1821	/* Use single tx/rx queue pair as default */
1822	vi->curr_queue_pairs = 1;
1823	vi->max_queue_pairs = max_queue_pairs;
1824
1825	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
1826	err = init_vqs(vi);
1827	if (err)
1828		goto free_stats;
1829
1830#ifdef CONFIG_SYSFS
1831	if (vi->mergeable_rx_bufs)
1832		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
1833#endif
1834	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
1835	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
1836
1837	err = register_netdev(dev);
1838	if (err) {
1839		pr_debug("virtio_net: registering device failed\n");
1840		goto free_vqs;
1841	}
1842
1843	virtio_device_ready(vdev);
1844
1845	/* Last of all, set up some receive buffers. */
1846	for (i = 0; i < vi->curr_queue_pairs; i++) {
1847		try_fill_recv(&vi->rq[i], GFP_KERNEL);
1848
1849		/* If we didn't even get one input buffer, we're useless. */
1850		if (vi->rq[i].vq->num_free ==
1851		    virtqueue_get_vring_size(vi->rq[i].vq)) {
1852			free_unused_bufs(vi);
1853			err = -ENOMEM;
1854			goto free_recv_bufs;
1855		}
1856	}
1857
1858	vi->nb.notifier_call = &virtnet_cpu_callback;
1859	err = register_hotcpu_notifier(&vi->nb);
1860	if (err) {
1861		pr_debug("virtio_net: registering cpu notifier failed\n");
1862		goto free_recv_bufs;
1863	}
1864
1865	/* Assume link up if device can't report link status,
1866	 * otherwise get link status from config. */
1867	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
1868		netif_carrier_off(dev);
1869		schedule_work(&vi->config_work);
1870	} else {
1871		vi->status = VIRTIO_NET_S_LINK_UP;
1872		netif_carrier_on(dev);
1873	}
1874
1875	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
1876		 dev->name, max_queue_pairs);
1877
1878	return 0;
1879
1880free_recv_bufs:
1881	vi->vdev->config->reset(vdev);
1882
1883	free_receive_bufs(vi);
1884	unregister_netdev(dev);
1885free_vqs:
1886	cancel_delayed_work_sync(&vi->refill);
1887	free_receive_page_frags(vi);
1888	virtnet_del_vqs(vi);
1889free_stats:
1890	free_percpu(vi->stats);
1891free:
1892	free_netdev(dev);
1893	return err;
1894}
1895
1896static void remove_vq_common(struct virtnet_info *vi)
1897{
1898	vi->vdev->config->reset(vi->vdev);
1899
1900	/* Free unused buffers in both send and recv, if any. */
1901	free_unused_bufs(vi);
1902
1903	free_receive_bufs(vi);
1904
1905	free_receive_page_frags(vi);
1906
1907	virtnet_del_vqs(vi);
1908}
1909
1910static void virtnet_remove(struct virtio_device *vdev)
1911{
1912	struct virtnet_info *vi = vdev->priv;
1913
1914	unregister_hotcpu_notifier(&vi->nb);
1915
1916	/* Make sure no work handler is accessing the device. */
1917	flush_work(&vi->config_work);
1918
1919	unregister_netdev(vi->dev);
1920
1921	remove_vq_common(vi);
1922
1923	free_percpu(vi->stats);
1924	free_netdev(vi->dev);
1925}
1926
1927#ifdef CONFIG_PM_SLEEP
1928static int virtnet_freeze(struct virtio_device *vdev)
1929{
1930	struct virtnet_info *vi = vdev->priv;
1931	int i;
1932
1933	unregister_hotcpu_notifier(&vi->nb);
1934
1935	/* Make sure no work handler is accessing the device */
1936	flush_work(&vi->config_work);
1937
1938	netif_device_detach(vi->dev);
1939	cancel_delayed_work_sync(&vi->refill);
1940
1941	if (netif_running(vi->dev)) {
1942		for (i = 0; i < vi->max_queue_pairs; i++) {
1943			napi_disable(&vi->rq[i].napi);
1944			napi_hash_del(&vi->rq[i].napi);
1945			netif_napi_del(&vi->rq[i].napi);
1946		}
1947	}
1948
1949	remove_vq_common(vi);
1950
1951	return 0;
1952}
1953
1954static int virtnet_restore(struct virtio_device *vdev)
1955{
1956	struct virtnet_info *vi = vdev->priv;
1957	int err, i;
1958
1959	err = init_vqs(vi);
1960	if (err)
1961		return err;
1962
1963	virtio_device_ready(vdev);
1964
1965	if (netif_running(vi->dev)) {
1966		for (i = 0; i < vi->curr_queue_pairs; i++)
1967			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
1968				schedule_delayed_work(&vi->refill, 0);
1969
1970		for (i = 0; i < vi->max_queue_pairs; i++)
1971			virtnet_napi_enable(&vi->rq[i]);
1972	}
1973
1974	netif_device_attach(vi->dev);
1975
1976	rtnl_lock();
1977	virtnet_set_queues(vi, vi->curr_queue_pairs);
1978	rtnl_unlock();
1979
1980	err = register_hotcpu_notifier(&vi->nb);
1981	if (err)
1982		return err;
1983
1984	return 0;
1985}
1986#endif
1987
1988static struct virtio_device_id id_table[] = {
1989	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1990	{ 0 },
1991};
1992
1993static unsigned int features[] = {
1994	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1995	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1996	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6,
1997	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1998	VIRTIO_NET_F_GUEST_ECN,
1999	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
2000	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
2001	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
2002	VIRTIO_NET_F_CTRL_MAC_ADDR,
2003	VIRTIO_F_ANY_LAYOUT,
2004};
2005
2006static struct virtio_driver virtio_net_driver = {
2007	.feature_table = features,
2008	.feature_table_size = ARRAY_SIZE(features),
2009	.driver.name =	KBUILD_MODNAME,
2010	.driver.owner =	THIS_MODULE,
2011	.id_table =	id_table,
2012	.probe =	virtnet_probe,
2013	.remove =	virtnet_remove,
2014	.config_changed = virtnet_config_changed,
2015#ifdef CONFIG_PM_SLEEP
2016	.freeze =	virtnet_freeze,
2017	.restore =	virtnet_restore,
2018#endif
2019};
2020
2021module_virtio_driver(virtio_net_driver);
2022
2023MODULE_DEVICE_TABLE(virtio, id_table);
2024MODULE_DESCRIPTION("Virtio network driver");
2025MODULE_LICENSE("GPL");
2026