1 /*
2  *  linux/net/ipv4/inet_lro.c
3  *
4  *  Large Receive Offload (ipv4 / tcp)
5  *
6  *  (C) Copyright IBM Corp. 2007
7  *
8  *  Authors:
9  *       Jan-Bernd Themann <themann@de.ibm.com>
10  *       Christoph Raisch <raisch@de.ibm.com>
11  *
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2, or (at your option)
16  * any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26  */
27 
28 
29 #include <linux/module.h>
30 #include <linux/if_vlan.h>
31 #include <linux/inet_lro.h>
32 
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
35 MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
36 
/*
 * Header length helpers.  The doff/ihl fields count 32-bit words, so a
 * shift by two converts them to bytes.  Arguments are parenthesized so
 * that any pointer expression (e.g. "&hdr") can be passed safely.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* TCP payload bytes: IP total length minus the IP and TCP header lengths */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Header lengths (in 32-bit words) that are accepted for aggregation */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Header length used for fragment input whose headers were not parsed */
#define LRO_MAX_PG_HLEN 64

/*
 * Increment one counter in the manager's statistics.  Wrapped in
 * do { } while (0) so the macro acts as a single statement, including
 * inside an unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
49 
50 /*
51  * Basic tcp checks whether packet is suitable for LRO
52  */
53 
lro_tcp_ip_check(const struct iphdr * iph,const struct tcphdr * tcph,int len,const struct net_lro_desc * lro_desc)54 static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55 			    int len, const struct net_lro_desc *lro_desc)
56 {
57         /* check ip header: don't aggregate padded frames */
58 	if (ntohs(iph->tot_len) != len)
59 		return -1;
60 
61 	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
62 		return -1;
63 
64 	if (iph->ihl != IPH_LEN_WO_OPTIONS)
65 		return -1;
66 
67 	if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
68 	    tcph->rst || tcph->syn || tcph->fin)
69 		return -1;
70 
71 	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
72 		return -1;
73 
74 	if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
75 	    tcph->doff != TCPH_LEN_W_TIMESTAMP)
76 		return -1;
77 
78 	/* check tcp options (only timestamp allowed) */
79 	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
80 		__be32 *topt = (__be32 *)(tcph + 1);
81 
82 		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
83 				   | (TCPOPT_TIMESTAMP << 8)
84 				   | TCPOLEN_TIMESTAMP))
85 			return -1;
86 
87 		/* timestamp should be in right order */
88 		topt++;
89 		if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
90 				      ntohl(*topt)))
91 			return -1;
92 
93 		/* timestamp reply should not be zero */
94 		topt++;
95 		if (*topt == 0)
96 			return -1;
97 	}
98 
99 	return 0;
100 }
101 
lro_update_tcp_ip_header(struct net_lro_desc * lro_desc)102 static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
103 {
104 	struct iphdr *iph = lro_desc->iph;
105 	struct tcphdr *tcph = lro_desc->tcph;
106 	__be32 *p;
107 	__wsum tcp_hdr_csum;
108 
109 	tcph->ack_seq = lro_desc->tcp_ack;
110 	tcph->window = lro_desc->tcp_window;
111 
112 	if (lro_desc->tcp_saw_tstamp) {
113 		p = (__be32 *)(tcph + 1);
114 		*(p+2) = lro_desc->tcp_rcv_tsecr;
115 	}
116 
117 	iph->tot_len = htons(lro_desc->ip_tot_len);
118 
119 	iph->check = 0;
120 	iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121 
122 	tcph->check = 0;
123 	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
124 	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
125 	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
126 					lro_desc->ip_tot_len -
127 					IP_HDR_LEN(iph), IPPROTO_TCP,
128 					lro_desc->data_csum);
129 }
130 
lro_tcp_data_csum(struct iphdr * iph,struct tcphdr * tcph,int len)131 static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
132 {
133 	__wsum tcp_csum;
134 	__wsum tcp_hdr_csum;
135 	__wsum tcp_ps_hdr_csum;
136 
137 	tcp_csum = ~csum_unfold(tcph->check);
138 	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
139 
140 	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
141 					     len + TCP_HDR_LEN(tcph),
142 					     IPPROTO_TCP, 0);
143 
144 	return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
145 			tcp_ps_hdr_csum);
146 }
147 
lro_init_desc(struct net_lro_desc * lro_desc,struct sk_buff * skb,struct iphdr * iph,struct tcphdr * tcph)148 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
149 			  struct iphdr *iph, struct tcphdr *tcph)
150 {
151 	int nr_frags;
152 	__be32 *ptr;
153 	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
154 
155 	nr_frags = skb_shinfo(skb)->nr_frags;
156 	lro_desc->parent = skb;
157 	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
158 	lro_desc->iph = iph;
159 	lro_desc->tcph = tcph;
160 	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
161 	lro_desc->tcp_ack = tcph->ack_seq;
162 	lro_desc->tcp_window = tcph->window;
163 
164 	lro_desc->pkt_aggr_cnt = 1;
165 	lro_desc->ip_tot_len = ntohs(iph->tot_len);
166 
167 	if (tcph->doff == 8) {
168 		ptr = (__be32 *)(tcph+1);
169 		lro_desc->tcp_saw_tstamp = 1;
170 		lro_desc->tcp_rcv_tsval = *(ptr+1);
171 		lro_desc->tcp_rcv_tsecr = *(ptr+2);
172 	}
173 
174 	lro_desc->mss = tcp_data_len;
175 	lro_desc->active = 1;
176 
177 	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
178 						tcp_data_len);
179 }
180 
lro_clear_desc(struct net_lro_desc * lro_desc)181 static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
182 {
183 	memset(lro_desc, 0, sizeof(struct net_lro_desc));
184 }
185 
lro_add_common(struct net_lro_desc * lro_desc,struct iphdr * iph,struct tcphdr * tcph,int tcp_data_len)186 static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
187 			   struct tcphdr *tcph, int tcp_data_len)
188 {
189 	struct sk_buff *parent = lro_desc->parent;
190 	__be32 *topt;
191 
192 	lro_desc->pkt_aggr_cnt++;
193 	lro_desc->ip_tot_len += tcp_data_len;
194 	lro_desc->tcp_next_seq += tcp_data_len;
195 	lro_desc->tcp_window = tcph->window;
196 	lro_desc->tcp_ack = tcph->ack_seq;
197 
198 	/* don't update tcp_rcv_tsval, would not work with PAWS */
199 	if (lro_desc->tcp_saw_tstamp) {
200 		topt = (__be32 *) (tcph + 1);
201 		lro_desc->tcp_rcv_tsecr = *(topt + 2);
202 	}
203 
204 	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
205 					     lro_tcp_data_csum(iph, tcph,
206 							       tcp_data_len),
207 					     parent->len);
208 
209 	parent->len += tcp_data_len;
210 	parent->data_len += tcp_data_len;
211 	if (tcp_data_len > lro_desc->mss)
212 		lro_desc->mss = tcp_data_len;
213 }
214 
lro_add_packet(struct net_lro_desc * lro_desc,struct sk_buff * skb,struct iphdr * iph,struct tcphdr * tcph)215 static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
216 			   struct iphdr *iph, struct tcphdr *tcph)
217 {
218 	struct sk_buff *parent = lro_desc->parent;
219 	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
220 
221 	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
222 
223 	skb_pull(skb, (skb->len - tcp_data_len));
224 	parent->truesize += skb->truesize;
225 
226 	if (lro_desc->last_skb)
227 		lro_desc->last_skb->next = skb;
228 	else
229 		skb_shinfo(parent)->frag_list = skb;
230 
231 	lro_desc->last_skb = skb;
232 }
233 
lro_add_frags(struct net_lro_desc * lro_desc,int len,int hlen,int truesize,struct skb_frag_struct * skb_frags,struct iphdr * iph,struct tcphdr * tcph)234 static void lro_add_frags(struct net_lro_desc *lro_desc,
235 			  int len, int hlen, int truesize,
236 			  struct skb_frag_struct *skb_frags,
237 			  struct iphdr *iph, struct tcphdr *tcph)
238 {
239 	struct sk_buff *skb = lro_desc->parent;
240 	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
241 
242 	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
243 
244 	skb->truesize += truesize;
245 
246 	skb_frags[0].page_offset += hlen;
247 	skb_frag_size_sub(&skb_frags[0], hlen);
248 
249 	while (tcp_data_len > 0) {
250 		*(lro_desc->next_frag) = *skb_frags;
251 		tcp_data_len -= skb_frag_size(skb_frags);
252 		lro_desc->next_frag++;
253 		skb_frags++;
254 		skb_shinfo(skb)->nr_frags++;
255 	}
256 }
257 
lro_check_tcp_conn(struct net_lro_desc * lro_desc,struct iphdr * iph,struct tcphdr * tcph)258 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
259 			      struct iphdr *iph,
260 			      struct tcphdr *tcph)
261 {
262 	if ((lro_desc->iph->saddr != iph->saddr) ||
263 	    (lro_desc->iph->daddr != iph->daddr) ||
264 	    (lro_desc->tcph->source != tcph->source) ||
265 	    (lro_desc->tcph->dest != tcph->dest))
266 		return -1;
267 	return 0;
268 }
269 
lro_get_desc(struct net_lro_mgr * lro_mgr,struct net_lro_desc * lro_arr,struct iphdr * iph,struct tcphdr * tcph)270 static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
271 					 struct net_lro_desc *lro_arr,
272 					 struct iphdr *iph,
273 					 struct tcphdr *tcph)
274 {
275 	struct net_lro_desc *lro_desc = NULL;
276 	struct net_lro_desc *tmp;
277 	int max_desc = lro_mgr->max_desc;
278 	int i;
279 
280 	for (i = 0; i < max_desc; i++) {
281 		tmp = &lro_arr[i];
282 		if (tmp->active)
283 			if (!lro_check_tcp_conn(tmp, iph, tcph)) {
284 				lro_desc = tmp;
285 				goto out;
286 			}
287 	}
288 
289 	for (i = 0; i < max_desc; i++) {
290 		if (!lro_arr[i].active) {
291 			lro_desc = &lro_arr[i];
292 			goto out;
293 		}
294 	}
295 
296 	LRO_INC_STATS(lro_mgr, no_desc);
297 out:
298 	return lro_desc;
299 }
300 
lro_flush(struct net_lro_mgr * lro_mgr,struct net_lro_desc * lro_desc)301 static void lro_flush(struct net_lro_mgr *lro_mgr,
302 		      struct net_lro_desc *lro_desc)
303 {
304 	if (lro_desc->pkt_aggr_cnt > 1)
305 		lro_update_tcp_ip_header(lro_desc);
306 
307 	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
308 
309 	if (lro_mgr->features & LRO_F_NAPI)
310 		netif_receive_skb(lro_desc->parent);
311 	else
312 		netif_rx(lro_desc->parent);
313 
314 	LRO_INC_STATS(lro_mgr, flushed);
315 	lro_clear_desc(lro_desc);
316 }
317 
__lro_proc_skb(struct net_lro_mgr * lro_mgr,struct sk_buff * skb,void * priv)318 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
319 			  void *priv)
320 {
321 	struct net_lro_desc *lro_desc;
322 	struct iphdr *iph;
323 	struct tcphdr *tcph;
324 	u64 flags;
325 	int vlan_hdr_len = 0;
326 
327 	if (!lro_mgr->get_skb_header ||
328 	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
329 				    &flags, priv))
330 		goto out;
331 
332 	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
333 		goto out;
334 
335 	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
336 	if (!lro_desc)
337 		goto out;
338 
339 	if ((skb->protocol == htons(ETH_P_8021Q)) &&
340 	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
341 		vlan_hdr_len = VLAN_HLEN;
342 
343 	if (!lro_desc->active) { /* start new lro session */
344 		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
345 			goto out;
346 
347 		skb->ip_summed = lro_mgr->ip_summed_aggr;
348 		lro_init_desc(lro_desc, skb, iph, tcph);
349 		LRO_INC_STATS(lro_mgr, aggregated);
350 		return 0;
351 	}
352 
353 	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
354 		goto out2;
355 
356 	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
357 		goto out2;
358 
359 	lro_add_packet(lro_desc, skb, iph, tcph);
360 	LRO_INC_STATS(lro_mgr, aggregated);
361 
362 	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
363 	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
364 		lro_flush(lro_mgr, lro_desc);
365 
366 	return 0;
367 
368 out2: /* send aggregated SKBs to stack */
369 	lro_flush(lro_mgr, lro_desc);
370 
371 out:
372 	return 1;
373 }
374 
375 
/*
 * Build an skb for a packet that was received in page fragments.
 *
 * Up to hlen bytes starting at mac_hdr are copied into the linear area of
 * a freshly allocated skb; the fragment descriptors are attached as the
 * skb's page fragments, with the first one advanced past the copied bytes.
 *
 * Returns the new skb, or NULL if the allocation failed.
 */
static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
				   struct skb_frag_struct *frags,
				   int len, int true_size,
				   void *mac_hdr,
				   int hlen, __wsum sum,
				   u32 ip_summed)
{
	struct sk_buff *skb;
	struct skb_frag_struct *dst;
	int copy_len = min(len, hlen);
	int remaining = len;

	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
	if (!skb)
		return NULL;

	skb_reserve(skb, lro_mgr->frag_align_pad);
	skb->len = len;
	skb->data_len = len - copy_len;
	skb->truesize += true_size;
	skb->tail += copy_len;

	memcpy(skb->data, mac_hdr, copy_len);

	/* attach fragment descriptors until len bytes are covered */
	for (dst = skb_shinfo(skb)->frags; remaining > 0; dst++, frags++) {
		*dst = *frags;
		remaining -= skb_frag_size(frags);
		skb_shinfo(skb)->nr_frags++;
	}

	/* the copied bytes must not show up in the paged data again */
	dst = &skb_shinfo(skb)->frags[0];
	dst->page_offset += copy_len;
	skb_frag_size_sub(dst, copy_len);

	skb->ip_summed = ip_summed;
	skb->csum = sum;
	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
	return skb;
}
417 
/*
 * Try to aggregate a packet that was received in page fragments.
 *
 * Returns NULL when the packet was taken over by LRO (it started a session
 * or was appended to one) or when no skb could be allocated for a new
 * session.  Otherwise an skb built from the fragments is returned and the
 * caller has to pass it to the stack.
 */
static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
					  struct skb_frag_struct *frags,
					  int len, int true_size,
					  void *priv, __wsum sum)
{
	struct net_lro_desc *lro_desc;
	struct iphdr *iph;
	struct tcphdr *tcph;
	struct sk_buff *skb;
	u64 flags;
	void *mac_hdr;
	int l2_len;
	int hdr_len = LRO_MAX_PG_HLEN;
	int vlan_hdr_len = 0;

	if (!lro_mgr->get_frag_header ||
	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
				     (void *)&tcph, &flags, priv)) {
		/* headers unknown: fall back to the start of the fragment */
		mac_hdr = skb_frag_address(frags);
		goto pass_up;
	}

	if (!((flags & LRO_IPV4) && (flags & LRO_TCP)))
		goto pass_up;

	/* bytes in front of the IP header, and up to the end of TCP header */
	l2_len = (int)((void *)(iph) - mac_hdr);
	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);

	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
	if (!lro_desc)
		goto pass_up;

	if (!lro_desc->active) { /* start new lro session */
		if (lro_tcp_ip_check(iph, tcph, len - l2_len, NULL))
			goto pass_up;

		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
				  hdr_len, 0, lro_mgr->ip_summed_aggr);
		if (!skb)
			goto done;

		if ((skb->protocol == htons(ETH_P_8021Q)) &&
		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
			vlan_hdr_len = VLAN_HLEN;

		/* from here on use the header copies held in the new skb */
		iph = (void *)(skb->data + vlan_hdr_len);
		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
				+ IP_HDR_LEN(iph));

		lro_init_desc(lro_desc, skb, iph, tcph);
		LRO_INC_STATS(lro_mgr, aggregated);
		return NULL;
	}

	/* only the segment that directly follows can be appended */
	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
		goto flush_first;

	if (lro_tcp_ip_check(iph, tcph, len - l2_len, lro_desc))
		goto flush_first;

	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
	LRO_INC_STATS(lro_mgr, aggregated);

	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
		lro_flush(lro_mgr, lro_desc);

	return NULL;

flush_first: /* send aggregated packets to the stack */
	lro_flush(lro_mgr, lro_desc);

pass_up:  /* Original packet has to be posted to the stack */
	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
			  hdr_len, sum, lro_mgr->ip_summed);
done:
	return skb;
}
496 
lro_receive_skb(struct net_lro_mgr * lro_mgr,struct sk_buff * skb,void * priv)497 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
498 		     struct sk_buff *skb,
499 		     void *priv)
500 {
501 	if (__lro_proc_skb(lro_mgr, skb, priv)) {
502 		if (lro_mgr->features & LRO_F_NAPI)
503 			netif_receive_skb(skb);
504 		else
505 			netif_rx(skb);
506 	}
507 }
508 EXPORT_SYMBOL(lro_receive_skb);
509 
/*
 * Entry point for page-fragment based receive: the packet is offered to
 * LRO and, if an skb comes back, that skb is delivered to the stack using
 * netif_receive_skb() (LRO_F_NAPI set) or netif_rx().
 */
void lro_receive_frags(struct net_lro_mgr *lro_mgr,
		       struct skb_frag_struct *frags,
		       int len, int true_size, void *priv, __wsum sum)
{
	struct sk_buff *skb = __lro_proc_segment(lro_mgr, frags, len,
						 true_size, priv, sum);

	if (skb) {
		if (lro_mgr->features & LRO_F_NAPI)
			netif_receive_skb(skb);
		else
			netif_rx(skb);
	}
}
EXPORT_SYMBOL(lro_receive_frags);
526 
lro_flush_all(struct net_lro_mgr * lro_mgr)527 void lro_flush_all(struct net_lro_mgr *lro_mgr)
528 {
529 	int i;
530 	struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
531 
532 	for (i = 0; i < lro_mgr->max_desc; i++) {
533 		if (lro_desc[i].active)
534 			lro_flush(lro_mgr, &lro_desc[i]);
535 	}
536 }
537 EXPORT_SYMBOL(lro_flush_all);
538 
lro_flush_pkt(struct net_lro_mgr * lro_mgr,struct iphdr * iph,struct tcphdr * tcph)539 void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
540 		  struct iphdr *iph, struct tcphdr *tcph)
541 {
542 	struct net_lro_desc *lro_desc;
543 
544 	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
545 	if (lro_desc->active)
546 		lro_flush(lro_mgr, lro_desc);
547 }
548 EXPORT_SYMBOL(lro_flush_pkt);
549