static inline void __segment_seek(struct page **page, unsigned int *poff,
                                  unsigned int *plen, unsigned int off)
{
        /* advance the cursor by @off bytes, stepping across page boundaries */
        *poff += off;
        *page += *poff / PAGE_SIZE;
        *poff = *poff % PAGE_SIZE;
        *plen -= off;
}

static inline int __splice_segment(struct page *page, unsigned int poff,
                                   unsigned int plen, unsigned int *off,
                                   unsigned int *len, struct sk_buff *skb,
                                   struct splice_pipe_desc *spd)
{
        /* nothing left to splice, don't queue an empty pipe buffer */
        if (!*len)
                return 1;

        /* skip this segment if already processed */
        if (*off >= plen) {
                *off -= plen;
                return 0;
        }

        /* ignore any bits we already processed */
        if (*off) {
                __segment_seek(&page, &poff, &plen, *off);
                *off = 0;
        }

        do {
                unsigned int flen = min(*len, plen);

                /* the linear region may spread across several pages */
                flen = min_t(unsigned int, flen, PAGE_SIZE - poff);

                if (spd_fill_page(spd, page, flen, poff, skb))
                        return 1;

                __segment_seek(&page, &poff, &plen, flen);
                *len -= flen;

        } while (*len && plen);

        return 0;
}

static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
                             unsigned int *len,
                             struct splice_pipe_desc *spd)
{
        int seg;

        /*
         * map the linear part
         */
        if (__splice_segment(virt_to_page(skb->data),
                             (unsigned long) skb->data & (PAGE_SIZE - 1),
                             skb_headlen(skb),
                             offset, len, skb, spd))
                return 1;

        /*
         * then map the fragments
         */
        for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
                const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

                if (__splice_segment(f->page, f->page_offset, f->size,
                                     offset, len, skb, spd))
                        return 1;
        }

        /*
         * now see if we have a frag_list to map
         */
        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;

                for (; list && *len; list = list->next)
                        if (__skb_splice_bits(list, offset, len, spd))
                                return 1;
        }

        return 0;
}

/*
 * Map data from the skb to a pipe. Should handle the linear part,
 * the fragments, and the frag list.
 */
int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int tlen,
                    unsigned int flags)
{
        struct partial_page partial[PIPE_BUFFERS];
        struct page *pages[PIPE_BUFFERS];
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
                .flags = flags,
                .ops = &sock_pipe_buf_ops,
                .spd_release = sock_spd_release,
        };
        struct sk_buff *skb;

        /*
         * I'd love to avoid the clone here, but tcp_read_sock()
         * ignores reference counts and unconditionally kills the sk_buff
         * on return from the actor.
         */
        skb = skb_clone(__skb, GFP_KERNEL);
        if (unlikely(!skb))
                return -ENOMEM;

        __skb_splice_bits(skb, &offset, &tlen, &spd);

        /*
         * drop our reference to the clone, the pipe consumption will
         * drop the rest.
         */
        kfree_skb(skb);

        if (spd.nr_pages) {
                int ret;
                struct sock *sk = __skb->sk;

                /*
                 * Drop the socket lock, otherwise we have reverse
                 * locking dependencies between sk_lock and i_mutex
                 * here as compared to sendfile(). We enter here
                 * with the socket lock held, and splice_to_pipe() will
                 * grab the pipe inode lock. For sendfile() emulation,
                 * we call into ->sendpage() with the i_mutex lock held
                 * and networking will grab the socket lock.
                 */
                release_sock(sk);
                ret = splice_to_pipe(pipe, &spd);
                lock_sock(sk);
                return ret;
        }

        return 0;
}
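
/*
 * A minimal usage sketch, not part of the file above: skb_splice_bits()
 * is meant to be driven as a tcp_read_sock() actor, which is how the
 * kernel's own tcp_splice_read() (net/ipv4/tcp.c) reaches it. The names
 * below (my_splice_state, my_splice_recv, my_tcp_splice) are hypothetical
 * stand-ins; tcp_read_sock(), read_descriptor_t, and the sk_read_actor_t
 * contract are the real kernel interfaces being illustrated.
 */
#include <linux/skbuff.h>
#include <linux/splice.h>
#include <net/tcp.h>

struct my_splice_state {
        struct pipe_inode_info  *pipe;  /* destination pipe */
        size_t                  len;    /* bytes still to splice */
        unsigned int            flags;  /* SPLICE_F_* flags */
};

/*
 * Called by tcp_read_sock() for each skb on the receive queue. The actor
 * must return how many bytes it consumed, which is what skb_splice_bits()
 * reports (via splice_to_pipe()) on success.
 */
static int my_splice_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
                          unsigned int offset, size_t len)
{
        struct my_splice_state *ss = rd_desc->arg.data;

        return skb_splice_bits(skb, offset, ss->pipe, ss->len, ss->flags);
}

/* caller must hold the socket lock, as skb_splice_bits() expects */
static int my_tcp_splice(struct sock *sk, struct my_splice_state *ss)
{
        /* stash our context where the actor can find it */
        read_descriptor_t rd_desc = {
                .arg.data = ss,
        };

        /* walk the receive queue, invoking my_splice_recv() per skb */
        return tcp_read_sock(sk, &rd_desc, my_splice_recv);
}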