kernel: backport a few bgmac patches to 4.0
[openwrt.git] / target / linux / generic / patches-4.0 / 072-03-bgmac-implement-scatter-gather-support.patch
1 From: Felix Fietkau <nbd@openwrt.org>
2 Date: Mon, 23 Mar 2015 02:42:26 +0100
3 Subject: [PATCH] bgmac: implement scatter/gather support
4
5 Always use software checksumming, since the hardware does not have any
6 checksum offload support.
7 This significantly improves local TCP tx performance.
8
9 Signed-off-by: Felix Fietkau <nbd@openwrt.org>
10 ---
11
12 --- a/drivers/net/ethernet/broadcom/bgmac.c
13 +++ b/drivers/net/ethernet/broadcom/bgmac.c
14 @@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
15         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
16  }
17  
18 +static void
19 +bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
20 +                    int i, int len, u32 ctl0)
21 +{
22 +       struct bgmac_slot_info *slot;
23 +       struct bgmac_dma_desc *dma_desc;
24 +       u32 ctl1;
25 +
26 +       if (i == ring->num_slots - 1)
27 +               ctl0 |= BGMAC_DESC_CTL0_EOT;
28 +       /* Only the length field of ctl1 is used; mask len to fit it */
29 +       ctl1 = len & BGMAC_DESC_CTL1_LEN;
30 +       /* Program descriptor i with the DMA address held in slot i */
31 +       slot = &ring->slots[i];
32 +       dma_desc = &ring->cpu_base[i];
33 +       dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
34 +       dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
35 +       dma_desc->ctl0 = cpu_to_le32(ctl0);
36 +       dma_desc->ctl1 = cpu_to_le32(ctl1);
37 +}
38 +
39  static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
40                                     struct bgmac_dma_ring *ring,
41                                     struct sk_buff *skb)
42  {
43         struct device *dma_dev = bgmac->core->dma_dev;
44         struct net_device *net_dev = bgmac->net_dev;
45 -       struct bgmac_dma_desc *dma_desc;
46 -       struct bgmac_slot_info *slot;
47 -       u32 ctl0, ctl1;
48 +       struct bgmac_slot_info *slot = &ring->slots[ring->end];
49         int free_slots;
50 +       int nr_frags;
51 +       u32 flags;
52 +       int index = ring->end;
53 +       int i;
54  
55         if (skb->len > BGMAC_DESC_CTL1_LEN) {
56                 bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
57 -               goto err_stop_drop;
58 +               goto err_drop;
59         }
60  
61 +       /* No checksum offload in hardware: drop the skb if sw csum fails */
62 +       if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
63 +               goto err_drop;
64 +
65 +       nr_frags = skb_shinfo(skb)->nr_frags;
66         if (ring->start <= ring->end)
67                 free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
68         else
69                 free_slots = ring->start - ring->end;
70 -       if (free_slots == 1) {
71 +       /* Head and each fragment take a slot; one slot always stays free */
72 +       if (free_slots <= nr_frags + 1) {
73                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
74                 netif_stop_queue(net_dev);
75                 return NETDEV_TX_BUSY;
76         }
77  
78 -       slot = &ring->slots[ring->end];
79 -       slot->skb = skb;
80 -       slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
81 +       slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
82                                         DMA_TO_DEVICE);
83 -       if (dma_mapping_error(dma_dev, slot->dma_addr)) {
84 -               bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
85 -                         ring->mmio_base);
86 -               goto err_stop_drop;
87 -       }
88 +       if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
89 +               goto err_dma_head;
90  
91 -       ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
92 -       if (ring->end == ring->num_slots - 1)
93 -               ctl0 |= BGMAC_DESC_CTL0_EOT;
94 -       ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
95 +       flags = BGMAC_DESC_CTL0_SOF;
96 +       if (!nr_frags)
97 +               flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
98 +
99 +       bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
100 +       flags = 0;
101 +       /* One descriptor per page fragment; the last one closes the frame */
102 +       for (i = 0; i < nr_frags; i++) {
103 +               struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
104 +               int len = skb_frag_size(frag);
105 +
106 +               index = (index + 1) % BGMAC_TX_RING_SLOTS;
107 +               slot = &ring->slots[index];
108 +               slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
109 +                                                 len, DMA_TO_DEVICE);
110 +               if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
111 +                       goto err_dma;
112  
113 -       dma_desc = ring->cpu_base;
114 -       dma_desc += ring->end;
115 -       dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
116 -       dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
117 -       dma_desc->ctl0 = cpu_to_le32(ctl0);
118 -       dma_desc->ctl1 = cpu_to_le32(ctl1);
119 +               if (i == nr_frags - 1)
120 +                       flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
121 +
122 +               bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
123 +       }
124 +       /* The skb is tracked in the last slot that this packet occupies */
125 +       slot->skb = skb;
126  
127         netdev_sent_queue(net_dev, skb->len);
128  
129 @@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
130         /* Increase ring->end to point empty slot. We tell hardware the first
131          * slot it should *not* read.
132          */
133 -       if (++ring->end >= BGMAC_TX_RING_SLOTS)
134 -               ring->end = 0;
135 +       ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
136         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
137                     ring->index_base +
138                     ring->end * sizeof(struct bgmac_dma_desc));
139  
140 -       /* Always keep one slot free to allow detecting bugged calls. */
141 -       if (--free_slots == 1)
142 +       free_slots -= nr_frags + 1;
143 +       if (free_slots < 8)
144                 netif_stop_queue(net_dev);
145  
146         return NETDEV_TX_OK;
147  
148 -err_stop_drop:
149 -       netif_stop_queue(net_dev);
150 +err_dma:
151 +       dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
152 +                        skb_headlen(skb), DMA_TO_DEVICE);
153 +       /* Head is in slot end; frags 0..i-1 follow in slots end+1..end+i */
154 +       for (; i > 0; i--) {
155 +               int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
156 +               struct bgmac_slot_info *slot = &ring->slots[index];
157 +               u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
158 +               int len = ctl1 & BGMAC_DESC_CTL1_LEN;
159 +
160 +               dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
161 +       }
162 +
163 +err_dma_head:
164 +       bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
165 +                 ring->mmio_base);
166 +
167 +err_drop:
168         dev_kfree_skb(skb);
169         return NETDEV_TX_OK;
170  }
171 @@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
172  
173         while (ring->start != empty_slot) {
174                 struct bgmac_slot_info *slot = &ring->slots[ring->start];
175 +               struct bgmac_dma_desc *desc = &ring->cpu_base[ring->start];
176 +               int len = le32_to_cpu(desc->ctl1) & BGMAC_DESC_CTL1_LEN;
177  
178 -               if (slot->skb) {
179 +               if (!slot->dma_addr) {
180 +                       bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
181 +                                 ring->start, ring->end);
182 +                       goto next;
183 +               }
184 +               /* SOF is a ctl0 flag; it marks the dma_map_single()'d head */
185 +               if (le32_to_cpu(desc->ctl0) & BGMAC_DESC_CTL0_SOF)
186                         /* Unmap no longer used buffer */
187 -                       dma_unmap_single(dma_dev, slot->dma_addr,
188 -                                        slot->skb->len, DMA_TO_DEVICE);
189 -                       slot->dma_addr = 0;
190 +                       dma_unmap_single(dma_dev, slot->dma_addr, len,
191 +                                        DMA_TO_DEVICE);
192 +               else
193 +                       dma_unmap_page(dma_dev, slot->dma_addr, len,
194 +                                      DMA_TO_DEVICE);
195  
196 +               if (slot->skb) {
197                         bytes_compl += slot->skb->len;
198                         pkts_compl++;
199  
200                         /* Free memory! :) */
201                         dev_kfree_skb(slot->skb);
202                         slot->skb = NULL;
203 -               } else {
204 -                       bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
205 -                                 ring->start, ring->end);
206                 }
207  
208 +next:
209 +               slot->dma_addr = 0;
210                 if (++ring->start >= BGMAC_TX_RING_SLOTS)
211                         ring->start = 0;
212                 freed = true;
213         }
214  
215 +       if (!pkts_compl)
216 +               return;
217 +
218         netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
219  
220 -       if (freed && netif_queue_stopped(bgmac->net_dev))
221 +       if (netif_queue_stopped(bgmac->net_dev))
222                 netif_wake_queue(bgmac->net_dev);
223  }
224  
225 @@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
226                                    struct bgmac_dma_ring *ring)
227  {
228         struct device *dma_dev = bgmac->core->dma_dev;
229 +       struct bgmac_dma_desc *dma_desc = ring->cpu_base;
230         struct bgmac_slot_info *slot;
231         int i;
232  
233         for (i = 0; i < ring->num_slots; i++) {
234 +               int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
235 +
236                 slot = &ring->slots[i];
237 -               if (slot->skb) {
238 -                       if (slot->dma_addr)
239 -                               dma_unmap_single(dma_dev, slot->dma_addr,
240 -                                                slot->skb->len, DMA_TO_DEVICE);
241 -                       dev_kfree_skb(slot->skb);
242 -               }
243 +               dev_kfree_skb(slot->skb);
244 +
245 +               if (!slot->dma_addr)
246 +                       continue;
247 +               /* Only the SOF (head) buffer was mapped as a single buffer */
248 +               if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF)
249 +                       dma_unmap_single(dma_dev, slot->dma_addr,
250 +                                        len, DMA_TO_DEVICE);
251 +               else
252 +                       dma_unmap_page(dma_dev, slot->dma_addr,
253 +                                      len, DMA_TO_DEVICE);
254         }
255  }
256  
257 @@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
258                 goto err_dma_free;
259         }
260  
261 +       net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
262 +       net_dev->hw_features = net_dev->features;
263 +       net_dev->vlan_features = net_dev->features;
264 +
265         err = register_netdev(bgmac->net_dev);
266         if (err) {
267                 bgmac_err(bgmac, "Cannot register net device\n");