brcm2708: update linux 4.4 patches to latest version
[openwrt.git] / target / linux / brcm2708 / patches-4.4 / 0020-dmaengine-bcm2835-Add-slave-dma-support.patch
1 From dc5b926436bccc4efbb1695ec3696b5db6746d3b Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
3 Date: Thu, 9 Apr 2015 12:34:11 +0200
4 Subject: [PATCH 020/170] dmaengine: bcm2835: Add slave dma support
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 Add slave transfer capability to BCM2835 dmaengine driver.
10 This patch is pulled from the bcm2708-dmaengine driver in the
11 Raspberry Pi repo. The work was done by Gellert Weisz.
12
13 Tested using the bcm2835-mmc driver from the same repo.
14
15 Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
16 ---
17  drivers/dma/bcm2835-dma.c | 206 ++++++++++++++++++++++++++++++++++++++++++----
18  1 file changed, 192 insertions(+), 14 deletions(-)
19
20 --- a/drivers/dma/bcm2835-dma.c
21 +++ b/drivers/dma/bcm2835-dma.c
22 @@ -1,11 +1,10 @@
23  /*
24   * BCM2835 DMA engine support
25   *
26 - * This driver only supports cyclic DMA transfers
27 - * as needed for the I2S module.
28 - *
29   * Author:      Florian Meier <florian.meier@koalo.de>
30   *              Copyright 2013
31 + *              Gellert Weisz <gellert@raspberrypi.org>
32 + *              Copyright 2013-2014
33   *
34   * Based on
35   *     OMAP DMAengine support by Russell King
36 @@ -95,6 +94,8 @@ struct bcm2835_desc {
37         size_t size;
38  };
39  
40 +#define BCM2835_DMA_WAIT_CYCLES        0  /* Slow down DMA transfers: 0-31 */
41 +
42  #define BCM2835_DMA_CS         0x00
43  #define BCM2835_DMA_ADDR       0x04
44  #define BCM2835_DMA_SOURCE_AD  0x0c
45 @@ -111,12 +112,16 @@ struct bcm2835_desc {
46  #define BCM2835_DMA_RESET      BIT(31) /* WO, self clearing */
47  
48  #define BCM2835_DMA_INT_EN     BIT(0)
49 +#define BCM2835_DMA_WAIT_RESP  BIT(3)
50  #define BCM2835_DMA_D_INC      BIT(4)
51 +#define BCM2835_DMA_D_WIDTH    BIT(5)
52  #define BCM2835_DMA_D_DREQ     BIT(6)
53  #define BCM2835_DMA_S_INC      BIT(8)
54 +#define BCM2835_DMA_S_WIDTH    BIT(9)
55  #define BCM2835_DMA_S_DREQ     BIT(10)
56  
57  #define BCM2835_DMA_PER_MAP(x) ((x) << 16)
58 +#define BCM2835_DMA_WAITS(x)   (((x) & 0x1f) << 21)
59  
60  #define BCM2835_DMA_DATA_TYPE_S8       1
61  #define BCM2835_DMA_DATA_TYPE_S16      2
62 @@ -130,6 +135,14 @@ struct bcm2835_desc {
63  #define BCM2835_DMA_CHAN(n)    ((n) << 8) /* Base address */
64  #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
65  
66 +#define MAX_NORMAL_TRANSFER    SZ_1G
67 +/*
68 + * Max length on a Lite channel is 65535 bytes.
69 + * DMA handles byte-enables on SDRAM reads and writes even on 128-bit accesses,
70 + * but byte-enables don't exist on peripheral addresses, so align to 32-bit.
71 + */
72 +#define MAX_LITE_TRANSFER      (SZ_64K - 4)
73 +
74  static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
75  {
76         return container_of(d, struct bcm2835_dmadev, ddev);
77 @@ -226,12 +239,18 @@ static irqreturn_t bcm2835_dma_callback(
78         d = c->desc;
79  
80         if (d) {
81 -               /* TODO Only works for cyclic DMA */
82 -               vchan_cyclic_callback(&d->vd);
83 -       }
84 +               if (c->cyclic) {
85 +                       vchan_cyclic_callback(&d->vd);
86  
87 -       /* Keep the DMA engine running */
88 -       writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
89 +                       /* Keep the DMA engine running */
90 +                       writel(BCM2835_DMA_ACTIVE,
91 +                              c->chan_base + BCM2835_DMA_CS);
92 +
93 +               } else {
94 +                       vchan_cookie_complete(&c->desc->vd);
95 +                       bcm2835_dma_start_desc(c);
96 +               }
97 +       }
98  
99         spin_unlock_irqrestore(&c->vc.lock, flags);
100  
101 @@ -339,8 +358,6 @@ static void bcm2835_dma_issue_pending(st
102         struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
103         unsigned long flags;
104  
105 -       c->cyclic = true; /* Nothing else is implemented */
106 -
107         spin_lock_irqsave(&c->vc.lock, flags);
108         if (vchan_issue_pending(&c->vc) && !c->desc)
109                 bcm2835_dma_start_desc(c);
110 @@ -358,7 +375,7 @@ static struct dma_async_tx_descriptor *b
111         struct bcm2835_desc *d;
112         dma_addr_t dev_addr;
113         unsigned int es, sync_type;
114 -       unsigned int frame;
115 +       unsigned int frame, max_size;
116         int i;
117  
118         /* Grab configuration */
119 @@ -393,7 +410,12 @@ static struct dma_async_tx_descriptor *b
120  
121         d->c = c;
122         d->dir = direction;
123 -       d->frames = buf_len / period_len;
124 +       if (c->ch >= 8) /* LITE channel */
125 +               max_size = MAX_LITE_TRANSFER;
126 +       else
127 +               max_size = MAX_NORMAL_TRANSFER;
128 +       period_len = min(period_len, max_size);
129 +       d->frames = (buf_len - 1) / period_len + 1;
130  
131         d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL);
132         if (!d->cb_list) {
133 @@ -441,17 +463,171 @@ static struct dma_async_tx_descriptor *b
134                                 BCM2835_DMA_PER_MAP(c->dreq);
135  
136                 /* Length of a frame */
137 -               control_block->length = period_len;
138 +               if (frame != d->frames - 1)
139 +                       control_block->length = period_len;
140 +               else
141 +                       control_block->length = buf_len - (d->frames - 1) *
142 +                                               period_len;
143                 d->size += control_block->length;
144  
145                 /*
146                  * Next block is the next frame.
147 -                * This DMA engine driver currently only supports cyclic DMA.
148 +                * This function is called on cyclic DMA transfers.
149                  * Therefore, wrap around at number of frames.
150                  */
151                 control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr;
152         }
153  
154 +       c->cyclic = true;
155 +
156 +       return vchan_tx_prep(&c->vc, &d->vd, flags);
157 +}
158 +
159 +static struct dma_async_tx_descriptor *
160 +bcm2835_dma_prep_slave_sg(struct dma_chan *chan,
161 +                         struct scatterlist *sgl,
162 +                         unsigned int sg_len,
163 +                         enum dma_transfer_direction direction,
164 +                         unsigned long flags, void *context)
165 +{
166 +       struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
167 +       enum dma_slave_buswidth dev_width;
168 +       struct bcm2835_desc *d;
169 +       dma_addr_t dev_addr;
170 +       struct scatterlist *sgent;
171 +       unsigned int i, sync_type, split_cnt, max_size;
172 +
173 +       if (!is_slave_direction(direction)) {
174 +               dev_err(chan->device->dev, "direction not supported\n");
175 +               return NULL;
176 +       }
177 +
178 +       if (direction == DMA_DEV_TO_MEM) {
179 +               dev_addr = c->cfg.src_addr;
180 +               dev_width = c->cfg.src_addr_width;
181 +               sync_type = BCM2835_DMA_S_DREQ;
182 +       } else {
183 +               dev_addr = c->cfg.dst_addr;
184 +               dev_width = c->cfg.dst_addr_width;
185 +               sync_type = BCM2835_DMA_D_DREQ;
186 +       }
187 +
188 +       /* Bus width translates to the element size (ES) */
189 +       switch (dev_width) {
190 +       case DMA_SLAVE_BUSWIDTH_4_BYTES:
191 +               break;
192 +       default:
193 +               dev_err(chan->device->dev, "buswidth not supported: %i\n",
194 +                       dev_width);
195 +               return NULL;
196 +       }
197 +
198 +       /* Allocate and setup the descriptor. */
199 +       d = kzalloc(sizeof(*d), GFP_NOWAIT);
200 +       if (!d)
201 +               return NULL;
202 +
203 +       d->dir = direction;
204 +
205 +       if (c->ch >= 8) /* LITE channel */
206 +               max_size = MAX_LITE_TRANSFER;
207 +       else
208 +               max_size = MAX_NORMAL_TRANSFER;
209 +
210 +       /*
211 +        * Store the length of the SG list in d->frames
212 +        * taking care to account for splitting up transfers
213 +        * too large for a LITE channel
214 +        */
215 +       d->frames = 0;
216 +       for_each_sg(sgl, sgent, sg_len, i) {
217 +               unsigned int len = sg_dma_len(sgent);
218 +
219 +               d->frames += len / max_size + 1;
220 +       }
221 +
222 +       /* Allocate memory for control blocks */
223 +       d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb);
224 +       d->control_block_base = dma_zalloc_coherent(chan->device->dev,
225 +                       d->control_block_size, &d->control_block_base_phys,
226 +                       GFP_NOWAIT);
227 +       if (!d->control_block_base) {
228 +               kfree(d);
229 +               return NULL;
230 +       }
231 +
232 +       /*
233 +        * Iterate over all SG entries, create a control block
234 +        * for each frame and link them together.
235 +        * Count the number of times an SG entry had to be split
236 +        * as a result of using a LITE channel
237 +        */
238 +       split_cnt = 0;
239 +
240 +       for_each_sg(sgl, sgent, sg_len, i) {
241 +               unsigned int j;
242 +               dma_addr_t addr = sg_dma_address(sgent);
243 +               unsigned int len = sg_dma_len(sgent);
244 +
245 +               for (j = 0; j < len; j += max_size) {
246 +                       struct bcm2835_dma_cb *control_block =
247 +                               &d->control_block_base[i + split_cnt];
248 +
249 +                       /* Setup addresses */
250 +                       if (d->dir == DMA_DEV_TO_MEM) {
251 +                               control_block->info = BCM2835_DMA_D_INC |
252 +                                                     BCM2835_DMA_D_WIDTH |
253 +                                                     BCM2835_DMA_S_DREQ;
254 +                               control_block->src = dev_addr;
255 +                               control_block->dst = addr + (dma_addr_t)j;
256 +                       } else {
257 +                               control_block->info = BCM2835_DMA_S_INC |
258 +                                                     BCM2835_DMA_S_WIDTH |
259 +                                                     BCM2835_DMA_D_DREQ;
260 +                               control_block->src = addr + (dma_addr_t)j;
261 +                               control_block->dst = dev_addr;
262 +                       }
263 +
264 +                       /* Common part */
265 +                       control_block->info |=
266 +                               BCM2835_DMA_WAITS(BCM2835_DMA_WAIT_CYCLES);
267 +                       control_block->info |= BCM2835_DMA_WAIT_RESP;
268 +
269                         /* Interrupt on the last control block only */
270 +                       if (i == sg_len - 1 && len - j <= max_size)
271 +                               control_block->info |= BCM2835_DMA_INT_EN;
272 +
273 +                       /* Setup synchronization */
274 +                       if (sync_type)
275 +                               control_block->info |= sync_type;
276 +
277 +                       /* Setup DREQ channel */
278 +                       if (c->dreq)
279 +                               control_block->info |=
280 +                                       BCM2835_DMA_PER_MAP(c->dreq);
281 +
282 +                       /* Length of a frame */
283 +                       control_block->length = min(len - j, max_size);
284 +                       d->size += control_block->length;
285 +
286 +                       if (i < sg_len - 1 || len - j > max_size) {
287 +                               /* Next block is the next frame. */
288 +                               control_block->next =
289 +                                       d->control_block_base_phys +
290 +                                       sizeof(struct bcm2835_dma_cb) *
291 +                                       (i + split_cnt + 1);
292 +                       } else {
293 +                               /* Next block is empty. */
294 +                               control_block->next = 0;
295 +                       }
296 +
297 +                       if (len - j > max_size)
298 +                               split_cnt++;
299 +               }
300 +       }
301 +
302 +       c->cyclic = false;
303 +
304         return vchan_tx_prep(&c->vc, &d->vd, flags);
305  error_cb:
306         i--;
307 @@ -620,6 +796,7 @@ static int bcm2835_dma_probe(struct plat
308         od->ddev.device_tx_status = bcm2835_dma_tx_status;
309         od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
310         od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
311 +       od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg;
312         od->ddev.device_config = bcm2835_dma_slave_config;
313         od->ddev.device_terminate_all = bcm2835_dma_terminate_all;
314         od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
315 @@ -708,4 +885,5 @@ module_platform_driver(bcm2835_dma_drive
316  MODULE_ALIAS("platform:bcm2835-dma");
317  MODULE_DESCRIPTION("BCM2835 DMA engine driver");
318  MODULE_AUTHOR("Florian Meier <florian.meier@koalo.de>");
319 +MODULE_AUTHOR("Gellert Weisz <gellert@raspberrypi.org>");
320  MODULE_LICENSE("GPL v2");