brcm2708: update against latest rpi-3.10.y branch
[openwrt.git] / target / linux / brcm2708 / patches-3.10 / 0149-bcm2708_fb-use-IRQ-for-DMA-copies.patch
1 From ab41011a32fdc35352ee6b308f29b6954056d787 Mon Sep 17 00:00:00 2001
2 From: Luke Diamand <luked@broadcom.com>
3 Date: Wed, 1 Jan 2014 00:45:29 +0000
4 Subject: [PATCH 149/174] bcm2708_fb: use IRQ for DMA copies
5
6 The copyarea ioctl() uses DMA to speed things along. This
7 was busy-waiting for completion. This change supports using
8 an interrupt instead for larger transfers. For small
9 transfers, busy-waiting is still likely to be faster.
10
11 Signed-off-by: Luke Diamand <luke@diamand.org>
12 ---
13  arch/arm/mach-bcm2708/dma.c              |  8 ++++
14  arch/arm/mach-bcm2708/include/mach/dma.h |  2 +
15  drivers/video/bcm2708_fb.c               | 64 ++++++++++++++++++++++++++++++--
16  3 files changed, 70 insertions(+), 4 deletions(-)
17
18 --- a/arch/arm/mach-bcm2708/dma.c
19 +++ b/arch/arm/mach-bcm2708/dma.c
20 @@ -83,6 +83,14 @@ extern void bcm_dma_wait_idle(void __iom
21  
22  EXPORT_SYMBOL_GPL(bcm_dma_start);
23  
24 +extern bool bcm_dma_is_busy(void __iomem *dma_chan_base)
25 +{
26 +       dsb();
27 +
28 +       return readl(dma_chan_base + BCM2708_DMA_CS) & BCM2708_DMA_ACTIVE;
29 +}
30 +EXPORT_SYMBOL_GPL(bcm_dma_is_busy);
31 +
32  /* Complete an ongoing DMA (assuming its results are to be ignored)
33     Does nothing if there is no DMA in progress.
34     This routine waits for the current AXI transfer to complete before
35 --- a/arch/arm/mach-bcm2708/include/mach/dma.h
36 +++ b/arch/arm/mach-bcm2708/include/mach/dma.h
37 @@ -64,11 +64,13 @@ struct bcm2708_dma_cb {
38         unsigned long next;
39         unsigned long pad[2];
40  };
41 +struct scatterlist;
42  
43  extern int bcm_sg_suitable_for_dma(struct scatterlist *sg_ptr, int sg_len);
44  extern void bcm_dma_start(void __iomem *dma_chan_base,
45                           dma_addr_t control_block);
46  extern void bcm_dma_wait_idle(void __iomem *dma_chan_base);
47 +extern bool bcm_dma_is_busy(void __iomem *dma_chan_base);
48  extern int /*rc*/ bcm_dma_abort(void __iomem *dma_chan_base);
49  
50  /* When listing features we can ask for when allocating DMA channels give
51 --- a/drivers/video/bcm2708_fb.c
52 +++ b/drivers/video/bcm2708_fb.c
53 @@ -21,6 +21,7 @@
54  #include <linux/mm.h>
55  #include <linux/fb.h>
56  #include <linux/init.h>
57 +#include <linux/interrupt.h>
58  #include <linux/ioport.h>
59  #include <linux/list.h>
60  #include <linux/platform_device.h>
61 @@ -48,6 +49,11 @@ static const char *bcm2708_name = "BCM27
62  
63  #define DRIVER_NAME "bcm2708_fb"
64  
65 +static u32 dma_busy_wait_threshold = 1<<15;
66 +module_param(dma_busy_wait_threshold, int, 0644);
67 +MODULE_PARM_DESC(dma_busy_wait_threshold, "Busy-wait for DMA completion below this area");
68 +
69 +
70  /* this data structure describes each frame buffer device we find */
71  
72  struct fbinfo_s {
73 @@ -77,6 +83,7 @@ struct bcm2708_fb {
74         void *cb_base;          /* DMA control blocks */
75         dma_addr_t cb_handle;
76         struct dentry *debugfs_dir;
77 +       wait_queue_head_t dma_waitq;
78         struct bcm2708_fb_stats stats;
79  };
80  
81 @@ -95,6 +102,10 @@ static int bcm2708_fb_debugfs_init(struc
82                         "dma_copies",
83                         offsetof(struct bcm2708_fb_stats, dma_copies)
84                 },
85 +               {
86 +                       "dma_irqs",
87 +                       offsetof(struct bcm2708_fb_stats, dma_irqs)
88 +               },
89         };
90  
91         fb->debugfs_dir = debugfs_create_dir(DRIVER_NAME, NULL);
92 @@ -400,6 +411,7 @@ static void bcm2708_fb_copyarea(struct f
93         int bytes_per_pixel = (info->var.bits_per_pixel + 7) >> 3;
94         /* Channel 0 supports larger bursts and is a bit faster */
95         int burst_size = (fb->dma_chan == 0) ? 8 : 2;
96 +       int pixels = region->width * region->height;
97  
98         /* Fallback to cfb_copyarea() if we don't like something */
99         if (bytes_per_pixel > 4 ||
100 @@ -492,8 +504,20 @@ static void bcm2708_fb_copyarea(struct f
101         cb->next = 0;
102  
103  
104 -       bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
105 -       bcm_dma_wait_idle(fb->dma_chan_base);
106 +       if (pixels < dma_busy_wait_threshold) {
107 +               bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
108 +               bcm_dma_wait_idle(fb->dma_chan_base);
109 +       } else {
110 +               void __iomem *dma_chan = fb->dma_chan_base;
111 +               cb->info |= BCM2708_DMA_INT_EN;
112 +               bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
113 +               while (bcm_dma_is_busy(dma_chan)) {
114 +                       wait_event_interruptible(
115 +                               fb->dma_waitq,
116 +                               !bcm_dma_is_busy(dma_chan));
117 +               }
118 +               fb->stats.dma_irqs++;
119 +       }
120         fb->stats.dma_copies++;
121  }
122  
123 @@ -504,6 +528,24 @@ static void bcm2708_fb_imageblit(struct
124         cfb_imageblit(info, image);
125  }
126  
127 +static irqreturn_t bcm2708_fb_dma_irq(int irq, void *cxt)
128 +{
129 +       struct bcm2708_fb *fb = cxt;
130 +
131 +       /* FIXME: should read status register to check if this is
132 +        * actually interrupting us or not, in case this interrupt
133 +        * ever becomes shared amongst several DMA channels
134 +        *
135 +        * readl(fb->dma_chan_base + BCM2708_DMA_CS) & BCM2708_DMA_INT;
136 +        */
137 +
138 +       /* acknowledge the interrupt */
139 +       writel(BCM2708_DMA_INT, fb->dma_chan_base + BCM2708_DMA_CS);
140 +
141 +       wake_up(&fb->dma_waitq);
142 +       return IRQ_HANDLED;
143 +}
144 +
145  static struct fb_ops bcm2708_fb_ops = {
146         .owner = THIS_MODULE,
147         .fb_check_var = bcm2708_fb_check_var,
148 @@ -568,6 +610,7 @@ static int bcm2708_fb_register(struct bc
149         fb->fb.monspecs.dclkmax = 100000000;
150  
151         bcm2708_fb_set_bitfields(&fb->fb.var);
152 +       init_waitqueue_head(&fb->dma_waitq);
153  
154         /*
155          * Allocate colourmap.
156 @@ -593,14 +636,15 @@ static int bcm2708_fb_probe(struct platf
157         struct bcm2708_fb *fb;
158         int ret;
159  
160 -       fb = kmalloc(sizeof(struct bcm2708_fb), GFP_KERNEL);
161 +       fb = kzalloc(sizeof(struct bcm2708_fb), GFP_KERNEL);
162         if (!fb) {
163                 dev_err(&dev->dev,
164                         "could not allocate new bcm2708_fb struct\n");
165                 ret = -ENOMEM;
166                 goto free_region;
167         }
168 -       memset(fb, 0, sizeof(struct bcm2708_fb));
169 +
170 +       bcm2708_fb_debugfs_init(fb);
171  
172  
173         bcm2708_fb_debugfs_init(fb);
174 @@ -624,6 +668,14 @@ static int bcm2708_fb_probe(struct platf
175         }
176         fb->dma_chan = ret;
177  
178 +       ret = request_irq(fb->dma_irq, bcm2708_fb_dma_irq,
179 +                         0, "bcm2708_fb dma", fb);
180 +       if (ret) {
181 +               pr_err("%s: failed to request DMA irq\n", __func__);
182 +               goto free_dma_chan;
183 +       }
184 +
185 +
186         pr_info("BCM2708FB: allocated DMA channel %d @ %p\n",
187                fb->dma_chan, fb->dma_chan_base);
188  
189 @@ -635,6 +687,8 @@ static int bcm2708_fb_probe(struct platf
190                 goto out;
191         }
192  
193 +free_dma_chan:
194 +       bcm_dma_chan_free(fb->dma_chan);
195  free_cb:
196         dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle);
197  free_fb:
198 @@ -662,6 +716,8 @@ static int bcm2708_fb_remove(struct plat
199                           fb->dma);
200         bcm2708_fb_debugfs_deinit(fb);
201  
202 +       free_irq(fb->dma_irq, fb);
203 +
204         kfree(fb);
205  
206         return 0;