brcm2708: add linux 4.1 support
[openwrt.git] / target / linux / brcm2708 / patches-4.1 / 0106-vchiq_arm-Two-cacheing-fixes.patch
1 From edb21286ac7e246dfe7c9ee05101880f719e00e8 Mon Sep 17 00:00:00 2001
2 From: Phil Elwell <phil@raspberrypi.org>
3 Date: Wed, 8 Jul 2015 14:48:57 +0100
4 Subject: [PATCH 106/121] vchiq_arm: Two cacheing fixes
5
6 1) Make fragment size vary with cache line size
7 Without this patch, non-cache-line-aligned transfers may corrupt
8 (or be corrupted by) adjacent data structures.
9
10 Both ARM and VC need to be updated to enable this feature. This is
11 ensured by having the loader apply a new DT parameter -
12 cache-line-size. The existence of this parameter guarantees that the
13 kernel is capable, and the parameter will only be modified from the
14 safe default if the loader is capable.
15
16 2) Flush/invalidate vmalloc'd memory, and invalidate after reads
17 ---
18  arch/arm/boot/dts/bcm2708_common.dtsi              |   5 +
19  .../interface/vchiq_arm/vchiq_2835_arm.c           | 112 +++++++++++++--------
20  2 files changed, 77 insertions(+), 40 deletions(-)
21
22 --- a/arch/arm/boot/dts/bcm2708_common.dtsi
23 +++ b/arch/arm/boot/dts/bcm2708_common.dtsi
24 @@ -218,6 +218,7 @@
25                         compatible = "brcm,bcm2835-vchiq";
26                         reg = <0x7e00b840 0xf>;
27                         interrupts = <0 2>;
28 +                       cache-line-size = <32>;
29                 };
30  
31                 thermal: thermal {
32 @@ -270,4 +271,8 @@
33                         clock-frequency = <126000000>;
34                 };
35         };
36 +
37 +       __overrides__ {
38 +               cache_line_size = <&vchiq>, "cache-line-size:0";
39 +       };
40  };
41 --- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
42 +++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
43 @@ -42,6 +42,7 @@
44  #include <linux/platform_data/mailbox-bcm2708.h>
45  #include <linux/platform_device.h>
46  #include <linux/uaccess.h>
47 +#include <linux/of.h>
48  #include <asm/pgtable.h>
49  
50  #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
51 @@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
52  } VCHIQ_2835_ARM_STATE_T;
53  
54  static void __iomem *g_regs;
55 -static FRAGMENTS_T *g_fragments_base;
56 -static FRAGMENTS_T *g_free_fragments;
57 +static unsigned int g_cache_line_size = CACHE_LINE_SIZE; /* DT may override */
58 +static unsigned int g_fragments_size;
59 +static char *g_fragments_base;
60 +static char *g_free_fragments;
61  static struct semaphore g_free_fragments_sema;
62  static unsigned long g_virt_to_bus_offset;
63  
64 @@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_
65  
66         g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
67  
68 +       (void)of_property_read_u32(dev->of_node, "cache-line-size",
69 +                                  &g_cache_line_size);
70 +       g_fragments_size = 2 * g_cache_line_size;
71 +
72         /* Allocate space for the channels in coherent memory */
73         slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
74 -       frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
75 +       frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
76  
77         slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
78                                        &slot_phys, GFP_KERNEL);
79 @@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_
80         vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
81                 MAX_FRAGMENTS;
82  
83 -       g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
84 +       g_fragments_base = (char *)slot_mem + slot_mem_size;
85         slot_mem_size += frag_mem_size;
86  
87         g_free_fragments = g_fragments_base;
88         for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
89 -               *(FRAGMENTS_T **)&g_fragments_base[i] =
90 -                       &g_fragments_base[i + 1];
91 +               *(char **)&g_fragments_base[i*g_fragments_size] =
92 +                       &g_fragments_base[(i + 1)*g_fragments_size];
93         }
94 -       *(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
95 +       *(char **)&g_fragments_base[i * g_fragments_size] = NULL;
96         sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
97  
98         if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
99 @@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id
100  ** cached area.
101  
102  ** N.B. This implementation plays slightly fast and loose with the Linux
103 -** driver programming rules, e.g. its use of __virt_to_bus instead of
104 +** driver programming rules, e.g. its use of dmac_map_area instead of
105  ** dma_map_single, but it isn't a multi-platform driver and it benefits
106  ** from increased speed as a result.
107  */
108 @@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t
109  {
110         PAGELIST_T *pagelist;
111         struct page **pages;
112 -       struct page *page;
113         unsigned long *addrs;
114         unsigned int num_pages, offset, i;
115         char *addr, *base_addr, *next_addr;
116 @@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t
117         pages = (struct page **)(addrs + num_pages + 1);
118  
119         if (is_vmalloc_addr(buf)) {
120 -               for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
121 -                       pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
122 +               int dir = (type == PAGELIST_WRITE) ?
123 +                       DMA_TO_DEVICE : DMA_FROM_DEVICE;
124 +               unsigned long length = pagelist->length;
125 +               unsigned int offset = pagelist->offset;
126 +
127 +               for (actual_pages = 0; actual_pages < num_pages;
128 +                    actual_pages++) {
129 +                       struct page *pg = vmalloc_to_page(buf + (actual_pages *
130 +                                                                PAGE_SIZE));
131 +                       size_t bytes = PAGE_SIZE - offset;
132 +
133 +                       if (bytes > length)
134 +                               bytes = length;
135 +                       pages[actual_pages] = pg;
136 +                       dmac_map_area(page_address(pg) + offset, bytes, dir);
137 +                       length -= bytes;
138 +                       offset = 0;
139                 }
140 -                *need_release = 0; /* do not try and release vmalloc pages */
141 +               *need_release = 0; /* do not try and release vmalloc pages */
142         } else {
143                 down_read(&task->mm->mmap_sem);
144                 actual_pages = get_user_pages(task, task->mm,
145 @@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t
146                                 actual_pages = -ENOMEM;
147                         return actual_pages;
148                 }
149 -                *need_release = 1; /* release user pages */
150 +               *need_release = 1; /* release user pages */
151         }
152  
153         pagelist->length = count;
154 @@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t
155  
156         /* Partial cache lines (fragments) require special measures */
157         if ((type == PAGELIST_READ) &&
158 -               ((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
159 +               ((pagelist->offset & (g_cache_line_size - 1)) ||
160                 ((pagelist->offset + pagelist->length) &
161 -               (CACHE_LINE_SIZE - 1)))) {
162 -               FRAGMENTS_T *fragments;
163 +               (g_cache_line_size - 1)))) {
164 +               char *fragments;
165  
166                 if (down_interruptible(&g_free_fragments_sema) != 0) {
167                         kfree(pagelist);
168 @@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t
169                 WARN_ON(g_free_fragments == NULL);
170  
171                 down(&g_free_fragments_mutex);
172 -               fragments = (FRAGMENTS_T *) g_free_fragments;
173 +               fragments = g_free_fragments;
174                 WARN_ON(fragments == NULL);
175 -               g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
176 +               g_free_fragments = *(char **) g_free_fragments;
177                 up(&g_free_fragments_mutex);
178 -               pagelist->type =
179 -                        PAGELIST_READ_WITH_FRAGMENTS + (fragments -
180 -                                                        g_fragments_base);
181 +               pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
182 +                       (fragments - g_fragments_base) / g_fragments_size;
183         }
184  
185 -       for (page = virt_to_page(pagelist);
186 -               page <= virt_to_page(addrs + num_pages - 1); page++) {
187 -               flush_dcache_page(page);
188 -       }
189 +       dmac_flush_range(pagelist, addrs + num_pages);
190  
191         *ppagelist = pagelist;
192  
193 @@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int
194  
195         /* Deal with any partial cache lines (fragments) */
196         if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
197 -               FRAGMENTS_T *fragments = g_fragments_base +
198 -                       (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
199 +               char *fragments = g_fragments_base +
200 +                       (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
201 +                       g_fragments_size;
202                 int head_bytes, tail_bytes;
203 -               head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
204 -                       (CACHE_LINE_SIZE - 1);
205 +               head_bytes = (g_cache_line_size - pagelist->offset) &
206 +                       (g_cache_line_size - 1);
207                 tail_bytes = (pagelist->offset + actual) &
208 -                       (CACHE_LINE_SIZE - 1);
209 +                       (g_cache_line_size - 1);
210  
211                 if ((actual >= 0) && (head_bytes != 0)) {
212                         if (head_bytes > actual)
213 @@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int
214  
215                         memcpy((char *)page_address(pages[0]) +
216                                 pagelist->offset,
217 -                               fragments->headbuf,
218 +                               fragments,
219                                 head_bytes);
220                 }
221                 if ((actual >= 0) && (head_bytes < actual) &&
222                         (tail_bytes != 0)) {
223                         memcpy((char *)page_address(pages[num_pages - 1]) +
224                                 ((pagelist->offset + actual) &
225 -                               (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
226 -                               fragments->tailbuf, tail_bytes);
227 +                               (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
228 +                               fragments + g_cache_line_size,
229 +                               tail_bytes);
230                 }
231  
232                 down(&g_free_fragments_mutex);
233 -               *(FRAGMENTS_T **) fragments = g_free_fragments;
234 +               *(char **)fragments = g_free_fragments;
235                 g_free_fragments = fragments;
236                 up(&g_free_fragments_mutex);
237                 up(&g_free_fragments_sema);
238         }
239  
240 -        if (*need_release) {
241 +       if (*need_release) {
242 +               unsigned int length = pagelist->length;
243 +               unsigned int offset = pagelist->offset;
244 +
245                 for (i = 0; i < num_pages; i++) {
246 -                       if (pagelist->type != PAGELIST_WRITE)
247 -                               set_page_dirty(pages[i]);
248 +                       struct page *pg = pages[i];
249  
250 -                       page_cache_release(pages[i]);
251 +                       if (pagelist->type != PAGELIST_WRITE) {
252 +                               unsigned int bytes = PAGE_SIZE - offset;
253 +
254 +                               if (bytes > length)
255 +                                       bytes = length;
256 +                               dmac_unmap_area(page_address(pg) + offset,
257 +                                               bytes, DMA_FROM_DEVICE);
258 +                               length -= bytes;
259 +                               offset = 0;
260 +                               set_page_dirty(pg);
261 +                       }
262 +                       page_cache_release(pg);
263                 }
264 -        }
265 +       }
266  
267         kfree(pagelist);
268  }