brcm2708: update linux 4.4 patches to latest version
target/linux/brcm2708/patches-4.4/0114-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch
From 1c343ed1f86866d2625518774536bbef77f7a20a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 30 Oct 2015 10:09:02 -0700
Subject: [PATCH 114/170] drm/vc4: Add an interface for capturing the GPU state
 after a hang.

This can be parsed with the vc4-gpu-tools tools to help figure out
what was going on.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_bo.c  |   4 +-
 drivers/gpu/drm/vc4/vc4_drv.c |   1 +
 drivers/gpu/drm/vc4/vc4_drv.h |   4 +
 drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    |  45 ++++++++++
 5 files changed, 237 insertions(+), 2 deletions(-)

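For reference, a minimal userspace sketch of driving the new ioctl with the
usual two-pass size query.  This note and the sketch are commentary only, not
part of the patch, and not necessarily how vc4-gpu-tools does it.  It assumes
an already-open DRM fd with root privileges (the ioctl is DRM_ROOT_ONLY),
libdrm's drmIoctl(), and the uapi header added below; dump_hang_state() is a
made-up helper name.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <xf86drm.h>
#include "vc4_drm.h"

static int dump_hang_state(int fd)
{
	struct drm_vc4_get_hang_state get = { 0 };
	struct drm_vc4_get_hang_state_bo *bos;
	uint32_t i;

	/*
	 * First pass: bo_count == 0, so the kernel only fills in how many
	 * BO entries it has (errno is ENOENT if no hang has been recorded).
	 */
	if (drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get))
		return -errno;

	bos = calloc(get.bo_count, sizeof(*bos));
	if (!bos)
		return -ENOMEM;
	get.bo = (uintptr_t)bos;

	/*
	 * Second pass: the array is now big enough, so the saved state is
	 * copied out and consumed by the kernel.
	 */
	if (drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &get)) {
		free(bos);
		return -errno;
	}

	printf("bin CT 0x%08x..0x%08x, render CT 0x%08x..0x%08x\n",
	       get.ct0ca, get.ct0ea, get.ct1ca, get.ct1ea);
	for (i = 0; i < get.bo_count; i++)
		printf("BO %u: handle %u, paddr 0x%08x, size %u\n",
		       i, bos[i].handle, bos[i].paddr, bos[i].size);

	free(bos);
	return 0;
}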
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v
        gem_obj = vma->vm_private_data;
        bo = to_vc4_bo(gem_obj);
 
-       if (bo->validated_shader) {
-               DRM_ERROR("mmaping of shader BOs not allowed.\n");
+       if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+               DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
                return -EINVAL;
        }
 
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d
        DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
        DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
        DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+       DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
 };
 
 static struct drm_driver vc4_drm_driver = {
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -20,6 +20,8 @@ struct vc4_dev {
        struct drm_fbdev_cma *fbdev;
        struct rpi_firmware *firmware;
 
+       struct vc4_hang_state *hang_state;
+
        /* The kernel-space BO cache.  Tracks buffers that have been
         * unreferenced by all other users (refcounts of 0!) but not
         * yet freed, so we can do cheap allocations.
@@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr
                               struct drm_file *file_priv);
 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file_priv);
+int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+                            struct drm_file *file_priv);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d
                  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
 }
 
+struct vc4_hang_state {
+       struct drm_vc4_get_hang_state user_state;
+
+       u32 bo_count;
+       struct drm_gem_object **bo;
+};
+
+static void
+vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
+{
+       unsigned int i;
+
+       mutex_lock(&dev->struct_mutex);
+       for (i = 0; i < state->user_state.bo_count; i++) {
+               drm_gem_object_unreference(state->bo[i]);
+       }
+       mutex_unlock(&dev->struct_mutex);
+
+       kfree(state);
+}
+
+int
+vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+                        struct drm_file *file_priv)
+{
+       struct drm_vc4_get_hang_state *get_state = data;
+       struct drm_vc4_get_hang_state_bo *bo_state;
+       struct vc4_hang_state *kernel_state;
+       struct drm_vc4_get_hang_state *state;
+       struct vc4_dev *vc4 = to_vc4_dev(dev);
+       unsigned long irqflags;
+       u32 i;
+       int ret;
+
+       spin_lock_irqsave(&vc4->job_lock, irqflags);
+       kernel_state = vc4->hang_state;
+       if (!kernel_state) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+               return -ENOENT;
+       }
+       state = &kernel_state->user_state;
+
+       /* If the user's array isn't big enough, just return the
+        * required array size.
+        */
+       if (get_state->bo_count < state->bo_count) {
+               get_state->bo_count = state->bo_count;
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+               return 0;
+       }
+
+       vc4->hang_state = NULL;
+       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+       /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
+       state->bo = get_state->bo;
+       memcpy(get_state, state, sizeof(*state));
+
+       bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
+       if (!bo_state) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       for (i = 0; i < state->bo_count; i++) {
+               struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+               u32 handle;
+               ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+                                           &handle);
+
+               if (ret) {
+                       state->bo_count = i - 1;
+                       goto err;
+               }
+               bo_state[i].handle = handle;
+               bo_state[i].paddr = vc4_bo->base.paddr;
+               bo_state[i].size = vc4_bo->base.base.size;
+       }
+
+       ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
+                          bo_state,
+                          state->bo_count * sizeof(*bo_state));
+       kfree(bo_state);
+
+ err_free:
+
+       vc4_free_hang_state(dev, kernel_state);
+
+err:
+       return ret;
+}
+
+static void
+vc4_save_hang_state(struct drm_device *dev)
+{
+       struct vc4_dev *vc4 = to_vc4_dev(dev);
+       struct drm_vc4_get_hang_state *state;
+       struct vc4_hang_state *kernel_state;
+       struct vc4_exec_info *exec;
+       struct vc4_bo *bo;
+       unsigned long irqflags;
+       unsigned int i, unref_list_count;
+
+       kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
+       if (!kernel_state)
+               return;
+
+       state = &kernel_state->user_state;
+
+       spin_lock_irqsave(&vc4->job_lock, irqflags);
+       exec = vc4_first_job(vc4);
+       if (!exec) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+               return;
+       }
+
+       unref_list_count = 0;
+       list_for_each_entry(bo, &exec->unref_list, unref_head)
+               unref_list_count++;
+
+       state->bo_count = exec->bo_count + unref_list_count;
+       kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
+                                  GFP_ATOMIC);
+       if (!kernel_state->bo) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+               return;
+       }
+
+       for (i = 0; i < exec->bo_count; i++) {
+               drm_gem_object_reference(&exec->bo[i].bo->base);
+               kernel_state->bo[i] = &exec->bo[i].bo->base;
+       }
+
+       list_for_each_entry(bo, &exec->unref_list, unref_head) {
+               drm_gem_object_reference(&bo->base.base);
+               kernel_state->bo[i] = &bo->base.base;
+               i++;
+       }
+
+       state->start_bin = exec->ct0ca;
+       state->start_render = exec->ct1ca;
+
+       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+       state->ct0ca = V3D_READ(V3D_CTNCA(0));
+       state->ct0ea = V3D_READ(V3D_CTNEA(0));
+
+       state->ct1ca = V3D_READ(V3D_CTNCA(1));
+       state->ct1ea = V3D_READ(V3D_CTNEA(1));
+
+       state->ct0cs = V3D_READ(V3D_CTNCS(0));
+       state->ct1cs = V3D_READ(V3D_CTNCS(1));
+
+       state->ct0ra0 = V3D_READ(V3D_CT00RA0);
+       state->ct1ra0 = V3D_READ(V3D_CT01RA0);
+
+       state->bpca = V3D_READ(V3D_BPCA);
+       state->bpcs = V3D_READ(V3D_BPCS);
+       state->bpoa = V3D_READ(V3D_BPOA);
+       state->bpos = V3D_READ(V3D_BPOS);
+
+       state->vpmbase = V3D_READ(V3D_VPMBASE);
+
+       state->dbge = V3D_READ(V3D_DBGE);
+       state->fdbgo = V3D_READ(V3D_FDBGO);
+       state->fdbgb = V3D_READ(V3D_FDBGB);
+       state->fdbgr = V3D_READ(V3D_FDBGR);
+       state->fdbgs = V3D_READ(V3D_FDBGS);
+       state->errstat = V3D_READ(V3D_ERRSTAT);
+
+       spin_lock_irqsave(&vc4->job_lock, irqflags);
+       if (vc4->hang_state) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+               vc4_free_hang_state(dev, kernel_state);
+       } else {
+               vc4->hang_state = kernel_state;
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+       }
+}
+
 static void
 vc4_reset(struct drm_device *dev)
 {
@@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
        struct vc4_dev *vc4 =
                container_of(work, struct vc4_dev, hangcheck.reset_work);
 
+       vc4_save_hang_state(vc4->dev);
+
        vc4_reset(vc4->dev);
 }
 
@@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev)
        }
 
        vc4_bo_cache_destroy(dev);
+
+       if (vc4->hang_state)
+               vc4_free_hang_state(dev, vc4->hang_state);
 }
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -32,6 +32,7 @@
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
 #define DRM_VC4_CREATE_SHADER_BO                  0x05
+#define DRM_VC4_GET_HANG_STATE                    0x06
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -39,6 +40,7 @@
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
 #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+#define DRM_IOCTL_VC4_GET_HANG_STATE      DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
 
 struct drm_vc4_submit_rcl_surface {
        uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo {
        uint64_t offset;
 };
 
+struct drm_vc4_get_hang_state_bo {
+       uint32_t handle;
+       uint32_t paddr;
+       uint32_t size;
+       uint32_t pad;
+};
+
+/**
+ * struct drm_vc4_get_hang_state - ioctl argument for collecting state
+ * from a GPU hang for analysis.
+ */
+struct drm_vc4_get_hang_state {
+       /** Pointer to array of struct drm_vc4_get_hang_state_bo. */
+       uint64_t bo;
+       /**
+        * On input, the size of the bo array.  Output is the number
+        * of bos to be returned.
+        */
+       uint32_t bo_count;
+
+       uint32_t start_bin, start_render;
+
+       uint32_t ct0ca, ct0ea;
+       uint32_t ct1ca, ct1ea;
+       uint32_t ct0cs, ct1cs;
+       uint32_t ct0ra0, ct1ra0;
+
+       uint32_t bpca, bpcs;
+       uint32_t bpoa, bpos;
+
+       uint32_t vpmbase;
+
+       uint32_t dbge;
+       uint32_t fdbgo;
+       uint32_t fdbgb;
+       uint32_t fdbgr;
+       uint32_t fdbgs;
+       uint32_t errstat;
+
+       /* Pad that we may save more registers into in the future. */
+       uint32_t pad[16];
+};
+
 #endif /* _UAPI_VC4_DRM_H_ */
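
Commentary, not part of the patch: the vc4_bo.c hunk above relaxes the
shader-BO mmap check so that dumped shader BOs can still be mapped read-only
for inspection.  A hypothetical helper built on the existing
DRM_IOCTL_VC4_MMAP_BO ioctl (map_dumped_bo() is a made-up name and the
"vc4_drm.h" include path is an assumption) might look like:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include "vc4_drm.h"

static void *map_dumped_bo(int fd, const struct drm_vc4_get_hang_state_bo *bo)
{
	struct drm_vc4_mmap_bo map = {
		.handle = bo->handle,
	};

	/* Look up the fake mmap offset for this handle... */
	if (drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map))
		return MAP_FAILED;

	/* ...and map the BO read-only for inspection. */
	return mmap(NULL, bo->size, PROT_READ, MAP_SHARED, fd, map.offset);
}

A PROT_READ|PROT_WRITE shared mapping of a validated shader BO would still be
rejected with -EINVAL, which is exactly what the modified check in vc4_mmap()
enforces.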