/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[32 + 32 + 4];
	bool live_max_clamp_regs[32 + 32 + 4];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;
};

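/* Maps a write address to an index into the live clamp tracking arrays
 * above: 0-31 for regfile A, 32-63 for regfile B, and 64-67 for the
 * r0-r3 accumulators.  Returns ~0 for write addresses that aren't
 * tracked.
 */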
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

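/* Returns the live reg index (as above) that the ADD op's A argument
 * reads from, or ~0 if it doesn't come from a trackable register.
 */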
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

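/* Appends the TMU setup that was just submitted to the validated
 * shader's sample list for later relocation, then resets the recorded
 * parameter offsets for that TMU's next sample.
 */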
static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

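/* Validates a write to one of the TMU S/T/R/B coordinate registers,
 * where writing the S register submits either a texture sample or (if
 * no other parameters were written first) a direct memory fetch.
 */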
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

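	/* A submit with no preceding parameter writes to this TMU is a
	 * direct memory fetch: the S register value is used as a raw
	 * address, so we have to prove it was clamped to within a UBO.
	 */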
	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads().
	 */
	if (!is_direct)
		validated_shader->uniforms_size += 4;

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

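/* Validates the write address of one of the instruction's two ALU ops,
 * rejecting writes that could reach system memory or that we haven't
 * audited, and dispatching TMU register writes to check_tmu_write().
 */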
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		/* XXX: We'll probably need to support this for reladdr, but
		 * it's definitely a security-related one.
		 */
		DRM_ERROR("uniforms address load unsupported\n");
		return false;

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

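/* Tracks the clamping pattern that makes a direct TMU read safe: the
 * offset is first clamped to a minimum of 0 with MAX(x, 0), then to a
 * maximum of a uniform (the UBO size) with MIN().  Only registers
 * recorded here are accepted as clamped offsets by check_tmu_write().
 */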
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

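/* Validates both the ADD and MUL writes of a single instruction, then
 * updates the live clamp tracking with its results.
 */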
static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we only
		 * increment by 4 per 8 bytes of instruction read, so we'd
		 * run out of memory for the shader BO first.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}

/* Make sure that all branches are immediate, PC-relative, and point within
 * the shader, and note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	bool found_shader_end = false;
	int ip;
	int shader_end_ip = 0;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
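		/* A branch executes its three delay slots before taking
		 * effect, so both the fall-through path and the relative
		 * target are computed from ip + 4.
		 */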
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			shader_end_ip = ip;
			found_shader_end = true;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}
		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (max_branch_target > shader_end_ip) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END\n");
		return false;
	}

	return true;
}

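/* Validates the shader in the given BO and returns the bookkeeping
 * (uniform stream size and texture sample offsets) needed to do uniform
 * relocations at submit time, or NULL if validation fails.
 */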
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;
	int i;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	for (i = 0; i < 8; i++)
		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
		validation_state.live_min_clamp_offsets[i] = ~0;

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(inst, validated_shader))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}