]>
Commit | Line | Data |
---|---|---|
463873d5 EA |
1 | /* |
2 | * Copyright © 2014 Broadcom | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | */ | |
23 | ||
24 | /** | |
25 | * DOC: Shader validator for VC4. | |
26 | * | |
27 | * The VC4 has no IOMMU between it and system memory, so a user with | |
28 | * access to execute shaders could escalate privilege by overwriting | |
29 | * system memory (using the VPM write address register in the | |
30 | * general-purpose DMA mode) or reading system memory it shouldn't | |
31 | * (reading it as a texture, or uniform data, or vertex data). | |
32 | * | |
33 | * This walks over a shader BO, ensuring that its accesses are | |
34 | * appropriately bounded, and recording how many texture accesses are | |
35 | * made and where so that we can do relocations for them in the | |
36 | * uniform stream. | |
37 | */ | |
38 | ||
39 | #include "vc4_drv.h" | |
40 | #include "vc4_qpu_defines.h" | |
41 | ||
struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	/* Mapped contents of the shader BO, viewed as 64-bit QPU
	 * instructions.
	 */
	uint64_t *shader;

	/* Per-TMU texture setup state accumulated across parameter
	 * writes, flushed to validated_shader->texture_samples when the
	 * sample is submitted (S coordinate write).
	 */
	struct vc4_texture_sample_info tmu_setup[2];
	/* Number of parameter writes seen per TMU since the last submit
	 * (bounded to 4 by check_tmu_write()).
	 */
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 *
	 * Index layout (see waddr_to_live_reg_index()): regfile A is
	 * 0-31, regfile B is 32-63, accumulators r0-r3 are 64-67.
	 */
	uint32_t live_min_clamp_offsets[32 + 32 + 4];
	/* True for registers last written by a MAX(x, 0), i.e. clamped
	 * to a minimum of 0.  Indexed like live_min_clamp_offsets.
	 */
	bool live_max_clamp_regs[32 + 32 + 4];
};
63 | ||
64 | static uint32_t | |
65 | waddr_to_live_reg_index(uint32_t waddr, bool is_b) | |
66 | { | |
67 | if (waddr < 32) { | |
68 | if (is_b) | |
69 | return 32 + waddr; | |
70 | else | |
71 | return waddr; | |
72 | } else if (waddr <= QPU_W_ACC3) { | |
73 | return 64 + waddr - QPU_W_ACC0; | |
74 | } else { | |
75 | return ~0; | |
76 | } | |
77 | } | |
78 | ||
79 | static uint32_t | |
80 | raddr_add_a_to_live_reg_index(uint64_t inst) | |
81 | { | |
82 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | |
83 | uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); | |
84 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); | |
85 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); | |
86 | ||
87 | if (add_a == QPU_MUX_A) | |
88 | return raddr_a; | |
89 | else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) | |
90 | return 32 + raddr_b; | |
91 | else if (add_a <= QPU_MUX_R3) | |
92 | return 64 + add_a; | |
93 | else | |
94 | return ~0; | |
95 | } | |
96 | ||
97 | static bool | |
98 | is_tmu_submit(uint32_t waddr) | |
99 | { | |
100 | return (waddr == QPU_W_TMU0_S || | |
101 | waddr == QPU_W_TMU1_S); | |
102 | } | |
103 | ||
104 | static bool | |
105 | is_tmu_write(uint32_t waddr) | |
106 | { | |
107 | return (waddr >= QPU_W_TMU0_S && | |
108 | waddr <= QPU_W_TMU1_B); | |
109 | } | |
110 | ||
111 | static bool | |
112 | record_texture_sample(struct vc4_validated_shader_info *validated_shader, | |
113 | struct vc4_shader_validation_state *validation_state, | |
114 | int tmu) | |
115 | { | |
116 | uint32_t s = validated_shader->num_texture_samples; | |
117 | int i; | |
118 | struct vc4_texture_sample_info *temp_samples; | |
119 | ||
120 | temp_samples = krealloc(validated_shader->texture_samples, | |
121 | (s + 1) * sizeof(*temp_samples), | |
122 | GFP_KERNEL); | |
123 | if (!temp_samples) | |
124 | return false; | |
125 | ||
126 | memcpy(&temp_samples[s], | |
127 | &validation_state->tmu_setup[tmu], | |
128 | sizeof(*temp_samples)); | |
129 | ||
130 | validated_shader->num_texture_samples = s + 1; | |
131 | validated_shader->texture_samples = temp_samples; | |
132 | ||
133 | for (i = 0; i < 4; i++) | |
134 | validation_state->tmu_setup[tmu].p_offset[i] = ~0; | |
135 | ||
136 | return true; | |
137 | } | |
138 | ||
/* Validates a write to one of the TMU registers.
 *
 * Records where each texture parameter write lands in the uniform
 * stream (for later relocation), and for "direct" texture reads — an S
 * submit with no prior parameter writes, where S is a raw memory
 * address — verifies that the address is a uniform (the UBO base) plus
 * a value that was previously clamped by a MIN(MAX(x, 0), uniform)
 * sequence, so the read can't escape the UBO.
 */
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	/* Addresses above the TMU0 register range belong to TMU1. */
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	/* A submit with no parameters set up first is a direct
	 * (raw-address) read.
	 */
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		/* Uniform reads are counted to lay out the uniform
		 * stream; mixing one into a texture setup instruction
		 * would make the relocation offsets ambiguous.
		 */
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct)
		validated_shader->uniforms_size += 4;

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}
237 | ||
/* Validates the destination of one of the instruction's two write
 * ports (MUL when is_mul, ADD otherwise).  Write addresses that could
 * reach main memory are rejected; TMU writes are delegated to
 * check_tmu_write(); everything not listed in the switch falls through
 * to the final "return true".
 */
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		/* XXX: We'll probably need to support this for reladdr, but
		 * it's definitely a security-related one.
		 */
		DRM_ERROR("uniforms address load unsupported\n");
		return false;

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	/* Anything not handled above (register file, accumulators, etc.)
	 * is allowed: those destinations can't reach memory.
	 */
	return true;
}
303 | ||
/* Updates the live clamp-tracking state used by direct TMU validation.
 *
 * Two facts are tracked per destination register:
 * - live_max_clamp_regs[reg]: the register was last written by a
 *   MAX(x, small-immediate 0), i.e. it is clamped to >= 0.
 * - live_min_clamp_offsets[reg]: the register was last written by a
 *   MIN(max_clamped_value, uniform); holds that uniform's offset in
 *   the uniform stream (~0 when no such clamp is live).
 */
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	/* The WS bit swaps which regfile each write port targets. */
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	/* A conditional write might not happen, so it can't establish a
	 * trustworthy clamp.
	 */
	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}
378 | ||
379 | static bool | |
d0566c2a | 380 | check_instruction_writes(struct vc4_validated_shader_info *validated_shader, |
463873d5 EA |
381 | struct vc4_shader_validation_state *validation_state) |
382 | { | |
d0566c2a | 383 | uint64_t inst = validation_state->shader[validation_state->ip]; |
463873d5 EA |
384 | uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); |
385 | uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); | |
386 | bool ok; | |
387 | ||
388 | if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { | |
389 | DRM_ERROR("ADD and MUL both set up textures\n"); | |
390 | return false; | |
391 | } | |
392 | ||
d0566c2a EA |
393 | ok = (check_reg_write(validated_shader, validation_state, false) && |
394 | check_reg_write(validated_shader, validation_state, true)); | |
463873d5 | 395 | |
d0566c2a | 396 | track_live_clamps(validated_shader, validation_state); |
463873d5 EA |
397 | |
398 | return ok; | |
399 | } | |
400 | ||
401 | static bool | |
402 | check_instruction_reads(uint64_t inst, | |
403 | struct vc4_validated_shader_info *validated_shader) | |
404 | { | |
405 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); | |
406 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); | |
407 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | |
408 | ||
409 | if (raddr_a == QPU_R_UNIF || | |
410 | (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { | |
411 | /* This can't overflow the uint32_t, because we're reading 8 | |
412 | * bytes of instruction to increment by 4 here, so we'd | |
413 | * already be OOM. | |
414 | */ | |
415 | validated_shader->uniforms_size += 4; | |
416 | } | |
417 | ||
418 | return true; | |
419 | } | |
420 | ||
/**
 * vc4_validate_shader() - Validates a QPU shader BO before execution.
 * @shader_obj: GEM CMA object holding the shader's 64-bit QPU
 *              instructions.
 *
 * Walks every instruction in the BO, rejecting writes and signals that
 * could reach memory outside of validated buffers, and records where
 * uniform and texture references occur so the uniform stream can be
 * relocated later.
 *
 * Returns an allocated vc4_validated_shader_info on success (the caller
 * owns it and its texture_samples array), or NULL on validation failure
 * or allocation error.
 */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader;
	struct vc4_shader_validation_state validation_state;
	int i;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	/* Mark all texture parameter slots and clamp offsets as not live. */
	for (i = 0; i < 8; i++)
		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
		validation_state.live_min_clamp_offsets[i] = ~0;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		return NULL;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(inst, validated_shader))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			/* Only writes are checked here; NOTE(review):
			 * presumably the raddr fields hold immediate data
			 * for LOAD_IMM, so there are no reads to count —
			 * confirm against the QPU encoding.
			 */
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		default:
			/* Any other signal (branches, thread switches, ...)
			 * is rejected outright.
			 */
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	return validated_shader;

fail:
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}