drivers/gpu/drm/vc4/vc4_validate.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * Command list validator for VC4.
  26  *
  27  * The VC4 has no IOMMU between it and system memory.  So, a user with
  28  * access to execute command lists could escalate privilege by
  29  * overwriting system memory (drawing to it as a framebuffer) or
  30  * reading system memory it shouldn't (reading it as a texture, or
  31  * uniform data, or vertex data).
  32  *
  33  * This validates command lists to ensure that all accesses are within
  34  * the bounds of the GEM objects referenced.  It explicitly whitelists
  35  * packets, and looks at the offsets in any address fields to make
  36  * sure they're constrained within the BOs they reference.
  37  *
  38  * Note that because of the validation that's happening anyway, this
  39  * is where GEM relocation processing happens.
  40  */
  41
  42 #include "uapi/drm/vc4_drm.h"
  43 #include "vc4_drv.h"
  44 #include "vc4_packet.h"
  45
  46 #define VALIDATE_ARGS \
  47         struct vc4_exec_info *exec,                     \
  48         void *validated,                                \
  49         void *untrusted
  50
  51 /** Return the width in pixels of a 64-byte microtile. */
  52 static uint32_t
  53 utile_width(int cpp)
  54 {
  55         switch (cpp) {
  56         case 1:
  57         case 2:
  58                 return 8;
  59         case 4:
  60                 return 4;
  61         case 8:
  62                 return 2;
  63         default:
  64                 DRM_ERROR("unknown cpp: %d\n", cpp);
  65                 return 1;
  66         }
  67 }
  68
  69 /** Return the height in pixels of a 64-byte microtile. */
  70 static uint32_t
  71 utile_height(int cpp)
  72 {
  73         switch (cpp) {
  74         case 1:
  75                 return 8;
  76         case 2:
  77         case 4:
  78         case 8:
  79                 return 4;
  80         default:
  81                 DRM_ERROR("unknown cpp: %d\n", cpp);
  82                 return 1;
  83         }
  84 }
  85
  86 /**
  87  * The texture unit decides what tiling format a particular miplevel is using
  88  * this function, so we lay out our miptrees accordingly.
  89  */
  90 static bool
  91 size_is_lt(uint32_t width, uint32_t height, int cpp)
  92 {
  93         return (width <= 4 * utile_width(cpp) ||
  94                 height <= 4 * utile_height(cpp));
  95 }
  96
  97 struct drm_gem_cma_object *
  98 vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
  99 {
 100         struct drm_gem_cma_object *obj;
 101         struct vc4_bo *bo;
 102
 103         if (hindex >= exec->bo_count) {
 104                 DRM_ERROR("BO index %d greater than BO count %d\n",
 105                           hindex, exec->bo_count);
 106                 return NULL;
 107         }
 108         obj = exec->bo[hindex];
 109         bo = to_vc4_bo(&obj->base);
 110
 111         if (bo->validated_shader) {
 112                 DRM_ERROR("Trying to use shader BO as something other than "
 113                           "a shader\n");
 114                 return NULL;
 115         }
 116
 117         return obj;
 118 }
 119
 120 static struct drm_gem_cma_object *
 121 vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
 122 {
 123         return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
 124 }
 125
 126 static bool
 127 validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
 128 {
 129         /* Note that the untrusted pointer passed to these functions is
 130          * incremented past the packet byte.
 131          */
 132         return (untrusted - 1 == exec->bin_u + pos);
 133 }
 134
 135 static uint32_t
 136 gl_shader_rec_size(uint32_t pointer_bits)
 137 {
 138         uint32_t attribute_count = pointer_bits & 7;
 139         bool extended = pointer_bits & 8;
 140
 141         if (attribute_count == 0)
 142                 attribute_count = 8;
 143
 144         if (extended)
 145                 return 100 + attribute_count * 4;
 146         else
 147                 return 36 + attribute_count * 8;
 148 }
 149
 150 bool
 151 vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 152                    uint32_t offset, uint8_t tiling_format,
 153                    uint32_t width, uint32_t height, uint8_t cpp)
 154 {
 155         uint32_t aligned_width, aligned_height, stride, size;
 156         uint32_t utile_w = utile_width(cpp);
 157         uint32_t utile_h = utile_height(cpp);
 158
 159         /* The shaded vertex format stores signed 12.4 fixed point
 160          * (-2048,2047) offsets from the viewport center, so we should
 161          * never have a render target larger than 4096.  The texture
 162          * unit can only sample from 2048x2048, so it's even more
 163          * restricted.  This lets us avoid worrying about overflow in
 164          * our math.
 165          */
 166         if (width > 4096 || height > 4096) {
 167                 DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
 168                 return false;
 169         }
 170
 171         switch (tiling_format) {
 172         case VC4_TILING_FORMAT_LINEAR:
 173                 aligned_width = round_up(width, utile_w);
 174                 aligned_height = height;
 175                 break;
 176         case VC4_TILING_FORMAT_T:
 177                 aligned_width = round_up(width, utile_w * 8);
 178                 aligned_height = round_up(height, utile_h * 8);
 179                 break;
 180         case VC4_TILING_FORMAT_LT:
 181                 aligned_width = round_up(width, utile_w);
 182                 aligned_height = round_up(height, utile_h);
 183                 break;
 184         default:
 185                 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
 186                 return false;
 187         }
 188
 189         stride = aligned_width * cpp;
 190         size = stride * aligned_height;
 191
 192         if (size + offset < size ||
 193             size + offset > fbo->base.size) {
 194                 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
 195                           width, height,
 196                           aligned_width, aligned_height,
 197                           size, offset, fbo->base.size);
 198                 return false;
 199         }
 200
 201         return true;
 202 }
 203
 204 static int
 205 validate_flush(VALIDATE_ARGS)
 206 {
 207         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
 208                 DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
 209                 return -EINVAL;
 210         }
 211         exec->found_flush = true;
 212
 213         return 0;
 214 }
 215
 216 static int
 217 validate_start_tile_binning(VALIDATE_ARGS)
 218 {
 219         if (exec->found_start_tile_binning_packet) {
 220                 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
 221                 return -EINVAL;
 222         }
 223         exec->found_start_tile_binning_packet = true;
 224
 225         if (!exec->found_tile_binning_mode_config_packet) {
 226                 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 227                 return -EINVAL;
 228         }
 229
 230         return 0;
 231 }
 232
 233 static int
 234 validate_increment_semaphore(VALIDATE_ARGS)
 235 {
 236         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
 237                 DRM_ERROR("Bin CL must end with "
 238                           "VC4_PACKET_INCREMENT_SEMAPHORE\n");
 239                 return -EINVAL;
 240         }
 241         exec->found_increment_semaphore_packet = true;
 242
 243         return 0;
 244 }
 245
 246 static int
 247 validate_indexed_prim_list(VALIDATE_ARGS)
 248 {
 249         struct drm_gem_cma_object *ib;
 250         uint32_t length = *(uint32_t *)(untrusted + 1);
 251         uint32_t offset = *(uint32_t *)(untrusted + 5);
 252         uint32_t max_index = *(uint32_t *)(untrusted + 9);
 253         uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
 254         struct vc4_shader_state *shader_state;
 255
 256         /* Check overflow condition */
 257         if (exec->shader_state_count == 0) {
 258                 DRM_ERROR("shader state must precede primitives\n");
 259                 return -EINVAL;
 260         }
 261         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 262
 263         if (max_index > shader_state->max_index)
 264                 shader_state->max_index = max_index;
 265
 266         ib = vc4_use_handle(exec, 0);
 267         if (!ib)
 268                 return -EINVAL;
 269
 270         exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 271                                   to_vc4_bo(&ib->base)->write_seqno);
 272
 273         if (offset > ib->base.size ||
 274             (ib->base.size - offset) / index_size < length) {
 275                 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
 276                           offset, length, index_size, ib->base.size);
 277                 return -EINVAL;
 278         }
 279
 280         *(uint32_t *)(validated + 5) = ib->paddr + offset;
 281
 282         return 0;
 283 }
 284
 285 static int
 286 validate_gl_array_primitive(VALIDATE_ARGS)
 287 {
 288         uint32_t length = *(uint32_t *)(untrusted + 1);
 289         uint32_t base_index = *(uint32_t *)(untrusted + 5);
 290         uint32_t max_index;
 291         struct vc4_shader_state *shader_state;
 292
 293         /* Check overflow condition */
 294         if (exec->shader_state_count == 0) {
 295                 DRM_ERROR("shader state must precede primitives\n");
 296                 return -EINVAL;
 297         }
 298         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 299
 300         if (length + base_index < length) {
 301                 DRM_ERROR("primitive vertex count overflow\n");
 302                 return -EINVAL;
 303         }
 304         max_index = length + base_index - 1;
 305
 306         if (max_index > shader_state->max_index)
 307                 shader_state->max_index = max_index;
 308
 309         return 0;
 310 }
 311
 312 static int
 313 validate_gl_shader_state(VALIDATE_ARGS)
 314 {
 315         uint32_t i = exec->shader_state_count++;
 316
 317         if (i >= exec->shader_state_size) {
 318                 DRM_ERROR("More requests for shader states than declared\n");
 319                 return -EINVAL;
 320         }
 321
 322         exec->shader_state[i].addr = *(uint32_t *)untrusted;
 323         exec->shader_state[i].max_index = 0;
 324
 325         if (exec->shader_state[i].addr & ~0xf) {
 326                 DRM_ERROR("high bits set in GL shader rec reference\n");
 327                 return -EINVAL;
 328         }
 329
 330         *(uint32_t *)validated = (exec->shader_rec_p +
 331                                   exec->shader_state[i].addr);
 332
 333         exec->shader_rec_p +=
 334                 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
 335
 336         return 0;
 337 }
 338
 339 static int
 340 validate_tile_binning_config(VALIDATE_ARGS)
 341 {
 342         struct drm_device *dev = exec->exec_bo->base.dev;
 343         struct vc4_bo *tile_bo;
 344         uint8_t flags;
 345         uint32_t tile_state_size, tile_alloc_size;
 346         uint32_t tile_count;
 347
 348         if (exec->found_tile_binning_mode_config_packet) {
 349                 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 350                 return -EINVAL;
 351         }
 352         exec->found_tile_binning_mode_config_packet = true;
 353
 354         exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
 355         exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
 356         tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
 357         flags = *(uint8_t *)(untrusted + 14);
 358
 359         if (exec->bin_tiles_x == 0 ||
 360             exec->bin_tiles_y == 0) {
 361                 DRM_ERROR("Tile binning config of %dx%d too small\n",
 362                           exec->bin_tiles_x, exec->bin_tiles_y);
 363                 return -EINVAL;
 364         }
 365
 366         if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
 367                      VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
 368                 DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
 369                 return -EINVAL;
 370         }
 371
 372         /* The tile state data array is 48 bytes per tile, and we put it at
 373          * the start of a BO containing both it and the tile alloc.
 374          */
 375         tile_state_size = 48 * tile_count;
 376
 377         /* Since the tile alloc array will follow us, align. */
 378         exec->tile_alloc_offset = roundup(tile_state_size, 4096);
 379
 380         *(uint8_t *)(validated + 14) =
 381                 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
 382                             VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
 383                  VC4_BIN_CONFIG_AUTO_INIT_TSDA |
 384                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
 385                                VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
 386                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
 387                                VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
 388
 389         /* Initial block size. */
 390         tile_alloc_size = 32 * tile_count;
 391
 392         /*
 393          * The initial allocation gets rounded to the next 256 bytes before
 394          * the hardware starts fulfilling further allocations.
 395          */
 396         tile_alloc_size = roundup(tile_alloc_size, 256);
 397
 398         /* Add space for the extra allocations.  This is what gets used first,
 399          * before overflow memory.  It must have at least 4096 bytes, but we
 400          * want to avoid overflow memory usage if possible.
 401          */
 402         tile_alloc_size += 1024 * 1024;
 403
 404         tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
 405                                 true);
 406         exec->tile_bo = &tile_bo->base;
 407         if (IS_ERR(exec->tile_bo))
 408                 return PTR_ERR(exec->tile_bo);
 409         list_add_tail(&tile_bo->unref_head, &exec->unref_list);
 410
 411         /* tile alloc address. */
 412         *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
 413                                         exec->tile_alloc_offset);
 414         /* tile alloc size. */
 415         *(uint32_t *)(validated + 4) = tile_alloc_size;
 416         /* tile state address. */
 417         *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
 418
 419         return 0;
 420 }
 421
 422 static int
 423 validate_gem_handles(VALIDATE_ARGS)
 424 {
 425         memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
 426         return 0;
 427 }
 428
 429 #define VC4_DEFINE_PACKET(packet, func) \
 430         [packet] = { packet ## _SIZE, #packet, func }
 431
 432 static const struct cmd_info {
 433         uint16_t len;
 434         const char *name;
 435         int (*func)(struct vc4_exec_info *exec, void *validated,
 436                     void *untrusted);
 437 } cmd_info[] = {
 438         VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
 439         VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
 440         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
 441         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
 442         VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
 443                           validate_start_tile_binning),
 444         VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
 445                           validate_increment_semaphore),
 446
 447         VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
 448                           validate_indexed_prim_list),
 449         VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
 450                           validate_gl_array_primitive),
 451
 452         VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
 453
 454         VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
 455
 456         VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
 457         VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
 458         VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
 459         VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
 460         VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
 461         VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
 462         VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
 463         VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
 464         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
 465         /* Note: The docs say this was also 105, but it was 106 in the
 466          * initial userland code drop.
 467          */
 468         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
 469
 470         VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
 471                           validate_tile_binning_config),
 472
 473         VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
 474 };
 475
 476 int
 477 vc4_validate_bin_cl(struct drm_device *dev,
 478                     void *validated,
 479                     void *unvalidated,
 480                     struct vc4_exec_info *exec)
 481 {
 482         uint32_t len = exec->args->bin_cl_size;
 483         uint32_t dst_offset = 0;
 484         uint32_t src_offset = 0;
 485
 486         while (src_offset < len) {
 487                 void *dst_pkt = validated + dst_offset;
 488                 void *src_pkt = unvalidated + src_offset;
 489                 u8 cmd = *(uint8_t *)src_pkt;
 490                 const struct cmd_info *info;
 491
 492                 if (cmd >= ARRAY_SIZE(cmd_info)) {
 493                         DRM_ERROR("0x%08x: packet %d out of bounds\n",
 494                                   src_offset, cmd);
 495                         return -EINVAL;
 496                 }
 497
 498                 info = &cmd_info[cmd];
 499                 if (!info->name) {
 500                         DRM_ERROR("0x%08x: packet %d invalid\n",
 501                                   src_offset, cmd);
 502                         return -EINVAL;
 503                 }
 504
 505                 if (src_offset + info->len > len) {
 506                         DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
 507                                   "exceeds bounds (0x%08x)\n",
 508                                   src_offset, cmd, info->name, info->len,
 509                                   src_offset + len);
 510                         return -EINVAL;
 511                 }
 512
 513                 if (cmd != VC4_PACKET_GEM_HANDLES)
 514                         memcpy(dst_pkt, src_pkt, info->len);
 515
 516                 if (info->func && info->func(exec,
 517                                              dst_pkt + 1,
 518                                              src_pkt + 1)) {
 519                         DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
 520                                   src_offset, cmd, info->name);
 521                         return -EINVAL;
 522                 }
 523
 524                 src_offset += info->len;
 525                 /* GEM handle loading doesn't produce HW packets. */
 526                 if (cmd != VC4_PACKET_GEM_HANDLES)
 527                         dst_offset += info->len;
 528
 529                 /* When the CL hits halt, it'll stop reading anything else. */
 530                 if (cmd == VC4_PACKET_HALT)
 531                         break;
 532         }
 533
 534         exec->ct0ea = exec->ct0ca + dst_offset;
 535
 536         if (!exec->found_start_tile_binning_packet) {
 537                 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
 538                 return -EINVAL;
 539         }
 540
 541         /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
 542          * semaphore is used to trigger the render CL to start up, and the
 543          * FLUSH is what caps the bin lists with
 544          * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
 545          * render CL when they get called to) and actually triggers the queued
 546          * semaphore increment.
 547          */
 548         if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
 549                 DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
 550                           "VC4_PACKET_FLUSH\n");
 551                 return -EINVAL;
 552         }
 553
 554         return 0;
 555 }
 556
 557 static bool
 558 reloc_tex(struct vc4_exec_info *exec,
 559           void *uniform_data_u,
 560           struct vc4_texture_sample_info *sample,
 561           uint32_t texture_handle_index, bool is_cs)
 562 {
 563         struct drm_gem_cma_object *tex;
 564         uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
 565         uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
 566         uint32_t p2 = (sample->p_offset[2] != ~0 ?
 567                        *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
 568         uint32_t p3 = (sample->p_offset[3] != ~0 ?
 569                        *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
 570         uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
 571         uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
 572         uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
 573         uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
 574         uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
 575         uint32_t cpp, tiling_format, utile_w, utile_h;
 576         uint32_t i;
 577         uint32_t cube_map_stride = 0;
 578         enum vc4_texture_data_type type;
 579
 580         tex = vc4_use_bo(exec, texture_handle_index);
 581         if (!tex)
 582                 return false;
 583
 584         if (sample->is_direct) {
 585                 uint32_t remaining_size = tex->base.size - p0;
 586
 587                 if (p0 > tex->base.size - 4) {
 588                         DRM_ERROR("UBO offset greater than UBO size\n");
 589                         goto fail;
 590                 }
 591                 if (p1 > remaining_size - 4) {
 592                         DRM_ERROR("UBO clamp would allow reads "
 593                                   "outside of UBO\n");
 594                         goto fail;
 595                 }
 596                 *validated_p0 = tex->paddr + p0;
 597                 return true;
 598         }
 599
 600         if (width == 0)
 601                 width = 2048;
 602         if (height == 0)
 603                 height = 2048;
 604
 605         if (p0 & VC4_TEX_P0_CMMODE_MASK) {
 606                 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
 607                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
 608                         cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
 609                 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
 610                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
 611                         if (cube_map_stride) {
 612                                 DRM_ERROR("Cube map stride set twice\n");
 613                                 goto fail;
 614                         }
 615
 616                         cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
 617                 }
 618                 if (!cube_map_stride) {
 619                         DRM_ERROR("Cube map stride not set\n");
 620                         goto fail;
 621                 }
 622         }
 623
 624         type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
 625                 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
 626
 627         switch (type) {
 628         case VC4_TEXTURE_TYPE_RGBA8888:
 629         case VC4_TEXTURE_TYPE_RGBX8888:
 630         case VC4_TEXTURE_TYPE_RGBA32R:
 631                 cpp = 4;
 632                 break;
 633         case VC4_TEXTURE_TYPE_RGBA4444:
 634         case VC4_TEXTURE_TYPE_RGBA5551:
 635         case VC4_TEXTURE_TYPE_RGB565:
 636         case VC4_TEXTURE_TYPE_LUMALPHA:
 637         case VC4_TEXTURE_TYPE_S16F:
 638         case VC4_TEXTURE_TYPE_S16:
 639                 cpp = 2;
 640                 break;
 641         case VC4_TEXTURE_TYPE_LUMINANCE:
 642         case VC4_TEXTURE_TYPE_ALPHA:
 643         case VC4_TEXTURE_TYPE_S8:
 644                 cpp = 1;
 645                 break;
 646         case VC4_TEXTURE_TYPE_ETC1:
 647                 /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
 648                  * pixels.
 649                  */
 650                 cpp = 8;
 651                 width = (width + 3) >> 2;
 652                 height = (height + 3) >> 2;
 653                 break;
 654         case VC4_TEXTURE_TYPE_BW1:
 655         case VC4_TEXTURE_TYPE_A4:
 656         case VC4_TEXTURE_TYPE_A1:
 657         case VC4_TEXTURE_TYPE_RGBA64:
 658         case VC4_TEXTURE_TYPE_YUV422R:
 659         default:
 660                 DRM_ERROR("Texture format %d unsupported\n", type);
 661                 goto fail;
 662         }
 663         utile_w = utile_width(cpp);
 664         utile_h = utile_height(cpp);
 665
 666         if (type == VC4_TEXTURE_TYPE_RGBA32R) {
 667                 tiling_format = VC4_TILING_FORMAT_LINEAR;
 668         } else {
 669                 if (size_is_lt(width, height, cpp))
 670                         tiling_format = VC4_TILING_FORMAT_LT;
 671                 else
 672                         tiling_format = VC4_TILING_FORMAT_T;
 673         }
 674
 675         if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
 676                                 tiling_format, width, height, cpp)) {
 677                 goto fail;
 678         }
 679
 680         /* The mipmap levels are stored before the base of the texture.  Make
 681          * sure there is actually space in the BO.
 682          */
 683         for (i = 1; i <= miplevels; i++) {
 684                 uint32_t level_width = max(width >> i, 1u);
 685                 uint32_t level_height = max(height >> i, 1u);
 686                 uint32_t aligned_width, aligned_height;
 687                 uint32_t level_size;
 688
 689                 /* Once the levels get small enough, they drop from T to LT. */
 690                 if (tiling_format == VC4_TILING_FORMAT_T &&
 691                     size_is_lt(level_width, level_height, cpp)) {
 692                         tiling_format = VC4_TILING_FORMAT_LT;
 693                 }
 694
 695                 switch (tiling_format) {
 696                 case VC4_TILING_FORMAT_T:
 697                         aligned_width = round_up(level_width, utile_w * 8);
 698                         aligned_height = round_up(level_height, utile_h * 8);
 699                         break;
 700                 case VC4_TILING_FORMAT_LT:
 701                         aligned_width = round_up(level_width, utile_w);
 702                         aligned_height = round_up(level_height, utile_h);
 703                         break;
 704                 default:
 705                         aligned_width = round_up(level_width, utile_w);
 706                         aligned_height = level_height;
 707                         break;
 708                 }
 709
 710                 level_size = aligned_width * cpp * aligned_height;
 711
 712                 if (offset < level_size) {
 713                         DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
 714                                   "overflowed buffer bounds (offset %d)\n",
 715                                   i, level_width, level_height,
 716                                   aligned_width, aligned_height,
 717                                   level_size, offset);
 718                         goto fail;
 719                 }
 720
 721                 offset -= level_size;
 722         }
 723
 724         *validated_p0 = tex->paddr + p0;
 725
 726         if (is_cs) {
 727                 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 728                                           to_vc4_bo(&tex->base)->write_seqno);
 729         }
 730
 731         return true;
 732  fail:
 733         DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
 734         DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
 735         DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
 736         DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
 737         return false;
 738 }
 739
 740 static int
 741 validate_gl_shader_rec(struct drm_device *dev,
 742                        struct vc4_exec_info *exec,
 743                        struct vc4_shader_state *state)
 744 {
 745         uint32_t *src_handles;
 746         void *pkt_u, *pkt_v;
 747         static const uint32_t shader_reloc_offsets[] = {
 748                 4, /* fs */
 749                 16, /* vs */
 750                 28, /* cs */
 751         };
 752         uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
 753         struct drm_gem_cma_object *bo[shader_reloc_count + 8];
 754         uint32_t nr_attributes, nr_relocs, packet_size;
 755         int i;
 756
 757         nr_attributes = state->addr & 0x7;
 758         if (nr_attributes == 0)
 759                 nr_attributes = 8;
 760         packet_size = gl_shader_rec_size(state->addr);
 761
 762         nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
 763         if (nr_relocs * 4 > exec->shader_rec_size) {
 764                 DRM_ERROR("overflowed shader recs reading %d handles "
 765                           "from %d bytes left\n",
 766                           nr_relocs, exec->shader_rec_size);
 767                 return -EINVAL;
 768         }
 769         src_handles = exec->shader_rec_u;
 770         exec->shader_rec_u += nr_relocs * 4;
 771         exec->shader_rec_size -= nr_relocs * 4;
 772
 773         if (packet_size > exec->shader_rec_size) {
 774                 DRM_ERROR("overflowed shader recs copying %db packet "
 775                           "from %d bytes left\n",
 776                           packet_size, exec->shader_rec_size);
 777                 return -EINVAL;
 778         }
 779         pkt_u = exec->shader_rec_u;
 780         pkt_v = exec->shader_rec_v;
 781         memcpy(pkt_v, pkt_u, packet_size);
 782         exec->shader_rec_u += packet_size;
 783         /* Shader recs have to be aligned to 16 bytes (due to the attribute
 784          * flags being in the low bytes), so round the next validated shader
 785          * rec address up.  This should be safe, since we've got so many
 786          * relocations in a shader rec packet.
 787          */
 788         BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
 789         exec->shader_rec_v += roundup(packet_size, 16);
 790         exec->shader_rec_size -= packet_size;
 791
 792         if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
 793                 DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
 794                 return -EINVAL;
 795         }
 796
 797         for (i = 0; i < shader_reloc_count; i++) {
 798                 if (src_handles[i] > exec->bo_count) {
 799                         DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
 800                         return -EINVAL;
 801                 }
 802
 803                 bo[i] = exec->bo[src_handles[i]];
 804                 if (!bo[i])
 805                         return -EINVAL;
 806         }
 807         for (i = shader_reloc_count; i < nr_relocs; i++) {
 808                 bo[i] = vc4_use_bo(exec, src_handles[i]);
 809                 if (!bo[i])
 810                         return -EINVAL;
 811         }
 812
 813         for (i = 0; i < shader_reloc_count; i++) {
 814                 struct vc4_validated_shader_info *validated_shader;
 815                 uint32_t o = shader_reloc_offsets[i];
 816                 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 817                 uint32_t *texture_handles_u;
 818                 void *uniform_data_u;
 819                 uint32_t tex, uni;
 820
 821                 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 822
 823                 if (src_offset != 0) {
 824                         DRM_ERROR("Shaders must be at offset 0 of "
 825                                   "the BO.\n");
 826                         return -EINVAL;
 827                 }
 828
 829                 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
 830                 if (!validated_shader)
 831                         return -EINVAL;
 832
 833                 if (validated_shader->uniforms_src_size >
 834                     exec->uniforms_size) {
 835                         DRM_ERROR("Uniforms src buffer overflow\n");
 836                         return -EINVAL;
 837                 }
 838
 839                 texture_handles_u = exec->uniforms_u;
 840                 uniform_data_u = (texture_handles_u +
 841                                   validated_shader->num_texture_samples);
 842
 843                 memcpy(exec->uniforms_v, uniform_data_u,
 844                        validated_shader->uniforms_size);
 845
 846                 for (tex = 0;
 847                      tex < validated_shader->num_texture_samples;
 848                      tex++) {
 849                         if (!reloc_tex(exec,
 850                                        uniform_data_u,
 851                                        &validated_shader->texture_samples[tex],
 852                                        texture_handles_u[tex],
 853                                        i == 2)) {
 854                                 return -EINVAL;
 855                         }
 856                 }
 857
 858                 /* Fill in the uniform slots that need this shader's
 859                  * start-of-uniforms address (used for resetting the uniform
 860                  * stream in the presence of control flow).
 861                  */
 862                 for (uni = 0;
 863                      uni < validated_shader->num_uniform_addr_offsets;
 864                      uni++) {
 865                         uint32_t o = validated_shader->uniform_addr_offsets[uni];
 866                         ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
 867                 }
 868
 869                 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 870
 871                 exec->uniforms_u += validated_shader->uniforms_src_size;
 872                 exec->uniforms_v += validated_shader->uniforms_size;
 873                 exec->uniforms_p += validated_shader->uniforms_size;
 874         }
 875
 876         for (i = 0; i < nr_attributes; i++) {
 877                 struct drm_gem_cma_object *vbo =
 878                         bo[ARRAY_SIZE(shader_reloc_offsets) + i];
 879                 uint32_t o = 36 + i * 8;
 880                 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
 881                 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
 882                 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
 883                 uint32_t max_index;
 884
 885                 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 886                                           to_vc4_bo(&vbo->base)->write_seqno);
 887
 888                 if (state->addr & 0x8)
 889                         stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
 890
 891                 if (vbo->base.size < offset ||
 892                     vbo->base.size - offset < attr_size) {
 893                         DRM_ERROR("BO offset overflow (%d + %d > %zu)\n",
 894                                   offset, attr_size, vbo->base.size);
 895                         return -EINVAL;
 896                 }
 897
 898                 if (stride != 0) {
 899                         max_index = ((vbo->base.size - offset - attr_size) /
 900                                      stride);
 901                         if (state->max_index > max_index) {
 902                                 DRM_ERROR("primitives use index %d out of "
 903                                           "supplied %d\n",
 904                                           state->max_index, max_index);
 905                                 return -EINVAL;
 906                         }
 907                 }
 908
 909                 *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
 910         }
 911
 912         return 0;
 913 }
 914
 915 int
 916 vc4_validate_shader_recs(struct drm_device *dev,
 917                          struct vc4_exec_info *exec)
 918 {
 919         uint32_t i;
 920         int ret = 0;
 921
 922         for (i = 0; i < exec->shader_state_count; i++) {
 923                 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
 924                 if (ret)
 925                         return ret;
 926         }
 927
 928         return ret;
 929 }