drivers/gpu/drm/vc4/vc4_gem.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <linux/module.h>
  25 #include <linux/platform_device.h>
  26 #include <linux/device.h>
  27 #include <linux/io.h>
  28
  29 #include "uapi/drm/vc4_drm.h"
  30 #include "vc4_drv.h"
  31 #include "vc4_regs.h"
  32 #include "vc4_trace.h"
  33
  34 static void
  35 vc4_queue_hangcheck(struct drm_device *dev)
  36 {
  37         struct vc4_dev *vc4 = to_vc4_dev(dev);
  38
  39         mod_timer(&vc4->hangcheck.timer,
  40                   round_jiffies_up(jiffies + msecs_to_jiffies(100)));
  41 }
  42
/* Kernel-side record of a GPU hang, filled in by vc4_save_hang_state()
 * and handed to userspace by vc4_get_hang_state_ioctl().
 */
struct vc4_hang_state {
	/* Register snapshot and BO count, in the layout copied back to
	 * the ioctl caller.
	 */
	struct drm_vc4_get_hang_state user_state;

	/* NOTE(review): not read or written anywhere in the visible code;
	 * the count that is actually used is user_state.bo_count.
	 */
	u32 bo_count;
	/* Array of user_state.bo_count GEM objects, each holding a
	 * reference taken when the state was captured.
	 */
	struct drm_gem_object **bo;
};
  49
  50 static void
  51 vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
  52 {
  53         unsigned int i;
  54
  55         mutex_lock(&dev->struct_mutex);
  56         for (i = 0; i < state->user_state.bo_count; i++)
  57                 drm_gem_object_unreference(state->bo[i]);
  58         mutex_unlock(&dev->struct_mutex);
  59
  60         kfree(state);
  61 }
  62
  63 int
  64 vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
  65                          struct drm_file *file_priv)
  66 {
  67         struct drm_vc4_get_hang_state *get_state = data;
  68         struct drm_vc4_get_hang_state_bo *bo_state;
  69         struct vc4_hang_state *kernel_state;
  70         struct drm_vc4_get_hang_state *state;
  71         struct vc4_dev *vc4 = to_vc4_dev(dev);
  72         unsigned long irqflags;
  73         u32 i;
  74         int ret = 0;
  75
  76         spin_lock_irqsave(&vc4->job_lock, irqflags);
  77         kernel_state = vc4->hang_state;
  78         if (!kernel_state) {
  79                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  80                 return -ENOENT;
  81         }
  82         state = &kernel_state->user_state;
  83
  84         /* If the user's array isn't big enough, just return the
  85          * required array size.
  86          */
  87         if (get_state->bo_count < state->bo_count) {
  88                 get_state->bo_count = state->bo_count;
  89                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  90                 return 0;
  91         }
  92
  93         vc4->hang_state = NULL;
  94         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
  95
  96         /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
  97         state->bo = get_state->bo;
  98         memcpy(get_state, state, sizeof(*state));
  99
 100         bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
 101         if (!bo_state) {
 102                 ret = -ENOMEM;
 103                 goto err_free;
 104         }
 105
 106         for (i = 0; i < state->bo_count; i++) {
 107                 struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
 108                 u32 handle;
 109
 110                 ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
 111                                             &handle);
 112
 113                 if (ret) {
 114                         state->bo_count = i - 1;
 115                         goto err;
 116                 }
 117                 bo_state[i].handle = handle;
 118                 bo_state[i].paddr = vc4_bo->base.paddr;
 119                 bo_state[i].size = vc4_bo->base.base.size;
 120         }
 121
 122         if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
 123                          bo_state,
 124                          state->bo_count * sizeof(*bo_state)))
 125                 ret = -EFAULT;
 126
 127         kfree(bo_state);
 128
 129 err_free:
 130
 131         vc4_free_hang_state(dev, kernel_state);
 132
 133 err:
 134         return ret;
 135 }
 136
 137 static void
 138 vc4_save_hang_state(struct drm_device *dev)
 139 {
 140         struct vc4_dev *vc4 = to_vc4_dev(dev);
 141         struct drm_vc4_get_hang_state *state;
 142         struct vc4_hang_state *kernel_state;
 143         struct vc4_exec_info *exec;
 144         struct vc4_bo *bo;
 145         unsigned long irqflags;
 146         unsigned int i, unref_list_count;
 147
 148         kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
 149         if (!kernel_state)
 150                 return;
 151
 152         state = &kernel_state->user_state;
 153
 154         spin_lock_irqsave(&vc4->job_lock, irqflags);
 155         exec = vc4_first_job(vc4);
 156         if (!exec) {
 157                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 158                 return;
 159         }
 160
 161         unref_list_count = 0;
 162         list_for_each_entry(bo, &exec->unref_list, unref_head)
 163                 unref_list_count++;
 164
 165         state->bo_count = exec->bo_count + unref_list_count;
 166         kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
 167                                    GFP_ATOMIC);
 168         if (!kernel_state->bo) {
 169                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 170                 return;
 171         }
 172
 173         for (i = 0; i < exec->bo_count; i++) {
 174                 drm_gem_object_reference(&exec->bo[i]->base);
 175                 kernel_state->bo[i] = &exec->bo[i]->base;
 176         }
 177
 178         list_for_each_entry(bo, &exec->unref_list, unref_head) {
 179                 drm_gem_object_reference(&bo->base.base);
 180                 kernel_state->bo[i] = &bo->base.base;
 181                 i++;
 182         }
 183
 184         state->start_bin = exec->ct0ca;
 185         state->start_render = exec->ct1ca;
 186
 187         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 188
 189         state->ct0ca = V3D_READ(V3D_CTNCA(0));
 190         state->ct0ea = V3D_READ(V3D_CTNEA(0));
 191
 192         state->ct1ca = V3D_READ(V3D_CTNCA(1));
 193         state->ct1ea = V3D_READ(V3D_CTNEA(1));
 194
 195         state->ct0cs = V3D_READ(V3D_CTNCS(0));
 196         state->ct1cs = V3D_READ(V3D_CTNCS(1));
 197
 198         state->ct0ra0 = V3D_READ(V3D_CT00RA0);
 199         state->ct1ra0 = V3D_READ(V3D_CT01RA0);
 200
 201         state->bpca = V3D_READ(V3D_BPCA);
 202         state->bpcs = V3D_READ(V3D_BPCS);
 203         state->bpoa = V3D_READ(V3D_BPOA);
 204         state->bpos = V3D_READ(V3D_BPOS);
 205
 206         state->vpmbase = V3D_READ(V3D_VPMBASE);
 207
 208         state->dbge = V3D_READ(V3D_DBGE);
 209         state->fdbgo = V3D_READ(V3D_FDBGO);
 210         state->fdbgb = V3D_READ(V3D_FDBGB);
 211         state->fdbgr = V3D_READ(V3D_FDBGR);
 212         state->fdbgs = V3D_READ(V3D_FDBGS);
 213         state->errstat = V3D_READ(V3D_ERRSTAT);
 214
 215         spin_lock_irqsave(&vc4->job_lock, irqflags);
 216         if (vc4->hang_state) {
 217                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 218                 vc4_free_hang_state(dev, kernel_state);
 219         } else {
 220                 vc4->hang_state = kernel_state;
 221                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 222         }
 223 }
 224
 225 static void
 226 vc4_reset(struct drm_device *dev)
 227 {
 228         struct vc4_dev *vc4 = to_vc4_dev(dev);
 229
 230         DRM_INFO("Resetting GPU.\n");
 231         vc4_v3d_set_power(vc4, false);
 232         vc4_v3d_set_power(vc4, true);
 233
 234         vc4_irq_reset(dev);
 235
 236         /* Rearm the hangcheck -- another job might have been waiting
 237          * for our hung one to get kicked off, and vc4_irq_reset()
 238          * would have started it.
 239          */
 240         vc4_queue_hangcheck(dev);
 241 }
 242
 243 static void
 244 vc4_reset_work(struct work_struct *work)
 245 {
 246         struct vc4_dev *vc4 =
 247                 container_of(work, struct vc4_dev, hangcheck.reset_work);
 248
 249         vc4_save_hang_state(vc4->dev);
 250
 251         vc4_reset(vc4->dev);
 252 }
 253
 254 static void
 255 vc4_hangcheck_elapsed(unsigned long data)
 256 {
 257         struct drm_device *dev = (struct drm_device *)data;
 258         struct vc4_dev *vc4 = to_vc4_dev(dev);
 259         uint32_t ct0ca, ct1ca;
 260
 261         /* If idle, we can stop watching for hangs. */
 262         if (list_empty(&vc4->job_list))
 263                 return;
 264
 265         ct0ca = V3D_READ(V3D_CTNCA(0));
 266         ct1ca = V3D_READ(V3D_CTNCA(1));
 267
 268         /* If we've made any progress in execution, rearm the timer
 269          * and wait.
 270          */
 271         if (ct0ca != vc4->hangcheck.last_ct0ca ||
 272             ct1ca != vc4->hangcheck.last_ct1ca) {
 273                 vc4->hangcheck.last_ct0ca = ct0ca;
 274                 vc4->hangcheck.last_ct1ca = ct1ca;
 275                 vc4_queue_hangcheck(dev);
 276                 return;
 277         }
 278
 279         /* We've gone too long with no progress, reset.  This has to
 280          * be done from a work struct, since resetting can sleep and
 281          * this timer hook isn't allowed to.
 282          */
 283         schedule_work(&vc4->hangcheck.reset_work);
 284 }
 285
 286 static void
 287 submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
 288 {
 289         struct vc4_dev *vc4 = to_vc4_dev(dev);
 290
 291         /* Set the current and end address of the control list.
 292          * Writing the end register is what starts the job.
 293          */
 294         V3D_WRITE(V3D_CTNCA(thread), start);
 295         V3D_WRITE(V3D_CTNEA(thread), end);
 296 }
 297
 298 int
 299 vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
 300                    bool interruptible)
 301 {
 302         struct vc4_dev *vc4 = to_vc4_dev(dev);
 303         int ret = 0;
 304         unsigned long timeout_expire;
 305         DEFINE_WAIT(wait);
 306
 307         if (vc4->finished_seqno >= seqno)
 308                 return 0;
 309
 310         if (timeout_ns == 0)
 311                 return -ETIME;
 312
 313         timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
 314
 315         trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
 316         for (;;) {
 317                 prepare_to_wait(&vc4->job_wait_queue, &wait,
 318                                 interruptible ? TASK_INTERRUPTIBLE :
 319                                 TASK_UNINTERRUPTIBLE);
 320
 321                 if (interruptible && signal_pending(current)) {
 322                         ret = -ERESTARTSYS;
 323                         break;
 324                 }
 325
 326                 if (vc4->finished_seqno >= seqno)
 327                         break;
 328
 329                 if (timeout_ns != ~0ull) {
 330                         if (time_after_eq(jiffies, timeout_expire)) {
 331                                 ret = -ETIME;
 332                                 break;
 333                         }
 334                         schedule_timeout(timeout_expire - jiffies);
 335                 } else {
 336                         schedule();
 337                 }
 338         }
 339
 340         finish_wait(&vc4->job_wait_queue, &wait);
 341         trace_vc4_wait_for_seqno_end(dev, seqno);
 342
 343         if (ret && ret != -ERESTARTSYS) {
 344                 DRM_ERROR("timeout waiting for render thread idle\n");
 345                 return ret;
 346         }
 347
 348         return 0;
 349 }
 350
 351 static void
 352 vc4_flush_caches(struct drm_device *dev)
 353 {
 354         struct vc4_dev *vc4 = to_vc4_dev(dev);
 355
 356         /* Flush the GPU L2 caches.  These caches sit on top of system
 357          * L3 (the 128kb or so shared with the CPU), and are
 358          * non-allocating in the L3.
 359          */
 360         V3D_WRITE(V3D_L2CACTL,
 361                   V3D_L2CACTL_L2CCLR);
 362
 363         V3D_WRITE(V3D_SLCACTL,
 364                   VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
 365                   VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
 366                   VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
 367                   VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
 368 }
 369
 370 /* Sets the registers for the next job to be actually be executed in
 371  * the hardware.
 372  *
 373  * The job_lock should be held during this.
 374  */
 375 void
 376 vc4_submit_next_job(struct drm_device *dev)
 377 {
 378         struct vc4_dev *vc4 = to_vc4_dev(dev);
 379         struct vc4_exec_info *exec = vc4_first_job(vc4);
 380
 381         if (!exec)
 382                 return;
 383
 384         vc4_flush_caches(dev);
 385
 386         /* Disable the binner's pre-loaded overflow memory address */
 387         V3D_WRITE(V3D_BPOA, 0);
 388         V3D_WRITE(V3D_BPOS, 0);
 389
 390         if (exec->ct0ca != exec->ct0ea)
 391                 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
 392         submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
 393 }
 394
 395 static void
 396 vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 397 {
 398         struct vc4_bo *bo;
 399         unsigned i;
 400
 401         for (i = 0; i < exec->bo_count; i++) {
 402                 bo = to_vc4_bo(&exec->bo[i]->base);
 403                 bo->seqno = seqno;
 404         }
 405
 406         list_for_each_entry(bo, &exec->unref_list, unref_head) {
 407                 bo->seqno = seqno;
 408         }
 409 }
 410
 411 /* Queues a struct vc4_exec_info for execution.  If no job is
 412  * currently executing, then submits it.
 413  *
 414  * Unlike most GPUs, our hardware only handles one command list at a
 415  * time.  To queue multiple jobs at once, we'd need to edit the
 416  * previous command list to have a jump to the new one at the end, and
 417  * then bump the end address.  That's a change for a later date,
 418  * though.
 419  */
 420 static void
 421 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 422 {
 423         struct vc4_dev *vc4 = to_vc4_dev(dev);
 424         uint64_t seqno;
 425         unsigned long irqflags;
 426
 427         spin_lock_irqsave(&vc4->job_lock, irqflags);
 428
 429         seqno = ++vc4->emit_seqno;
 430         exec->seqno = seqno;
 431         vc4_update_bo_seqnos(exec, seqno);
 432
 433         list_add_tail(&exec->head, &vc4->job_list);
 434
 435         /* If no job was executing, kick ours off.  Otherwise, it'll
 436          * get started when the previous job's frame done interrupt
 437          * occurs.
 438          */
 439         if (vc4_first_job(vc4) == exec) {
 440                 vc4_submit_next_job(dev);
 441                 vc4_queue_hangcheck(dev);
 442         }
 443
 444         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 445 }
 446
 447 /**
 448  * Looks up a bunch of GEM handles for BOs and stores the array for
 449  * use in the command validator that actually writes relocated
 450  * addresses pointing to them.
 451  */
 452 static int
 453 vc4_cl_lookup_bos(struct drm_device *dev,
 454                   struct drm_file *file_priv,
 455                   struct vc4_exec_info *exec)
 456 {
 457         struct drm_vc4_submit_cl *args = exec->args;
 458         uint32_t *handles;
 459         int ret = 0;
 460         int i;
 461
 462         exec->bo_count = args->bo_handle_count;
 463
 464         if (!exec->bo_count) {
 465                 /* See comment on bo_index for why we have to check
 466                  * this.
 467                  */
 468                 DRM_ERROR("Rendering requires BOs to validate\n");
 469                 return -EINVAL;
 470         }
 471
 472         exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
 473                            GFP_KERNEL);
 474         if (!exec->bo) {
 475                 DRM_ERROR("Failed to allocate validated BO pointers\n");
 476                 return -ENOMEM;
 477         }
 478
 479         handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
 480         if (!handles) {
 481                 DRM_ERROR("Failed to allocate incoming GEM handles\n");
 482                 goto fail;
 483         }
 484
 485         ret = copy_from_user(handles,
 486                              (void __user *)(uintptr_t)args->bo_handles,
 487                              exec->bo_count * sizeof(uint32_t));
 488         if (ret) {
 489                 DRM_ERROR("Failed to copy in GEM handles\n");
 490                 goto fail;
 491         }
 492
 493         spin_lock(&file_priv->table_lock);
 494         for (i = 0; i < exec->bo_count; i++) {
 495                 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 496                                                      handles[i]);
 497                 if (!bo) {
 498                         DRM_ERROR("Failed to look up GEM BO %d: %d\n",
 499                                   i, handles[i]);
 500                         ret = -EINVAL;
 501                         spin_unlock(&file_priv->table_lock);
 502                         goto fail;
 503                 }
 504                 drm_gem_object_reference(bo);
 505                 exec->bo[i] = (struct drm_gem_cma_object *)bo;
 506         }
 507         spin_unlock(&file_priv->table_lock);
 508
 509 fail:
 510         kfree(handles);
 511         return 0;
 512 }
 513
 514 static int
 515 vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 516 {
 517         struct drm_vc4_submit_cl *args = exec->args;
 518         void *temp = NULL;
 519         void *bin;
 520         int ret = 0;
 521         uint32_t bin_offset = 0;
 522         uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
 523                                              16);
 524         uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
 525         uint32_t exec_size = uniforms_offset + args->uniforms_size;
 526         uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
 527                                           args->shader_rec_count);
 528         struct vc4_bo *bo;
 529
 530         if (uniforms_offset < shader_rec_offset ||
 531             exec_size < uniforms_offset ||
 532             args->shader_rec_count >= (UINT_MAX /
 533                                           sizeof(struct vc4_shader_state)) ||
 534             temp_size < exec_size) {
 535                 DRM_ERROR("overflow in exec arguments\n");
 536                 goto fail;
 537         }
 538
 539         /* Allocate space where we'll store the copied in user command lists
 540          * and shader records.
 541          *
 542          * We don't just copy directly into the BOs because we need to
 543          * read the contents back for validation, and I think the
 544          * bo->vaddr is uncached access.
 545          */
 546         temp = kmalloc(temp_size, GFP_KERNEL);
 547         if (!temp) {
 548                 DRM_ERROR("Failed to allocate storage for copying "
 549                           "in bin/render CLs.\n");
 550                 ret = -ENOMEM;
 551                 goto fail;
 552         }
 553         bin = temp + bin_offset;
 554         exec->shader_rec_u = temp + shader_rec_offset;
 555         exec->uniforms_u = temp + uniforms_offset;
 556         exec->shader_state = temp + exec_size;
 557         exec->shader_state_size = args->shader_rec_count;
 558
 559         if (copy_from_user(bin,
 560                            (void __user *)(uintptr_t)args->bin_cl,
 561                            args->bin_cl_size)) {
 562                 ret = -EFAULT;
 563                 goto fail;
 564         }
 565
 566         if (copy_from_user(exec->shader_rec_u,
 567                            (void __user *)(uintptr_t)args->shader_rec,
 568                            args->shader_rec_size)) {
 569                 ret = -EFAULT;
 570                 goto fail;
 571         }
 572
 573         if (copy_from_user(exec->uniforms_u,
 574                            (void __user *)(uintptr_t)args->uniforms,
 575                            args->uniforms_size)) {
 576                 ret = -EFAULT;
 577                 goto fail;
 578         }
 579
 580         bo = vc4_bo_create(dev, exec_size, true);
 581         if (!bo) {
 582                 DRM_ERROR("Couldn't allocate BO for binning\n");
 583                 ret = -ENOMEM;
 584                 goto fail;
 585         }
 586         exec->exec_bo = &bo->base;
 587
 588         list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
 589                       &exec->unref_list);
 590
 591         exec->ct0ca = exec->exec_bo->paddr + bin_offset;
 592
 593         exec->bin_u = bin;
 594
 595         exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
 596         exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
 597         exec->shader_rec_size = args->shader_rec_size;
 598
 599         exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
 600         exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
 601         exec->uniforms_size = args->uniforms_size;
 602
 603         ret = vc4_validate_bin_cl(dev,
 604                                   exec->exec_bo->vaddr + bin_offset,
 605                                   bin,
 606                                   exec);
 607         if (ret)
 608                 goto fail;
 609
 610         ret = vc4_validate_shader_recs(dev, exec);
 611
 612 fail:
 613         kfree(temp);
 614         return ret;
 615 }
 616
 617 static void
 618 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 619 {
 620         unsigned i;
 621
 622         /* Need the struct lock for drm_gem_object_unreference(). */
 623         mutex_lock(&dev->struct_mutex);
 624         if (exec->bo) {
 625                 for (i = 0; i < exec->bo_count; i++)
 626                         drm_gem_object_unreference(&exec->bo[i]->base);
 627                 kfree(exec->bo);
 628         }
 629
 630         while (!list_empty(&exec->unref_list)) {
 631                 struct vc4_bo *bo = list_first_entry(&exec->unref_list,
 632                                                      struct vc4_bo, unref_head);
 633                 list_del(&bo->unref_head);
 634                 drm_gem_object_unreference(&bo->base.base);
 635         }
 636         mutex_unlock(&dev->struct_mutex);
 637
 638         kfree(exec);
 639 }
 640
 641 void
 642 vc4_job_handle_completed(struct vc4_dev *vc4)
 643 {
 644         unsigned long irqflags;
 645         struct vc4_seqno_cb *cb, *cb_temp;
 646
 647         spin_lock_irqsave(&vc4->job_lock, irqflags);
 648         while (!list_empty(&vc4->job_done_list)) {
 649                 struct vc4_exec_info *exec =
 650                         list_first_entry(&vc4->job_done_list,
 651                                          struct vc4_exec_info, head);
 652                 list_del(&exec->head);
 653
 654                 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 655                 vc4_complete_exec(vc4->dev, exec);
 656                 spin_lock_irqsave(&vc4->job_lock, irqflags);
 657         }
 658
 659         list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
 660                 if (cb->seqno <= vc4->finished_seqno) {
 661                         list_del_init(&cb->work.entry);
 662                         schedule_work(&cb->work);
 663                 }
 664         }
 665
 666         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 667 }
 668
 669 static void vc4_seqno_cb_work(struct work_struct *work)
 670 {
 671         struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
 672
 673         cb->func(cb);
 674 }
 675
 676 int vc4_queue_seqno_cb(struct drm_device *dev,
 677                        struct vc4_seqno_cb *cb, uint64_t seqno,
 678                        void (*func)(struct vc4_seqno_cb *cb))
 679 {
 680         struct vc4_dev *vc4 = to_vc4_dev(dev);
 681         int ret = 0;
 682         unsigned long irqflags;
 683
 684         cb->func = func;
 685         INIT_WORK(&cb->work, vc4_seqno_cb_work);
 686
 687         spin_lock_irqsave(&vc4->job_lock, irqflags);
 688         if (seqno > vc4->finished_seqno) {
 689                 cb->seqno = seqno;
 690                 list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
 691         } else {
 692                 schedule_work(&cb->work);
 693         }
 694         spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 695
 696         return ret;
 697 }
 698
 699 /* Scheduled when any job has been completed, this walks the list of
 700  * jobs that had completed and unrefs their BOs and frees their exec
 701  * structs.
 702  */
 703 static void
 704 vc4_job_done_work(struct work_struct *work)
 705 {
 706         struct vc4_dev *vc4 =
 707                 container_of(work, struct vc4_dev, job_done_work);
 708
 709         vc4_job_handle_completed(vc4);
 710 }
 711
 712 static int
 713 vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
 714                                 uint64_t seqno,
 715                                 uint64_t *timeout_ns)
 716 {
 717         unsigned long start = jiffies;
 718         int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
 719
 720         if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
 721                 uint64_t delta = jiffies_to_nsecs(jiffies - start);
 722
 723                 if (*timeout_ns >= delta)
 724                         *timeout_ns -= delta;
 725         }
 726
 727         return ret;
 728 }
 729
 730 int
 731 vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
 732                      struct drm_file *file_priv)
 733 {
 734         struct drm_vc4_wait_seqno *args = data;
 735
 736         return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
 737                                                &args->timeout_ns);
 738 }
 739
 740 int
 741 vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 742                   struct drm_file *file_priv)
 743 {
 744         int ret;
 745         struct drm_vc4_wait_bo *args = data;
 746         struct drm_gem_object *gem_obj;
 747         struct vc4_bo *bo;
 748
 749         gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 750         if (!gem_obj) {
 751                 DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
 752                 return -EINVAL;
 753         }
 754         bo = to_vc4_bo(gem_obj);
 755
 756         ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
 757                                               &args->timeout_ns);
 758
 759         drm_gem_object_unreference_unlocked(gem_obj);
 760         return ret;
 761 }
 762
 763 /**
 764  * Submits a command list to the VC4.
 765  *
 766  * This is what is called batchbuffer emitting on other hardware.
 767  */
 768 int
 769 vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 770                     struct drm_file *file_priv)
 771 {
 772         struct vc4_dev *vc4 = to_vc4_dev(dev);
 773         struct drm_vc4_submit_cl *args = data;
 774         struct vc4_exec_info *exec;
 775         int ret;
 776
 777         if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
 778                 DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
 779                 return -EINVAL;
 780         }
 781
 782         exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 783         if (!exec) {
 784                 DRM_ERROR("malloc failure on exec struct\n");
 785                 return -ENOMEM;
 786         }
 787
 788         exec->args = args;
 789         INIT_LIST_HEAD(&exec->unref_list);
 790
 791         ret = vc4_cl_lookup_bos(dev, file_priv, exec);
 792         if (ret)
 793                 goto fail;
 794
 795         if (exec->args->bin_cl_size != 0) {
 796                 ret = vc4_get_bcl(dev, exec);
 797                 if (ret)
 798                         goto fail;
 799         } else {
 800                 exec->ct0ca = 0;
 801                 exec->ct0ea = 0;
 802         }
 803
 804         ret = vc4_get_rcl(dev, exec);
 805         if (ret)
 806                 goto fail;
 807
 808         /* Clear this out of the struct we'll be putting in the queue,
 809          * since it's part of our stack.
 810          */
 811         exec->args = NULL;
 812
 813         vc4_queue_submit(dev, exec);
 814
 815         /* Return the seqno for our job. */
 816         args->seqno = vc4->emit_seqno;
 817
 818         return 0;
 819
 820 fail:
 821         vc4_complete_exec(vc4->dev, exec);
 822
 823         return ret;
 824 }
 825
 826 void
 827 vc4_gem_init(struct drm_device *dev)
 828 {
 829         struct vc4_dev *vc4 = to_vc4_dev(dev);
 830
 831         INIT_LIST_HEAD(&vc4->job_list);
 832         INIT_LIST_HEAD(&vc4->job_done_list);
 833         INIT_LIST_HEAD(&vc4->seqno_cb_list);
 834         spin_lock_init(&vc4->job_lock);
 835
 836         INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
 837         setup_timer(&vc4->hangcheck.timer,
 838                     vc4_hangcheck_elapsed,
 839                     (unsigned long)dev);
 840
 841         INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 842 }
 843
 844 void
 845 vc4_gem_destroy(struct drm_device *dev)
 846 {
 847         struct vc4_dev *vc4 = to_vc4_dev(dev);
 848
 849         /* Waiting for exec to finish would need to be done before
 850          * unregistering V3D.
 851          */
 852         WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
 853
 854         /* V3D should already have disabled its interrupt and cleared
 855          * the overflow allocation registers.  Now free the object.
 856          */
 857         if (vc4->overflow_mem) {
 858                 drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
 859                 vc4->overflow_mem = NULL;
 860         }
 861
 862         vc4_bo_cache_destroy(dev);
 863
 864         if (vc4->hang_state)
 865                 vc4_free_hang_state(dev, vc4->hang_state);
 866 }