/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "i915_drv.h"

static const char *i915_fence_get_driver_name(struct fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct fence *fence)
{
	/* Timelines are bound by eviction to a VM. However, since
	 * we only have a global seqno at the moment, we only have
	 * a single timeline. Note that each timeline will have
	 * multiple execution contexts (fence contexts) as we allow
	 * engines within a single timeline to execute in parallel.
	 */
	return "global";
}

static bool i915_fence_signaled(struct fence *fence)
{
	return i915_gem_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct fence *fence)
{
	if (i915_fence_signaled(fence))
		return false;

	intel_engine_enable_signaling(to_request(fence));
	return true;
}

static signed long i915_fence_wait(struct fence *fence,
				   bool interruptible,
				   signed long timeout_jiffies)
{
	s64 timeout_ns, *timeout;
	int ret;

	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
		timeout = &timeout_ns;
	} else {
		timeout = NULL;
	}

	ret = __i915_wait_request(to_request(fence),
				  interruptible, timeout,
				  NO_WAITBOOST);
	if (ret == -ETIME)
		return 0;

	if (ret < 0)
		return ret;

	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
		timeout_jiffies = nsecs_to_jiffies(timeout_ns);

	return timeout_jiffies;
}

static void i915_fence_value_str(struct fence *fence, char *str, int size)
{
	snprintf(str, size, "%u", fence->seqno);
}

static void i915_fence_timeline_value_str(struct fence *fence, char *str,
					  int size)
{
	snprintf(str, size, "%u",
		 intel_engine_get_seqno(to_request(fence)->engine));
}

static void i915_fence_release(struct fence *fence)
{
	struct drm_i915_gem_request *req = to_request(fence);

	kmem_cache_free(req->i915->requests, req);
}

const struct fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
	.fence_value_str = i915_fence_value_str,
	.timeline_value_str = i915_fence_timeline_value_str,
};
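
/*
 * Added note: because requests are exposed as generic fences through the
 * ops table above, code outside i915 can synchronise with them without
 * knowing anything about the driver. An illustrative sketch (not part of
 * the original file), using the generic fence API from <linux/fence.h>:
 *
 *	signed long t;
 *
 *	t = fence_wait_timeout(&req->fence, true, msecs_to_jiffies(100));
 *	if (t == 0)
 *		return -ETIME;	(the wait timed out)
 *	else if (t < 0)
 *		return t;	(interrupted, or the fence errored)
 *	(otherwise the fence signaled with t jiffies of budget remaining)
 */
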
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->i915;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = get_pid(task_pid(current));

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

	put_pid(request->pid);
	request->pid = NULL;
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);
	list_del_init(&request->list);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	i915_gem_request_remove_from_client(request);

	if (request->previous_context) {
		if (i915.enable_execlists)
			intel_lr_context_unpin(request->previous_context,
					       request->engine);
	}

	i915_gem_context_unreference(request->ctx);
	i915_gem_request_unreference(request);
}

void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&req->i915->drm.struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	if (__i915_reset_in_progress(reset_counter)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these.
		 */
		if (!interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
{
	struct intel_engine_cs *engine;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	for_each_engine(engine, dev_priv) {
		ret = intel_engine_idle(engine);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev_priv);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
		while (intel_kick_waiters(dev_priv) ||
		       intel_kick_signalers(dev_priv))
			yield();
	}

	/* Finally reset hw state */
	for_each_engine(engine, dev_priv)
		intel_ring_init_seqno(engine, seqno);

	return 0;
}

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we
	 * will inject to ring
	 */
	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
	if (ret)
		return ret;

	/* Carefully set the last_seqno value so that wrap
	 * detection still works
	 */
	dev_priv->next_seqno = seqno;
	dev_priv->last_seqno = seqno - 1;
	if (dev_priv->last_seqno == 0)
		dev_priv->last_seqno--;

	return 0;
}
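
/*
 * Worked example of the wrap handling above (added, illustrative): after
 * i915_gem_set_seqno(dev, 1), next_seqno == 1 and last_seqno wraps from
 * 0 to 0xffffffff. Wrap detection keeps working because seqnos are
 * compared with signed 32-bit arithmetic (i915_seqno_passed()), so
 * (s32)(1 - 0xffffffff) == 2 >= 0 and 1 is still ordered after the
 * pre-wrap value.
 */
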
static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
{
	/* reserve 0 for non-seqno */
	if (unlikely(dev_priv->next_seqno == 0)) {
		int ret;

		ret = i915_gem_init_seqno(dev_priv, 0);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
	return 0;
}

static inline int
__i915_gem_request_alloc(struct intel_engine_cs *engine,
			 struct i915_gem_context *ctx,
			 struct drm_i915_gem_request **req_out)
{
	struct drm_i915_private *dev_priv = engine->i915;
	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
	struct drm_i915_gem_request *req;
	u32 seqno;
	int ret;

	if (!req_out)
		return -EINVAL;

	*req_out = NULL;

	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
	 * and restart.
	 */
	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Move the oldest request to the slab-cache (if not in use!) */
	if (!list_empty(&engine->request_list)) {
		req = list_first_entry(&engine->request_list,
				       typeof(*req), list);
		if (i915_gem_request_completed(req))
			i915_gem_request_retire(req);
	}

	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ret = i915_gem_get_seqno(dev_priv, &seqno);
	if (ret)
		goto err;

	spin_lock_init(&req->lock);
	fence_init(&req->fence,
		   &i915_fence_ops,
		   &req->lock,
		   engine->fence_context,
		   seqno);

	req->i915 = dev_priv;
	req->engine = engine;
	req->ctx = ctx;
	i915_gem_context_reference(ctx);

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_add_request() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret)
		goto err_ctx;

	*req_out = req;
	return 0;

err_ctx:
	i915_gem_context_unreference(ctx);
err:
	kmem_cache_free(dev_priv->requests, req);
	return ret;
}

/**
 * i915_gem_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *       This can be NULL if the request is not directly related to
 *       any specific user context, in which case this function will
 *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct drm_i915_gem_request *
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *req;
	int err;

	if (!ctx)
		ctx = engine->i915->kernel_context;
	err = __i915_gem_request_alloc(engine, ctx, &req);
	return err ? ERR_PTR(err) : req;
}
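
/*
 * Typical lifecycle of a request (added, illustrative sketch; assumes the
 * caller holds struct_mutex and uses the i915_add_request() convenience
 * wrapper from i915_gem_request.h; error handling elided):
 *
 *	struct drm_i915_gem_request *req;
 *
 *	req = i915_gem_request_alloc(engine, ctx);
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);
 *
 *	... emit commands for this request into the ring ...
 *
 *	i915_add_request(req);	(seals and submits; not allowed to fail)
 *
 * The request is then retired by i915_gem_request_retire() once the GPU
 * advances past its seqno.
 */
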
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->gt.active_engines |= intel_engine_flag(engine);
	if (dev_priv->gt.awake)
		return;

	intel_runtime_pm_get_noresume(dev_priv);
	dev_priv->gt.awake = true;

	intel_enable_gt_powersave(dev_priv);
	i915_update_gfx_val(dev_priv);
	if (INTEL_GEN(dev_priv) >= 6)
		gen6_rps_busy(dev_priv);

	queue_delayed_work(dev_priv->wq,
			   &dev_priv->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_add_request(struct drm_i915_gem_request *request,
			struct drm_i915_gem_object *obj,
			bool flush_caches)
{
	struct intel_engine_cs *engine;
	struct intel_ringbuffer *ringbuf;
	u32 request_start;
	u32 reserved_tail;
	int ret;

	if (WARN_ON(!request))
		return;

	engine = request->engine;
	ringbuf = request->ringbuf;

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request_start = intel_ring_get_tail(ringbuf);
	reserved_tail = request->reserved_space;
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		if (i915.enable_execlists)
			ret = logical_ring_flush_all_caches(request);
		else
			ret = intel_ring_flush_all_caches(request);
		/* Not allowed to fail! */
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
	}

	trace_i915_gem_request_add(request);

	request->head = request_start;

	/* Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	request->batch_obj = obj;

	/* Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */
	request->emitted_jiffies = jiffies;
	request->previous_seqno = engine->last_submitted_seqno;
	smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
	list_add_tail(&request->list, &engine->request_list);

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request->postfix = intel_ring_get_tail(ringbuf);

	if (i915.enable_execlists) {
		ret = engine->emit_request(request);
	} else {
		ret = engine->add_request(request);

		request->tail = intel_ring_get_tail(ringbuf);
	}
	/* Not allowed to fail! */
	WARN(ret, "emit|add_request failed: %d!\n", ret);
	/* Sanity check that the reserved size was large enough. */
	ret = intel_ring_get_tail(ringbuf) - request_start;
	if (ret < 0)
		ret += ringbuf->size;
	WARN_ONCE(ret > reserved_tail,
		  "Not enough space reserved (%d bytes) "
		  "for adding the request (%d bytes)\n",
		  reserved_tail, ret);

	i915_gem_mark_busy(engine);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}
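
/*
 * Added note: the shift above approximates dividing by 1000 with a divide
 * by 1024, avoiding a 64-bit division in a hot path. For example,
 * 1,000,000ns >> 10 = 976 "microseconds", an error of roughly 2.4%,
 * which is harmless for the busywait heuristics that consume this value.
 */
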
static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

bool __i915_spin_request(const struct drm_i915_gem_request *req,
			 int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_gem_request_completed(req))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax_lowlatency();
	} while (!need_resched());

	return false;
}
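
/*
 * Added note: callers normally reach this through the i915_spin_request()
 * wrapper (see i915_gem_request.h), which only drops into the busywait
 * once the request has actually started executing on the GPU.
 * __i915_wait_request() below shows the intended two-stage pattern: spin
 * for ~5us before arming the interrupt, then spin again briefly (~2us)
 * on each wakeup.
 */
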
/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to charge for RPS boosting
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns
 * the errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(reset);
	struct intel_wait wait;
	unsigned long timeout_remain;
	int ret = 0;

	might_sleep();

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req))
		return 0;

	timeout_remain = MAX_SCHEDULE_TIMEOUT;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		/* Record current time in case interrupted, or wedged */
		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
		*timeout += ktime_get_raw_ns();
	}

	trace_i915_gem_request_wait_begin(req);

	/* This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

	/* Optimistic spin for the next ~jiffie before touching IRQs */
	if (i915_spin_request(req, state, 5))
		goto complete;

	set_current_state(state);
	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_wait_init(&wait, req->fence.seqno);
	if (intel_engine_add_wait(req->engine, &wait))
		/* In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	for (;;) {
		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		timeout_remain = io_schedule_timeout(timeout_remain);
		if (timeout_remain == 0) {
			ret = -ETIME;
			break;
		}

		if (intel_wait_complete(&wait))
			break;

		set_current_state(state);

wakeup:
		/* Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(req))
			break;

		/* Only spin if we know the GPU is processing this request */
		if (i915_spin_request(req, state, 2))
			break;
	}
	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_engine_remove_wait(req->engine, &wait);
	__set_current_state(TASK_RUNNING);

complete:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		*timeout -= ktime_get_raw_ns();
		if (*timeout < 0)
			*timeout = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	if (IS_RPS_USER(rps) &&
	    req->fence.seqno == req->engine->last_submitted_seqno) {
		/* The GPU is now idle and this client has stalled.
		 * Since no other client has submitted a request in the
		 * meantime, assume that this client is the only one
		 * supplying work to the GPU but is unable to keep that
		 * work supplied because it is waiting. Since the GPU is
		 * then never kept fully busy, RPS autoclocking will
		 * keep the clocks relatively low, causing further delays.
		 * Compensate by giving the synchronous client credit for
		 * a waitboost next time.
		 */
		spin_lock(&req->i915->rps.client_lock);
		list_del_init(&rps->link);
		spin_unlock(&req->i915->rps.client_lock);
	}

	return ret;
}
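
/*
 * Illustrative sketch (added, hypothetical caller) of the in/out @timeout
 * contract documented above; error handling elided:
 *
 *	s64 timeout = 100 * NSEC_PER_MSEC;	(wait for at most 100ms)
 *	int ret;
 *
 *	ret = __i915_wait_request(req, true, &timeout, NULL);
 *	(on return, timeout holds the unused remainder of the budget in
 *	 nanoseconds; ret == -ETIME means the budget was exhausted, and
 *	 ret == -ERESTARTSYS that the wait was interrupted by a signal)
 */
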
/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int i915_wait_request(struct drm_i915_gem_request *req)
{
	int ret;

	lockdep_assert_held(&req->i915->drm.struct_mutex);

	ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
	if (ret)
		return ret;

	/* If the GPU hung, we want to keep the requests to find the guilty. */
	if (!i915_reset_in_progress(&req->i915->gpu_error))
		i915_gem_request_retire_upto(req);

	return 0;
}
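
/*
 * Added note: unlike __i915_wait_request(), this helper must be called
 * under struct_mutex (see the lockdep assertion above), because on a
 * successful wait it also retires the request and all of its predecessors
 * on the engine via i915_gem_request_retire_upto().
 */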