drivers/gpu/drm/i915/i915_gem_context.c

   1 /*
   2  * Copyright © 2011-2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Ben Widawsky <ben@bwidawsk.net>
  25  *
  26  */
  27
  28 /*
  29  * This file implements HW context support. On gen5+ a HW context consists of an
  30  * opaque GPU object which is referenced at times of context saves and restores.
  31  * With RC6 enabled, the context is also referenced as the GPU enters and exits
  32  * from RC6 (GPU has its own internal power context, except on gen5). Though
  33  * something like a context does exist for the media ring, the code only
  34  * supports contexts for the render ring.
  35  *
  36  * In software, there is a distinction between contexts created by the user,
  37  * and the default HW context. The default HW context is used by GPU clients
  38  * that do not request setup of their own hardware context. The default
  39  * context's state is never restored to help prevent programming errors. This
  40  * would happen if a client ran and piggy-backed off another client's GPU state.
  41  * The default context only exists to give the GPU some offset to load as the
  42  * current to invoke a save of the context we actually care about. In fact, the
  43  * code could likely be constructed, albeit in a more complicated fashion, to
  44  * never use the default context, though that limits the driver's ability to
  45  * swap out, and/or destroy other contexts.
  46  *
  47  * All other contexts are created as a request by the GPU client. These contexts
  48  * store GPU state, and thus allow GPU clients to not re-emit state (and
  49  * potentially query certain state) at any time. The kernel driver makes
  50  * certain that the appropriate commands are inserted.
  51  *
  52  * The context life cycle is semi-complicated in that context BOs may live
  53  * longer than the context itself because of the way the hardware, and object
  54  * tracking works. Below is a very crude representation of the state machine
  55  * describing the context life.
  56  *                                         refcount     pincount     active
  57  * S0: initial state                          0            0           0
  58  * S1: context created                        1            0           0
  59  * S2: context is currently running           2            1           X
  60  * S3: GPU referenced, but not current        2            0           1
  61  * S4: context is current, but destroyed      1            1           0
  62  * S5: like S3, but destroyed                 1            0           1
  63  *
  64  * The most common (but not all) transitions:
  65  * S0->S1: client creates a context
  66  * S1->S2: client submits execbuf with context
  67  * S2->S3: other clients submits execbuf with context
  68  * S3->S1: context object was retired
  69  * S3->S2: clients submits another execbuf
  70  * S2->S4: context destroy called with current context
  71  * S3->S5->S0: destroy path
  72  * S4->S5->S0: destroy path on current context
  73  *
  74  * There are two confusing terms used above:
  75  *  The "current context" means the context which is currently running on the
  76  *  GPU. The GPU has loaded its state already and has stored away the gtt
  77  *  offset of the BO. The GPU is not actively referencing the data at this
  78  *  offset, but it will on the next context switch. The only way to avoid this
  79  *  is to do a GPU reset.
  80  *
  81  *  An "active context" is one which was previously the "current context" and is
  82  *  on the active list waiting for the next context switch to occur. Until this
  83  *  happens, the object must remain at the same gtt offset. It is therefore
  84  *  possible to destroy a context, but it is still active.
  85  *
  86  */
  87
  88 #include <drm/drmP.h>
  89 #include <drm/i915_drm.h>
  90 #include "i915_drv.h"
  91 #include "i915_trace.h"
  92
  93 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
  94
  95 /* This is a HW constraint. The value below is the largest known requirement
  96  * I've seen in a spec to date, and that was a workaround for a non-shipping
  97  * part. It should be safe to decrease this, but it's more future proof as is.
  98  */
  99 #define GEN6_CONTEXT_ALIGN (64<<10)
 100 #define GEN7_CONTEXT_ALIGN 4096
 101
 102 static size_t get_context_alignment(struct drm_i915_private *dev_priv)
 103 {
 104         if (IS_GEN6(dev_priv))
 105                 return GEN6_CONTEXT_ALIGN;
 106
 107         return GEN7_CONTEXT_ALIGN;
 108 }
 109
 110 static int get_context_size(struct drm_i915_private *dev_priv)
 111 {
 112         int ret;
 113         u32 reg;
 114
 115         switch (INTEL_GEN(dev_priv)) {
 116         case 6:
 117                 reg = I915_READ(CXT_SIZE);
 118                 ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
 119                 break;
 120         case 7:
 121                 reg = I915_READ(GEN7_CXT_SIZE);
 122                 if (IS_HASWELL(dev_priv))
 123                         ret = HSW_CXT_TOTAL_SIZE;
 124                 else
 125                         ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
 126                 break;
 127         case 8:
 128                 ret = GEN8_CXT_TOTAL_SIZE;
 129                 break;
 130         default:
 131                 BUG();
 132         }
 133
 134         return ret;
 135 }
 136
 137 static void i915_gem_context_clean(struct i915_gem_context *ctx)
 138 {
 139         struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 140         struct i915_vma *vma, *next;
 141
 142         if (!ppgtt)
 143                 return;
 144
 145         list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
 146                                  vm_link) {
 147                 if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
 148                         break;
 149         }
 150 }
 151
 152 void i915_gem_context_free(struct kref *ctx_ref)
 153 {
 154         struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
 155
 156         lockdep_assert_held(&ctx->i915->dev->struct_mutex);
 157         trace_i915_context_free(ctx);
 158
 159         if (i915.enable_execlists)
 160                 intel_lr_context_free(ctx);
 161
 162         /*
 163          * This context is going away and we need to remove all VMAs still
 164          * around. This is to handle imported shared objects for which
 165          * destructor did not run when their handles were closed.
 166          */
 167         i915_gem_context_clean(ctx);
 168
 169         i915_ppgtt_put(ctx->ppgtt);
 170
 171         if (ctx->legacy_hw_ctx.rcs_state)
 172                 drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
 173         list_del(&ctx->link);
 174
 175         ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
 176         kfree(ctx);
 177 }
 178
 179 struct drm_i915_gem_object *
 180 i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 181 {
 182         struct drm_i915_gem_object *obj;
 183         int ret;
 184
 185         lockdep_assert_held(&dev->struct_mutex);
 186
 187         obj = i915_gem_object_create(dev, size);
 188         if (IS_ERR(obj))
 189                 return obj;
 190
 191         /*
 192          * Try to make the context utilize L3 as well as LLC.
 193          *
 194          * On VLV we don't have L3 controls in the PTEs so we
 195          * shouldn't touch the cache level, especially as that
 196          * would make the object snooped which might have a
 197          * negative performance impact.
 198          *
 199          * Snooping is required on non-llc platforms in execlist
 200          * mode, but since all GGTT accesses use PAT entry 0 we
 201          * get snooping anyway regardless of cache_level.
 202          *
 203          * This is only applicable for Ivy Bridge devices since
 204          * later platforms don't have L3 control bits in the PTE.
 205          */
 206         if (IS_IVYBRIDGE(dev)) {
 207                 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
 208                 /* Failure shouldn't ever happen this early */
 209                 if (WARN_ON(ret)) {
 210                         drm_gem_object_unreference(&obj->base);
 211                         return ERR_PTR(ret);
 212                 }
 213         }
 214
 215         return obj;
 216 }
 217
 218 static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
 219 {
 220         int ret;
 221
 222         ret = ida_simple_get(&dev_priv->context_hw_ida,
 223                              0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 224         if (ret < 0) {
 225                 /* Contexts are only released when no longer active.
 226                  * Flush any pending retires to hopefully release some
 227                  * stale contexts and try again.
 228                  */
 229                 i915_gem_retire_requests(dev_priv);
 230                 ret = ida_simple_get(&dev_priv->context_hw_ida,
 231                                      0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 232                 if (ret < 0)
 233                         return ret;
 234         }
 235
 236         *out = ret;
 237         return 0;
 238 }
 239
 240 static struct i915_gem_context *
 241 __create_hw_context(struct drm_device *dev,
 242                     struct drm_i915_file_private *file_priv)
 243 {
 244         struct drm_i915_private *dev_priv = dev->dev_private;
 245         struct i915_gem_context *ctx;
 246         int ret;
 247
 248         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 249         if (ctx == NULL)
 250                 return ERR_PTR(-ENOMEM);
 251
 252         ret = assign_hw_id(dev_priv, &ctx->hw_id);
 253         if (ret) {
 254                 kfree(ctx);
 255                 return ERR_PTR(ret);
 256         }
 257
 258         kref_init(&ctx->ref);
 259         list_add_tail(&ctx->link, &dev_priv->context_list);
 260         ctx->i915 = dev_priv;
 261
 262         if (dev_priv->hw_context_size) {
 263                 struct drm_i915_gem_object *obj =
 264                                 i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
 265                 if (IS_ERR(obj)) {
 266                         ret = PTR_ERR(obj);
 267                         goto err_out;
 268                 }
 269                 ctx->legacy_hw_ctx.rcs_state = obj;
 270         }
 271
 272         /* Default context will never have a file_priv */
 273         if (file_priv != NULL) {
 274                 ret = idr_alloc(&file_priv->context_idr, ctx,
 275                                 DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
 276                 if (ret < 0)
 277                         goto err_out;
 278         } else
 279                 ret = DEFAULT_CONTEXT_HANDLE;
 280
 281         ctx->file_priv = file_priv;
 282         ctx->user_handle = ret;
 283         /* NB: Mark all slices as needing a remap so that when the context first
 284          * loads it will restore whatever remap state already exists. If there
 285          * is no remap info, it will be a NOP. */
 286         ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 287
 288         ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
 289
 290         return ctx;
 291
 292 err_out:
 293         i915_gem_context_unreference(ctx);
 294         return ERR_PTR(ret);
 295 }
 296
 297 /**
 298  * The default context needs to exist per ring that uses contexts. It stores the
 299  * context state of the GPU for applications that don't utilize HW contexts, as
 300  * well as an idle case.
 301  */
 302 static struct i915_gem_context *
 303 i915_gem_create_context(struct drm_device *dev,
 304                         struct drm_i915_file_private *file_priv)
 305 {
 306         struct i915_gem_context *ctx;
 307
 308         lockdep_assert_held(&dev->struct_mutex);
 309
 310         ctx = __create_hw_context(dev, file_priv);
 311         if (IS_ERR(ctx))
 312                 return ctx;
 313
 314         if (USES_FULL_PPGTT(dev)) {
 315                 struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
 316
 317                 if (IS_ERR(ppgtt)) {
 318                         DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 319                                          PTR_ERR(ppgtt));
 320                         idr_remove(&file_priv->context_idr, ctx->user_handle);
 321                         i915_gem_context_unreference(ctx);
 322                         return ERR_CAST(ppgtt);
 323                 }
 324
 325                 ctx->ppgtt = ppgtt;
 326         }
 327
 328         trace_i915_context_create(ctx);
 329
 330         return ctx;
 331 }
 332
 333 static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 334                                    struct intel_engine_cs *engine)
 335 {
 336         if (i915.enable_execlists) {
 337                 intel_lr_context_unpin(ctx, engine);
 338         } else {
 339                 if (engine->id == RCS && ctx->legacy_hw_ctx.rcs_state)
 340                         i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
 341                 i915_gem_context_unreference(ctx);
 342         }
 343 }
 344
 345 void i915_gem_context_reset(struct drm_device *dev)
 346 {
 347         struct drm_i915_private *dev_priv = dev->dev_private;
 348
 349         lockdep_assert_held(&dev->struct_mutex);
 350
 351         if (i915.enable_execlists) {
 352                 struct i915_gem_context *ctx;
 353
 354                 list_for_each_entry(ctx, &dev_priv->context_list, link)
 355                         intel_lr_context_reset(dev_priv, ctx);
 356         }
 357
 358         i915_gem_context_lost(dev_priv);
 359 }
 360
 361 int i915_gem_context_init(struct drm_device *dev)
 362 {
 363         struct drm_i915_private *dev_priv = dev->dev_private;
 364         struct i915_gem_context *ctx;
 365
 366         /* Init should only be called once per module load. Eventually the
 367          * restriction on the context_disabled check can be loosened. */
 368         if (WARN_ON(dev_priv->kernel_context))
 369                 return 0;
 370
 371         if (intel_vgpu_active(dev_priv) &&
 372             HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 373                 if (!i915.enable_execlists) {
 374                         DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
 375                         return -EINVAL;
 376                 }
 377         }
 378
 379         /* Using the simple ida interface, the max is limited by sizeof(int) */
 380         BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
 381         ida_init(&dev_priv->context_hw_ida);
 382
 383         if (i915.enable_execlists) {
 384                 /* NB: intentionally left blank. We will allocate our own
 385                  * backing objects as we need them, thank you very much */
 386                 dev_priv->hw_context_size = 0;
 387         } else if (HAS_HW_CONTEXTS(dev_priv)) {
 388                 dev_priv->hw_context_size =
 389                         round_up(get_context_size(dev_priv), 4096);
 390                 if (dev_priv->hw_context_size > (1<<20)) {
 391                         DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
 392                                          dev_priv->hw_context_size);
 393                         dev_priv->hw_context_size = 0;
 394                 }
 395         }
 396
 397         ctx = i915_gem_create_context(dev, NULL);
 398         if (IS_ERR(ctx)) {
 399                 DRM_ERROR("Failed to create default global context (error %ld)\n",
 400                           PTR_ERR(ctx));
 401                 return PTR_ERR(ctx);
 402         }
 403
 404         if (ctx->legacy_hw_ctx.rcs_state) {
 405                 int ret;
 406
 407                 /* We may need to do things with the shrinker which
 408                  * require us to immediately switch back to the default
 409                  * context. This can cause a problem as pinning the
 410                  * default context also requires GTT space which may not
 411                  * be available. To avoid this we always pin the default
 412                  * context.
 413                  */
 414                 ret = i915_gem_obj_ggtt_pin(ctx->legacy_hw_ctx.rcs_state,
 415                                             get_context_alignment(dev_priv), 0);
 416                 if (ret) {
 417                         DRM_ERROR("Failed to pinned default global context (error %d)\n",
 418                                   ret);
 419                         i915_gem_context_unreference(ctx);
 420                         return ret;
 421                 }
 422         }
 423
 424         dev_priv->kernel_context = ctx;
 425
 426         DRM_DEBUG_DRIVER("%s context support initialized\n",
 427                         i915.enable_execlists ? "LR" :
 428                         dev_priv->hw_context_size ? "HW" : "fake");
 429         return 0;
 430 }
 431
 432 void i915_gem_context_lost(struct drm_i915_private *dev_priv)
 433 {
 434         struct intel_engine_cs *engine;
 435
 436         lockdep_assert_held(&dev_priv->dev->struct_mutex);
 437
 438         for_each_engine(engine, dev_priv) {
 439                 if (engine->last_context == NULL)
 440                         continue;
 441
 442                 i915_gem_context_unpin(engine->last_context, engine);
 443                 engine->last_context = NULL;
 444         }
 445
 446         /* Force the GPU state to be reinitialised on enabling */
 447         dev_priv->kernel_context->legacy_hw_ctx.initialized = false;
 448         dev_priv->kernel_context->remap_slice = ALL_L3_SLICES(dev_priv);
 449 }
 450
 451 void i915_gem_context_fini(struct drm_device *dev)
 452 {
 453         struct drm_i915_private *dev_priv = dev->dev_private;
 454         struct i915_gem_context *dctx = dev_priv->kernel_context;
 455
 456         lockdep_assert_held(&dev->struct_mutex);
 457
 458         if (dctx->legacy_hw_ctx.rcs_state)
 459                 i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 460
 461         i915_gem_context_unreference(dctx);
 462         dev_priv->kernel_context = NULL;
 463
 464         ida_destroy(&dev_priv->context_hw_ida);
 465 }
 466
 467 static int context_idr_cleanup(int id, void *p, void *data)
 468 {
 469         struct i915_gem_context *ctx = p;
 470
 471         i915_gem_context_unreference(ctx);
 472         return 0;
 473 }
 474
 475 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 476 {
 477         struct drm_i915_file_private *file_priv = file->driver_priv;
 478         struct i915_gem_context *ctx;
 479
 480         idr_init(&file_priv->context_idr);
 481
 482         mutex_lock(&dev->struct_mutex);
 483         ctx = i915_gem_create_context(dev, file_priv);
 484         mutex_unlock(&dev->struct_mutex);
 485
 486         if (IS_ERR(ctx)) {
 487                 idr_destroy(&file_priv->context_idr);
 488                 return PTR_ERR(ctx);
 489         }
 490
 491         return 0;
 492 }
 493
 494 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 495 {
 496         struct drm_i915_file_private *file_priv = file->driver_priv;
 497
 498         lockdep_assert_held(&dev->struct_mutex);
 499
 500         idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 501         idr_destroy(&file_priv->context_idr);
 502 }
 503
 504 static inline int
 505 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 506 {
 507         struct drm_i915_private *dev_priv = req->i915;
 508         struct intel_engine_cs *engine = req->engine;
 509         u32 flags = hw_flags | MI_MM_SPACE_GTT;
 510         const int num_rings =
 511                 /* Use an extended w/a on ivb+ if signalling from other rings */
 512                 i915_semaphore_is_enabled(dev_priv) ?
 513                 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
 514                 0;
 515         int len, ret;
 516
 517         /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
 518          * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
 519          * explicitly, so we rely on the value at ring init, stored in
 520          * itlb_before_ctx_switch.
 521          */
 522         if (IS_GEN6(dev_priv)) {
 523                 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
 524                 if (ret)
 525                         return ret;
 526         }
 527
 528         /* These flags are for resource streamer on HSW+ */
 529         if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
 530                 flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
 531         else if (INTEL_GEN(dev_priv) < 8)
 532                 flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 533
 534
 535         len = 4;
 536         if (INTEL_GEN(dev_priv) >= 7)
 537                 len += 2 + (num_rings ? 4*num_rings + 6 : 0);
 538
 539         ret = intel_ring_begin(req, len);
 540         if (ret)
 541                 return ret;
 542
 543         /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 544         if (INTEL_GEN(dev_priv) >= 7) {
 545                 intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 546                 if (num_rings) {
 547                         struct intel_engine_cs *signaller;
 548
 549                         intel_ring_emit(engine,
 550                                         MI_LOAD_REGISTER_IMM(num_rings));
 551                         for_each_engine(signaller, dev_priv) {
 552                                 if (signaller == engine)
 553                                         continue;
 554
 555                                 intel_ring_emit_reg(engine,
 556                                                     RING_PSMI_CTL(signaller->mmio_base));
 557                                 intel_ring_emit(engine,
 558                                                 _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 559                         }
 560                 }
 561         }
 562
 563         intel_ring_emit(engine, MI_NOOP);
 564         intel_ring_emit(engine, MI_SET_CONTEXT);
 565         intel_ring_emit(engine,
 566                         i915_gem_obj_ggtt_offset(req->ctx->legacy_hw_ctx.rcs_state) |
 567                         flags);
 568         /*
 569          * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 570          * WaMiSetContext_Hang:snb,ivb,vlv
 571          */
 572         intel_ring_emit(engine, MI_NOOP);
 573
 574         if (INTEL_GEN(dev_priv) >= 7) {
 575                 if (num_rings) {
 576                         struct intel_engine_cs *signaller;
 577                         i915_reg_t last_reg = {}; /* keep gcc quiet */
 578
 579                         intel_ring_emit(engine,
 580                                         MI_LOAD_REGISTER_IMM(num_rings));
 581                         for_each_engine(signaller, dev_priv) {
 582                                 if (signaller == engine)
 583                                         continue;
 584
 585                                 last_reg = RING_PSMI_CTL(signaller->mmio_base);
 586                                 intel_ring_emit_reg(engine, last_reg);
 587                                 intel_ring_emit(engine,
 588                                                 _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 589                         }
 590
 591                         /* Insert a delay before the next switch! */
 592                         intel_ring_emit(engine,
 593                                         MI_STORE_REGISTER_MEM |
 594                                         MI_SRM_LRM_GLOBAL_GTT);
 595                         intel_ring_emit_reg(engine, last_reg);
 596                         intel_ring_emit(engine, engine->scratch.gtt_offset);
 597                         intel_ring_emit(engine, MI_NOOP);
 598                 }
 599                 intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE);
 600         }
 601
 602         intel_ring_advance(engine);
 603
 604         return ret;
 605 }
 606
 607 static int remap_l3(struct drm_i915_gem_request *req, int slice)
 608 {
 609         u32 *remap_info = req->i915->l3_parity.remap_info[slice];
 610         struct intel_engine_cs *engine = req->engine;
 611         int i, ret;
 612
 613         if (!remap_info)
 614                 return 0;
 615
 616         ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
 617         if (ret)
 618                 return ret;
 619
 620         /*
 621          * Note: We do not worry about the concurrent register cacheline hang
 622          * here because no other code should access these registers other than
 623          * at initialization time.
 624          */
 625         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
 626         for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
 627                 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
 628                 intel_ring_emit(engine, remap_info[i]);
 629         }
 630         intel_ring_emit(engine, MI_NOOP);
 631         intel_ring_advance(engine);
 632
 633         return 0;
 634 }
 635
 636 static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
 637                                    struct intel_engine_cs *engine,
 638                                    struct i915_gem_context *to)
 639 {
 640         if (to->remap_slice)
 641                 return false;
 642
 643         if (!to->legacy_hw_ctx.initialized)
 644                 return false;
 645
 646         if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 647                 return false;
 648
 649         return to == engine->last_context;
 650 }
 651
 652 static bool
 653 needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
 654                   struct intel_engine_cs *engine,
 655                   struct i915_gem_context *to)
 656 {
 657         if (!ppgtt)
 658                 return false;
 659
 660         /* Always load the ppgtt on first use */
 661         if (!engine->last_context)
 662                 return true;
 663
 664         /* Same context without new entries, skip */
 665         if (engine->last_context == to &&
 666             !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 667                 return false;
 668
 669         if (engine->id != RCS)
 670                 return true;
 671
 672         if (INTEL_GEN(engine->i915) < 8)
 673                 return true;
 674
 675         return false;
 676 }
 677
 678 static bool
 679 needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
 680                    struct i915_gem_context *to,
 681                    u32 hw_flags)
 682 {
 683         if (!ppgtt)
 684                 return false;
 685
 686         if (!IS_GEN8(to->i915))
 687                 return false;
 688
 689         if (hw_flags & MI_RESTORE_INHIBIT)
 690                 return true;
 691
 692         return false;
 693 }
 694
 695 static int do_rcs_switch(struct drm_i915_gem_request *req)
 696 {
 697         struct i915_gem_context *to = req->ctx;
 698         struct intel_engine_cs *engine = req->engine;
 699         struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
 700         struct i915_gem_context *from;
 701         u32 hw_flags;
 702         int ret, i;
 703
 704         if (skip_rcs_switch(ppgtt, engine, to))
 705                 return 0;
 706
 707         /* Trying to pin first makes error handling easier. */
 708         ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
 709                                     get_context_alignment(engine->i915),
 710                                     0);
 711         if (ret)
 712                 return ret;
 713
 714         /*
 715          * Pin can switch back to the default context if we end up calling into
 716          * evict_everything - as a last ditch gtt defrag effort that also
 717          * switches to the default context. Hence we need to reload from here.
 718          *
 719          * XXX: Doing so is painfully broken!
 720          */
 721         from = engine->last_context;
 722
 723         /*
 724          * Clear this page out of any CPU caches for coherent swap-in/out. Note
 725          * that thanks to write = false in this call and us not setting any gpu
 726          * write domains when putting a context object onto the active list
 727          * (when switching away from it), this won't block.
 728          *
 729          * XXX: We need a real interface to do this instead of trickery.
 730          */
 731         ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
 732         if (ret)
 733                 goto unpin_out;
 734
 735         if (needs_pd_load_pre(ppgtt, engine, to)) {
 736                 /* Older GENs and non render rings still want the load first,
 737                  * "PP_DCLV followed by PP_DIR_BASE register through Load
 738                  * Register Immediate commands in Ring Buffer before submitting
 739                  * a context."*/
 740                 trace_switch_mm(engine, to);
 741                 ret = ppgtt->switch_mm(ppgtt, req);
 742                 if (ret)
 743                         goto unpin_out;
 744         }
 745
 746         if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
 747                 /* NB: If we inhibit the restore, the context is not allowed to
 748                  * die because future work may end up depending on valid address
 749                  * space. This means we must enforce that a page table load
 750                  * occur when this occurs. */
 751                 hw_flags = MI_RESTORE_INHIBIT;
 752         else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
 753                 hw_flags = MI_FORCE_RESTORE;
 754         else
 755                 hw_flags = 0;
 756
 757         if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
 758                 ret = mi_set_context(req, hw_flags);
 759                 if (ret)
 760                         goto unpin_out;
 761         }
 762
 763         /* The backing object for the context is done after switching to the
 764          * *next* context. Therefore we cannot retire the previous context until
 765          * the next context has already started running. In fact, the below code
 766          * is a bit suboptimal because the retiring can occur simply after the
 767          * MI_SET_CONTEXT instead of when the next seqno has completed.
 768          */
 769         if (from != NULL) {
 770                 from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 771                 i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
 772                 /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 773                  * whole damn pipeline, we don't need to explicitly mark the
 774                  * object dirty. The only exception is that the context must be
 775                  * correct in case the object gets swapped out. Ideally we'd be
 776                  * able to defer doing this until we know the object would be
 777                  * swapped, but there is no way to do that yet.
 778                  */
 779                 from->legacy_hw_ctx.rcs_state->dirty = 1;
 780
 781                 /* obj is kept alive until the next request by its active ref */
 782                 i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
 783                 i915_gem_context_unreference(from);
 784         }
 785         i915_gem_context_reference(to);
 786         engine->last_context = to;
 787
 788         /* GEN8 does *not* require an explicit reload if the PDPs have been
 789          * setup, and we do not wish to move them.
 790          */
 791         if (needs_pd_load_post(ppgtt, to, hw_flags)) {
 792                 trace_switch_mm(engine, to);
 793                 ret = ppgtt->switch_mm(ppgtt, req);
 794                 /* The hardware context switch is emitted, but we haven't
 795                  * actually changed the state - so it's probably safe to bail
 796                  * here. Still, let the user know something dangerous has
 797                  * happened.
 798                  */
 799                 if (ret)
 800                         return ret;
 801         }
 802
 803         if (ppgtt)
 804                 ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 805
 806         for (i = 0; i < MAX_L3_SLICES; i++) {
 807                 if (!(to->remap_slice & (1<<i)))
 808                         continue;
 809
 810                 ret = remap_l3(req, i);
 811                 if (ret)
 812                         return ret;
 813
 814                 to->remap_slice &= ~(1<<i);
 815         }
 816
 817         if (!to->legacy_hw_ctx.initialized) {
 818                 if (engine->init_context) {
 819                         ret = engine->init_context(req);
 820                         if (ret)
 821                                 return ret;
 822                 }
 823                 to->legacy_hw_ctx.initialized = true;
 824         }
 825
 826         return 0;
 827
 828 unpin_out:
 829         i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
 830         return ret;
 831 }
 832
 833 /**
 834  * i915_switch_context() - perform a GPU context switch.
 835  * @req: request for which we'll execute the context switch
 836  *
 837  * The context life cycle is simple. The context refcount is incremented and
 838  * decremented by 1 and create and destroy. If the context is in use by the GPU,
 839  * it will have a refcount > 1. This allows us to destroy the context abstract
 840  * object while letting the normal object tracking destroy the backing BO.
 841  *
 842  * This function should not be used in execlists mode.  Instead the context is
 843  * switched by writing to the ELSP and requests keep a reference to their
 844  * context.
 845  */
 846 int i915_switch_context(struct drm_i915_gem_request *req)
 847 {
 848         struct intel_engine_cs *engine = req->engine;
 849
 850         WARN_ON(i915.enable_execlists);
 851         lockdep_assert_held(&req->i915->dev->struct_mutex);
 852
 853         if (engine->id != RCS ||
 854             req->ctx->legacy_hw_ctx.rcs_state == NULL) {
 855                 struct i915_gem_context *to = req->ctx;
 856                 struct i915_hw_ppgtt *ppgtt =
 857                         to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
 858
 859                 if (needs_pd_load_pre(ppgtt, engine, to)) {
 860                         int ret;
 861
 862                         trace_switch_mm(engine, to);
 863                         ret = ppgtt->switch_mm(ppgtt, req);
 864                         if (ret)
 865                                 return ret;
 866
 867                         ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 868                 }
 869
 870                 if (to != engine->last_context) {
 871                         i915_gem_context_reference(to);
 872                         if (engine->last_context)
 873                                 i915_gem_context_unreference(engine->last_context);
 874                         engine->last_context = to;
 875                 }
 876
 877                 return 0;
 878         }
 879
 880         return do_rcs_switch(req);
 881 }
 882
 883 static bool contexts_enabled(struct drm_device *dev)
 884 {
 885         return i915.enable_execlists || to_i915(dev)->hw_context_size;
 886 }
 887
 888 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 889                                   struct drm_file *file)
 890 {
 891         struct drm_i915_gem_context_create *args = data;
 892         struct drm_i915_file_private *file_priv = file->driver_priv;
 893         struct i915_gem_context *ctx;
 894         int ret;
 895
 896         if (!contexts_enabled(dev))
 897                 return -ENODEV;
 898
 899         if (args->pad != 0)
 900                 return -EINVAL;
 901
 902         ret = i915_mutex_lock_interruptible(dev);
 903         if (ret)
 904                 return ret;
 905
 906         ctx = i915_gem_create_context(dev, file_priv);
 907         mutex_unlock(&dev->struct_mutex);
 908         if (IS_ERR(ctx))
 909                 return PTR_ERR(ctx);
 910
 911         args->ctx_id = ctx->user_handle;
 912         DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id);
 913
 914         return 0;
 915 }
 916
 917 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 918                                    struct drm_file *file)
 919 {
 920         struct drm_i915_gem_context_destroy *args = data;
 921         struct drm_i915_file_private *file_priv = file->driver_priv;
 922         struct i915_gem_context *ctx;
 923         int ret;
 924
 925         if (args->pad != 0)
 926                 return -EINVAL;
 927
 928         if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
 929                 return -ENOENT;
 930
 931         ret = i915_mutex_lock_interruptible(dev);
 932         if (ret)
 933                 return ret;
 934
 935         ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
 936         if (IS_ERR(ctx)) {
 937                 mutex_unlock(&dev->struct_mutex);
 938                 return PTR_ERR(ctx);
 939         }
 940
 941         idr_remove(&ctx->file_priv->context_idr, ctx->user_handle);
 942         i915_gem_context_unreference(ctx);
 943         mutex_unlock(&dev->struct_mutex);
 944
 945         DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
 946         return 0;
 947 }
 948
 949 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 950                                     struct drm_file *file)
 951 {
 952         struct drm_i915_file_private *file_priv = file->driver_priv;
 953         struct drm_i915_gem_context_param *args = data;
 954         struct i915_gem_context *ctx;
 955         int ret;
 956
 957         ret = i915_mutex_lock_interruptible(dev);
 958         if (ret)
 959                 return ret;
 960
 961         ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
 962         if (IS_ERR(ctx)) {
 963                 mutex_unlock(&dev->struct_mutex);
 964                 return PTR_ERR(ctx);
 965         }
 966
 967         args->size = 0;
 968         switch (args->param) {
 969         case I915_CONTEXT_PARAM_BAN_PERIOD:
 970                 args->value = ctx->hang_stats.ban_period_seconds;
 971                 break;
 972         case I915_CONTEXT_PARAM_NO_ZEROMAP:
 973                 args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
 974                 break;
 975         case I915_CONTEXT_PARAM_GTT_SIZE:
 976                 if (ctx->ppgtt)
 977                         args->value = ctx->ppgtt->base.total;
 978                 else if (to_i915(dev)->mm.aliasing_ppgtt)
 979                         args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
 980                 else
 981                         args->value = to_i915(dev)->ggtt.base.total;
 982                 break;
 983         default:
 984                 ret = -EINVAL;
 985                 break;
 986         }
 987         mutex_unlock(&dev->struct_mutex);
 988
 989         return ret;
 990 }
 991
 992 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 993                                     struct drm_file *file)
 994 {
 995         struct drm_i915_file_private *file_priv = file->driver_priv;
 996         struct drm_i915_gem_context_param *args = data;
 997         struct i915_gem_context *ctx;
 998         int ret;
 999
1000         ret = i915_mutex_lock_interruptible(dev);
1001         if (ret)
1002                 return ret;
1003
1004         ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
1005         if (IS_ERR(ctx)) {
1006                 mutex_unlock(&dev->struct_mutex);
1007                 return PTR_ERR(ctx);
1008         }
1009
1010         switch (args->param) {
1011         case I915_CONTEXT_PARAM_BAN_PERIOD:
1012                 if (args->size)
1013                         ret = -EINVAL;
1014                 else if (args->value < ctx->hang_stats.ban_period_seconds &&
1015                          !capable(CAP_SYS_ADMIN))
1016                         ret = -EPERM;
1017                 else
1018                         ctx->hang_stats.ban_period_seconds = args->value;
1019                 break;
1020         case I915_CONTEXT_PARAM_NO_ZEROMAP:
1021                 if (args->size) {
1022                         ret = -EINVAL;
1023                 } else {
1024                         ctx->flags &= ~CONTEXT_NO_ZEROMAP;
1025                         ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
1026                 }
1027                 break;
1028         default:
1029                 ret = -EINVAL;
1030                 break;
1031         }
1032         mutex_unlock(&dev->struct_mutex);
1033
1034         return ret;
1035 }
1036
1037 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
1038                                        void *data, struct drm_file *file)
1039 {
1040         struct drm_i915_private *dev_priv = dev->dev_private;
1041         struct drm_i915_reset_stats *args = data;
1042         struct i915_ctx_hang_stats *hs;
1043         struct i915_gem_context *ctx;
1044         int ret;
1045
1046         if (args->flags || args->pad)
1047                 return -EINVAL;
1048
1049         if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
1050                 return -EPERM;
1051
1052         ret = i915_mutex_lock_interruptible(dev);
1053         if (ret)
1054                 return ret;
1055
1056         ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
1057         if (IS_ERR(ctx)) {
1058                 mutex_unlock(&dev->struct_mutex);
1059                 return PTR_ERR(ctx);
1060         }
1061         hs = &ctx->hang_stats;
1062
1063         if (capable(CAP_SYS_ADMIN))
1064                 args->reset_count = i915_reset_count(&dev_priv->gpu_error);
1065         else
1066                 args->reset_count = 0;
1067
1068         args->batch_active = hs->batch_active;
1069         args->batch_pending = hs->batch_pending;
1070
1071         mutex_unlock(&dev->struct_mutex);
1072
1073         return 0;
1074 }