drivers/gpu/drm/i915/i915_gem.c

   1 /*
   2  * Copyright © 2008 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "drmP.h"
  29 #include "drm.h"
  30 #include "i915_drm.h"
  31 #include "i915_drv.h"
  32 #include "i915_trace.h"
  33 #include "intel_drv.h"
  34 #include <linux/shmem_fs.h>
  35 #include <linux/slab.h>
  36 #include <linux/swap.h>
  37 #include <linux/pci.h>
  38
  /* Forward declarations of file-local (static) helpers, so that they can be
   * referenced ahead of their definitions. */
  39 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
  40 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  41 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
  42 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
  43                                                     unsigned alignment,
  44                                                     bool map_and_fenceable);
  45 static void i915_gem_clear_fence_reg(struct drm_device *dev,
  46                                      struct drm_i915_fence_reg *reg);
  47 static int i915_gem_phys_pwrite(struct drm_device *dev,
  48                                 struct drm_i915_gem_object *obj,
  49                                 struct drm_i915_gem_pwrite *args,
  50                                 struct drm_file *file);
  51 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
  52
  53 static void i915_gem_write_fence(struct drm_device *dev, int reg,
  54                                  struct drm_i915_gem_object *obj);
  55 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
  56                                          struct drm_i915_fence_reg *fence,
  57                                          bool enable);
  58
  59 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
  60                                     struct shrink_control *sc);
  61 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
  62
  /* Bookkeeping for an object that is left without a fence register: release
   * its mmap if it has a tiling mode set, then reset its fence state. */
  63 static inline void
  64 i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
  65 {
  66         /* Objects with a tiling mode set have their mmap released. */
  67         if (obj->tiling_mode)
  68                 i915_gem_release_mmap(obj);
  69
  70         /* No fence register is associated with the object any more. */
  71         obj->fence_reg = I915_FENCE_REG_NONE;
  72         obj->tiling_changed = false;
  73 }
  74
  75 /* Object accounting: record one more object of @size bytes. */
  76 static void
  77 i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size)
  78 {
  79         dev_priv->mm.object_memory += size;
  80         dev_priv->mm.object_count += 1;
  81 }
  82
  /* Object accounting: forget one object of @size bytes. */
  83 static void
  84 i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, size_t size)
  85 {
  86         dev_priv->mm.object_memory -= size;
  87         dev_priv->mm.object_count -= 1;
  88 }
  89
  /*
   * Wait, interruptibly, while the device is flagged as wedged.
   *
   * Returns 0 straight away when mm.wedged is clear. Otherwise waits on
   * error_completion and returns the wait's non-zero result if it fails.
   * When the device is still wedged after a successful wait, the completion's
   * done count is incremented again before returning 0.
   */
  90 static int
  91 i915_gem_wait_for_error(struct drm_device *dev)
  92 {
  93         struct drm_i915_private *dev_priv = dev->dev_private;
  94         struct completion *x = &dev_priv->error_completion;
  95         unsigned long flags;
  96         int ret;
  97
               /* Nothing to wait for unless the wedged flag is set. */
  98         if (!atomic_read(&dev_priv->mm.wedged))
  99                 return 0;
 100
 101         ret = wait_for_completion_interruptible(x);
 102         if (ret)
 103                 return ret;
 104
 105         if (atomic_read(&dev_priv->mm.wedged)) {
 106                 /* GPU is hung, bump the completion count to account for
 107                  * the token we just consumed so that we never hit zero and
 108                  * end up waiting upon a subsequent completion event that
 109                  * will never happen.
 110                  */
 111                 spin_lock_irqsave(&x->wait.lock, flags);
 112                 x->done++;
 113                 spin_unlock_irqrestore(&x->wait.lock, flags);
 114         }
 115         return 0;
 116 }
 117
 /* Acquire dev->struct_mutex interruptibly, after first waiting out any GPU
  * error via i915_gem_wait_for_error(). Returns 0 with the mutex held, or the
  * non-zero result of whichever step failed (mutex not held). */
 118 int i915_mutex_lock_interruptible(struct drm_device *dev)
 119 {
 120         int err;
 121
 122         /* Only try for the lock once the error wait has succeeded. */
 123         err = i915_gem_wait_for_error(dev);
 124         if (err == 0)
 125                 err = mutex_lock_interruptible(&dev->struct_mutex);
 126         if (err != 0)
 127                 return err;
 128
 129         /* struct_mutex is held from here on. */
 130         WARN_ON(i915_verify_lists(dev));
 131         return 0;
 132 }
 133
 /* True when the object has GTT space and is neither active nor pinned. */
 134 static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
 135 {
 136         bool idle = !obj->active && obj->pin_count == 0;
 137         return obj->gtt_space && idle;
 138 }
 139
 /* GEM init ioctl: validate the user-supplied GTT range and set up the global
  * GTT with it under dev->struct_mutex. Returns -EINVAL for an empty or
  * unaligned range, -ENODEV on gen >= 5 hardware, and 0 otherwise. */
 140 int i915_gem_init_ioctl(struct drm_device *dev, void *data,
 141                         struct drm_file *file)
 142 {
 143         struct drm_i915_gem_init *init = data;
 144
 145         /* The range must be non-empty and page aligned at both ends. */
 146         if (init->gtt_start >= init->gtt_end)
 147                 return -EINVAL;
 148         if ((init->gtt_start | init->gtt_end) & (PAGE_SIZE - 1))
 149                 return -EINVAL;
 150
 151         /* GEM with user mode setting was never supported on ilk and later. */
 152         if (INTEL_INFO(dev)->gen >= 5)
 153                 return -ENODEV;
 154
 155         mutex_lock(&dev->struct_mutex);
 156         i915_gem_init_global_gtt(dev, init->gtt_start,
 157                                  init->gtt_end, init->gtt_end);
 158         mutex_unlock(&dev->struct_mutex);
 159         return 0;
 160 }
 161
 /* Aperture query ioctl: report mm.gtt_total as the aperture size and that
  * size minus the GTT space of all objects on the pinned list as the available
  * size. Returns -ENODEV when the driver lacks DRIVER_GEM, otherwise 0. */
 162 int
 163 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 164                             struct drm_file *file)
 165 {
 166         struct drm_i915_gem_get_aperture *aperture = data;
 167         struct drm_i915_private *i915 = dev->dev_private;
 168         struct drm_i915_gem_object *bo;
 169         size_t pinned_bytes = 0;
 170
 171         if ((dev->driver->driver_features & DRIVER_GEM) == 0)
 172                 return -ENODEV;
 173
 174         /* Sum the GTT footprint of every object on the pinned list. */
 175         mutex_lock(&dev->struct_mutex);
 176         list_for_each_entry(bo, &i915->mm.pinned_list, mm_list) {
 177                 pinned_bytes += bo->gtt_space->size;
 178         }
 179         mutex_unlock(&dev->struct_mutex);
 180
 181         aperture->aper_size = i915->mm.gtt_total;
 182         aperture->aper_available_size = aperture->aper_size - pinned_bytes;
 183         return 0;
 184 }
 185
 /* Allocate a GEM object of @size bytes (rounded up to whole pages), create a
  * handle for it on @file and store that handle in *@handle_p.
  *
  * Returns 0 on success, -EINVAL if the rounded size is zero, -ENOMEM if the
  * allocation fails, or the error from drm_gem_handle_create(), in which case
  * the freshly allocated object is torn down again. */
 186 static int
 187 i915_gem_create(struct drm_file *file, struct drm_device *dev,
 188                 uint64_t size, uint32_t *handle_p)
 189 {
 190         struct drm_i915_gem_object *bo;
 191         u32 new_handle;
 192         int err;
 193
 194         /* Sizes are rounded up to whole pages; zero is rejected. */
 195         size = roundup(size, PAGE_SIZE);
 196         if (!size)
 197                 return -EINVAL;
 198
 199         bo = i915_gem_alloc_object(dev, size);
 200         if (!bo)
 201                 return -ENOMEM;
 202
 203         err = drm_gem_handle_create(file, &bo->base, &new_handle);
 204         if (err)
 205                 goto err_free;
 206
 207         /* The handle holds the object now: drop the allocation reference. */
 208         drm_gem_object_unreference(&bo->base);
 209         trace_i915_gem_object_create(bo);
 210
 211         *handle_p = new_handle;
 212         return 0;
 213
 214 err_free:
 215         drm_gem_object_release(&bo->base);
 216         i915_gem_info_remove_obj(dev->dev_private, bo->base.size);
 217         kfree(bo);
 218         return err;
 219 }
 220
 /*
  * Create a "dumb" buffer: derive the pitch and total size from the requested
  * width, height and bits-per-pixel, report both back through @args, and
  * allocate a GEM object of that size via i915_gem_create().
  *
  * The pitch is the row length in bytes (bpp rounded up to whole bytes),
  * aligned to 64. The size is computed with a 64-bit multiplication: in the
  * DRM UAPI struct drm_mode_create_dumb (declared outside this file) pitch
  * and height are 32-bit while size is 64-bit, so a plain 32-bit product
  * would silently wrap for large requests and the object would end up
  * smaller than pitch * height.
  *
  * Returns the result of i915_gem_create().
  */
 221 int
 222 i915_gem_dumb_create(struct drm_file *file,
 223                      struct drm_device *dev,
 224                      struct drm_mode_create_dumb *args)
 225 {
 226         /* have to work out size/pitch and return them */
 227         args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 228         args->size = (uint64_t)args->pitch * args->height;
 229         return i915_gem_create(file, dev,
 230                                args->size, &args->handle);
 231 }
 232
 /* Destroy a dumb buffer by deleting @handle from @file; @dev is not used.
  * Returns whatever drm_gem_handle_delete() returns. */
 233 int i915_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
 234                           uint32_t handle)
 235 {
 236         int err = drm_gem_handle_delete(file, handle);
 237         return err;
 238 }
 239
 240 /**
 241  * GEM create ioctl: allocate an object of the requested size and hand its
 242  * handle back through the ioctl argument. Returns i915_gem_create()'s result.
 243  */
 244 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
 245                           struct drm_file *file)
 246 {
 247         struct drm_i915_gem_create *create = data;
 248
 249         return i915_gem_create(file, dev, create->size, &create->handle);
 250 }
 251
 /* Non-zero when the object is tiled and the device's X-tiling bit-6 swizzle
  * mode is I915_BIT_6_SWIZZLE_9_10_17; zero otherwise. */
 252 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 253 {
 254         drm_i915_private_t *i915 = obj->base.dev->dev_private;
 255         /* Untiled objects never need swizzling. */
 256         if (obj->tiling_mode == I915_TILING_NONE)
 257                 return 0;
 258         return i915->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17;
 259 }
 259
 /* Copy @length bytes to the user buffer @cpu_vaddr from the kernel mapping
  * @gpu_vaddr, starting at @gpu_offset and undoing the swizzle: the data is
  * moved in chunks that never cross a 64-byte boundary, each read from the
  * source offset with bit 6 flipped. Returns 0 on success and a non-zero
  * value as soon as one chunk fails to copy. */
 260 static inline int
 261 __copy_to_user_swizzled(char __user *cpu_vaddr,
 262                         const char *gpu_vaddr, int gpu_offset,
 263                         int length)
 264 {
 265         int chunk, uncopied;
 266
 267         for (; length > 0; length -= chunk) {
 268                 /* Stop each chunk at the next 64-byte boundary. */
 269                 chunk = min(ALIGN(gpu_offset + 1, 64) - gpu_offset, length);
 270
 271                 /* Source is the same offset with bit 6 flipped. */
 272                 uncopied = __copy_to_user(cpu_vaddr,
 273                                           gpu_vaddr + (gpu_offset ^ 64),
 274                                           chunk);
 275                 if (uncopied)
 276                         return uncopied + length;
 277
 278                 /* Step both cursors past the chunk just copied. */
 279                 cpu_vaddr += chunk;
 280                 gpu_offset += chunk;
 281         }
 282
 283         return 0;
 284 }
 285
 /*
  * Copy @length bytes from the user buffer @cpu_vaddr into the kernel mapping
  * @gpu_vaddr, starting at @gpu_offset and applying the swizzle: the data is
  * moved in chunks that never cross a 64-byte boundary, each written to the
  * destination offset with bit 6 flipped (offset ^ 64).
  *
  * @gpu_vaddr is the kernel-side destination and @cpu_vaddr the user-space
  * source, matching the argument order of __copy_from_user() below; the
  * __user annotation therefore belongs on @cpu_vaddr, as it does in
  * __copy_to_user_swizzled().
  *
  * Returns 0 on success and a non-zero value as soon as one chunk fails to
  * copy.
  */
 286 static inline int
 287 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 288                           const char __user *cpu_vaddr,
 289                           int length)
 290 {
 291         int ret, cpu_offset = 0;
 292
 293         while (length > 0) {
                     /* Limit each chunk to the end of the current 64-byte line. */
 294                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 295                 int this_length = min(cacheline_end - gpu_offset, length);
 296                 int swizzled_gpu_offset = gpu_offset ^ 64;
 297
 298                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
 299                                        cpu_vaddr + cpu_offset,
 300                                        this_length);
 301                 if (ret)
 302                         return ret + length;
 303
 304                 cpu_offset += this_length;
 305                 gpu_offset += this_length;
 306                 length -= this_length;
 307         }
 308
 309         return 0;
 310 }
 311
 312 /* Atomic-context per-page copy for the shmem pread fast path.
 313  * Returns -EINVAL for pages that need bit17 swizzling (not handled here),
 314  * otherwise the result of the in-atomic copy to user space. */
 315 static int
 316 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
 317                  char __user *user_data,
 318                  bool page_do_bit17_swizzling, bool needs_clflush)
 319 {
 320         char *kaddr, *src;
 321         int unread;
 322
 323         if (unlikely(page_do_bit17_swizzling))
 324                 return -EINVAL;
 325
 326         kaddr = kmap_atomic(page);
 327         src = kaddr + shmem_page_offset;
 328
 329         /* Flush the source cachelines first when the caller asks for it. */
 330         if (needs_clflush)
 331                 drm_clflush_virt_range(src, page_length);
 332         unread = __copy_to_user_inatomic(user_data, src, page_length);
 333         kunmap_atomic(kaddr);
 334
 335         return unread;
 336 }
 337
 /* Flush the CPU cachelines covering @length bytes at @addr. For swizzled
  * pages the range is first widened to 128-byte boundaries. */
 338 static void
 339 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 340                              bool swizzled)
 341 {
 342         unsigned long first, last;
 343
 344         /* Common case: flush exactly the range that was asked for. */
 345         if (!unlikely(swizzled)) {
 346                 drm_clflush_virt_range(addr, length);
 347                 return;
 348         }
 349
 350         /* For swizzled pages, widen the range to 128-byte boundaries so
 351          * that both channels are always flushed. Crude but simple:
 352          * swizzled pwrite/pread is far from a hotpath, and current
 353          * userspace doesn't use it at all. */
 354         first = round_down((unsigned long) addr, 128);
 355         last = round_up((unsigned long) addr + length, 128);
 356
 357         drm_clflush_virt_range((void *)first, last - first);
 358 }
 359
 360 /* Sleeping counterpart of shmem_pread_fast(): maps the page with kmap(),
 361  * uses the non-atomic user-copy helpers and can handle bit17 swizzled
 362  * pages. Returns the copy helper's result, 0 meaning success. */
 363 static int
 364 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 365                  char __user *user_data,
 366                  bool page_do_bit17_swizzling, bool needs_clflush)
 367 {
 368         char *kaddr = kmap(page);
 369         int unread;
 370
 371         if (needs_clflush)
 372                 shmem_clflush_swizzled_range(kaddr + shmem_page_offset,
 373                                              page_length,
 374                                              page_do_bit17_swizzling);
 375
 376         if (!page_do_bit17_swizzling)
 377                 unread = __copy_to_user(user_data,
 378                                         kaddr + shmem_page_offset,
 379                                         page_length);
 380         else
 381                 unread = __copy_to_user_swizzled(user_data,
 382                                                  kaddr, shmem_page_offset,
 383                                                  page_length);
 384         kunmap(page);
 385
 386         return unread;
 387 }
 388
 /*
  * Copy args->size bytes, starting at args->offset within the object's shmem
  * backing store, to the user buffer at args->data_ptr, one page at a time.
  *
  * Each page is first tried with shmem_pread_fast(). If that returns
  * non-zero, the page is retried with shmem_pread_slow() while
  * dev->struct_mutex is temporarily dropped; an extra page reference is held
  * across that window.
  *
  * The caller holds dev->struct_mutex on entry and it is held again on
  * return. Returns 0 on success, -EFAULT if a page could not be copied, or
  * the error from the domain change or the page lookup.
  */
 389 static int
 390 i915_gem_shmem_pread(struct drm_device *dev,
 391                      struct drm_i915_gem_object *obj,
 392                      struct drm_i915_gem_pread *args,
 393                      struct drm_file *file)
 394 {
 395         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 396         char __user *user_data;
 397         ssize_t remain;
 398         loff_t offset;
 399         int shmem_page_offset, page_length, ret = 0;
 400         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 401         int hit_slowpath = 0;
 402         int prefaulted = 0;
 403         int needs_clflush = 0;
 404         int release_page;
 405
 406         user_data = (char __user *) (uintptr_t) args->data_ptr;
 407         remain = args->size;
 408
 409         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 410
 411         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 412                 /* If we're not in the cpu read domain, set ourself into the gtt
 413                  * read domain and manually flush cachelines (if required). This
 414                  * optimizes for the case when the gpu will dirty the data
 415                  * anyway again before the next pread happens. */
 416                 if (obj->cache_level == I915_CACHE_NONE)
 417                         needs_clflush = 1;
 418                 ret = i915_gem_object_set_to_gtt_domain(obj, false);
 419                 if (ret)
 420                         return ret;
 421         }
 422
 423         offset = args->offset;
 424
 425         while (remain > 0) {
 426                 struct page *page;
 427
 428                 /* Operation in this page
 429                  *
 430                  * shmem_page_offset = offset within page in shmem file
 431                  * page_length = bytes to copy for this page
 432                  */
 433                 shmem_page_offset = offset_in_page(offset);
 434                 page_length = remain;
 435                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
 436                         page_length = PAGE_SIZE - shmem_page_offset;
 437
                     /* Use the object's page array when present; otherwise read
                      * the page from the shmem mapping, which yields a page
                      * reference that is released again below. */
 438                 if (obj->pages) {
 439                         page = obj->pages[offset >> PAGE_SHIFT];
 440                         release_page = 0;
 441                 } else {
 442                         page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 443                         if (IS_ERR(page)) {
 444                                 ret = PTR_ERR(page);
 445                                 goto out;
 446                         }
 447                         release_page = 1;
 448                 }
 449
                     /* Swizzling applies per page: only when the object needs it
                      * and bit 17 of the page's physical address is set. */
 450                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 451                         (page_to_phys(page) & (1 << 17)) != 0;
 452
 453                 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
 454                                        user_data, page_do_bit17_swizzling,
 455                                        needs_clflush);
 456                 if (ret == 0)
 457                         goto next_page;
 458
                     /* Fast path failed: hold an extra page reference and retry
                      * with struct_mutex dropped. */
 459                 hit_slowpath = 1;
 460                 page_cache_get(page);
 461                 mutex_unlock(&dev->struct_mutex);
 462
 463                 if (!prefaulted) {
 464                         ret = fault_in_multipages_writeable(user_data, remain);
 465                         /* Userspace is tricking us, but we've already clobbered
 466                          * its pages with the prefault and promised to write the
 467                          * data up to the first fault. Hence ignore any errors
 468                          * and just continue. */
 469                         (void)ret;
 470                         prefaulted = 1;
 471                 }
 472
 473                 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
 474                                        user_data, page_do_bit17_swizzling,
 475                                        needs_clflush);
 476
 477                 mutex_lock(&dev->struct_mutex);
 478                 page_cache_release(page);
 479 next_page:
 480                 mark_page_accessed(page);
 481                 if (release_page)
 482                         page_cache_release(page);
 483
                     /* Any non-zero copy result is reported as -EFAULT. */
 484                 if (ret) {
 485                         ret = -EFAULT;
 486                         goto out;
 487                 }
 488
 489                 remain -= page_length;
 490                 user_data += page_length;
 491                 offset += page_length;
 492         }
 493
 494 out:
 495         if (hit_slowpath) {
 496                 /* Fixup: Kill any reinstated backing storage pages */
 497                 if (obj->madv == __I915_MADV_PURGED)
 498                         i915_gem_object_truncate(obj);
 499         }
 500
 501         return ret;
 502 }
 503
 504 /**
 505  * Reads data from the object referenced by handle.
 506  *
      * Returns 0 for a zero-length read, -EFAULT if the user buffer fails
      * access_ok(), the error from i915_mutex_lock_interruptible() if the lock
      * cannot be taken, -ENOENT if the handle lookup fails, -EINVAL if the
      * requested range falls outside the object, and otherwise the result of
      * i915_gem_shmem_pread().
      *
 507  * On error, the contents of *data are undefined.
 508  */
 509 int
 510 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 511                      struct drm_file *file)
 512 {
 513         struct drm_i915_gem_pread *args = data;
 514         struct drm_i915_gem_object *obj;
 515         int ret = 0;
 516
 517         if (args->size == 0)
 518                 return 0;
 519
 520         if (!access_ok(VERIFY_WRITE,
 521                        (char __user *)(uintptr_t)args->data_ptr,
 522                        args->size))
 523                 return -EFAULT;
 524
 525         ret = i915_mutex_lock_interruptible(dev);
 526         if (ret)
 527                 return ret;
 528
               /* A NULL &obj->base is treated as a failed handle lookup. */
 529         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 530         if (&obj->base == NULL) {
 531                 ret = -ENOENT;
 532                 goto unlock;
 533         }
 534
 535         /* Bounds check source.  */
 536         if (args->offset > obj->base.size ||
 537             args->size > obj->base.size - args->offset) {
 538                 ret = -EINVAL;
 539                 goto out;
 540         }
 541
 542         trace_i915_gem_object_pread(obj, args->offset, args->size);
 543
 544         ret = i915_gem_shmem_pread(dev, obj, args, file);
 545
       /* Drop the reference taken by the lookup, then release struct_mutex. */
 546 out:
 547         drm_gem_object_unreference(&obj->base);
 548 unlock:
 549         mutex_unlock(&dev->struct_mutex);
 550         return ret;
 551 }
 552
 553 /* Fast write path: copies user data through an atomic write-combining io
 554  * mapping, and therefore cannot handle page faults in the source data.
 555  * Returns the copy helper's result; callers treat non-zero as failure. */
 556
 557 static inline int
 558 fast_user_write(struct io_mapping *mapping,
 559                 loff_t page_base, int page_offset,
 560                 char __user *user_data,
 561                 int length)
 562 {
 563         unsigned long left;
 564         char *wc_page = io_mapping_map_atomic_wc(mapping, page_base);
 565
 566         left = __copy_from_user_inatomic_nocache(wc_page + page_offset,
 567                                                  user_data, length);
 568         io_mapping_unmap_atomic(wc_page);
 569
 570         return left;
 571 }
 572
 573 /**
 574  * This is the fast pwrite path, where we copy the data directly from the
 575  * user into the GTT, uncached.
      *
      * The object is pinned, set to the GTT domain for writing and passed to
      * i915_gem_object_put_fence() before the copy; once pinned it is unpinned
      * again on every exit path. The data is then written page by page with
      * fast_user_write() at obj->gtt_offset + args->offset.
      *
      * Returns 0 on success, -EFAULT if fast_user_write() reports a failed
      * copy, or the error from one of the set-up steps.
 576  */
 577 static int
 578 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 579                          struct drm_i915_gem_object *obj,
 580                          struct drm_i915_gem_pwrite *args,
 581                          struct drm_file *file)
 582 {
 583         drm_i915_private_t *dev_priv = dev->dev_private;
 584         ssize_t remain;
 585         loff_t offset, page_base;
 586         char __user *user_data;
 587         int page_offset, page_length, ret;
 588
 589         ret = i915_gem_object_pin(obj, 0, true);
 590         if (ret)
 591                 goto out;
 592
 593         ret = i915_gem_object_set_to_gtt_domain(obj, true);
 594         if (ret)
 595                 goto out_unpin;
 596
 597         ret = i915_gem_object_put_fence(obj);
 598         if (ret)
 599                 goto out_unpin;
 600
 601         user_data = (char __user *) (uintptr_t) args->data_ptr;
 602         remain = args->size;
 603
               /* Destination offset: the object's GTT offset plus the requested
                * offset within the object. */
 604         offset = obj->gtt_offset + args->offset;
 605
 606         while (remain > 0) {
 607                 /* Operation in this page
 608                  *
 609                  * page_base = page offset within aperture
 610                  * page_offset = offset within page
 611                  * page_length = bytes to copy for this page
 612                  */
 613                 page_base = offset & PAGE_MASK;
 614                 page_offset = offset_in_page(offset);
 615                 page_length = remain;
 616                 if ((page_offset + remain) > PAGE_SIZE)
 617                         page_length = PAGE_SIZE - page_offset;
 618
 619                 /* If we get a fault while copying data, then (presumably) our
 620                  * source page isn't available.  Return the error and we'll
 621                  * retry in the slow path.
 622                  */
 623                 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
 624                                     page_offset, user_data, page_length)) {
 625                         ret = -EFAULT;
 626                         goto out_unpin;
 627                 }
 628
 629                 remain -= page_length;
 630                 user_data += page_length;
 631                 offset += page_length;
 632         }
 633
 634 out_unpin:
 635         i915_gem_object_unpin(obj);
 636 out:
 637         return ret;
 638 }
 639
 640 /* Per-page copy function for the shmem pwrite fastpath.
 641  * Flushes invalid cachelines before writing to the target if
 642  * needs_clflush_before is set and flushes out any written cachelines after
 643  * writing if needs_clflush is set.
      *
      * Returns -EINVAL for pages that need bit17 swizzling (not handled here),
      * otherwise the result of the in-atomic, non-caching copy from user
      * space. */
 644 static int
 645 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 646                   char __user *user_data,
 647                   bool page_do_bit17_swizzling,
 648                   bool needs_clflush_before,
 649                   bool needs_clflush_after)
 650 {
 651         char *vaddr;
 652         int ret;
 653
 654         if (unlikely(page_do_bit17_swizzling))
 655                 return -EINVAL;
 656
               /* Everything between kmap_atomic() and kunmap_atomic() uses the
                * atomic mapping, hence the in-atomic copy helper. */
 657         vaddr = kmap_atomic(page);
 658         if (needs_clflush_before)
 659                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 660                                        page_length);
 661         ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
 662                                                 user_data,
 663                                                 page_length);
 664         if (needs_clflush_after)
 665                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 666                                        page_length);
 667         kunmap_atomic(vaddr);
 668
 669         return ret;
 670 }
 671
 672 /* Only difference to the fast-path function is that this can handle bit17
 673  * and uses non-atomic copy and kmap functions.
      *
      * Returns the result of the copy helper that was used; 0 means the whole
      * range was copied. */
 674 static int
 675 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 676                   char __user *user_data,
 677                   bool page_do_bit17_swizzling,
 678                   bool needs_clflush_before,
 679                   bool needs_clflush_after)
 680 {
 681         char *vaddr;
 682         int ret;
 683
 684         vaddr = kmap(page);
               /* Flush before writing when requested, and always for swizzled
                * pages. */
 685         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 686                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 687                                              page_length,
 688                                              page_do_bit17_swizzling);
 689         if (page_do_bit17_swizzling)
 690                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 691                                                 user_data,
 692                                                 page_length);
 693         else
 694                 ret = __copy_from_user(vaddr + shmem_page_offset,
 695                                        user_data,
 696                                        page_length);
 697         if (needs_clflush_after)
 698                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 699                                              page_length,
 700                                              page_do_bit17_swizzling);
 701         kunmap(page);
 702
 703         return ret;
 704 }
 705
 /*
  * Copy args->size bytes from the user buffer at args->data_ptr into the
  * object's shmem backing pages, starting at args->offset, one page at a
  * time.
  *
  * Each page is first written with shmem_pwrite_fast(). If that returns
  * non-zero, the page is retried with shmem_pwrite_slow() while
  * dev->struct_mutex is temporarily dropped; an extra page reference is held
  * across that window. Every visited page is marked dirty and accessed.
  *
  * The caller is expected to hold dev->struct_mutex, since the slow path
  * unlocks and re-locks it. Returns 0 on success, -EFAULT if a page could
  * not be copied, or the error from the domain change or the page lookup.
  */
 706 static int
 707 i915_gem_shmem_pwrite(struct drm_device *dev,
 708                       struct drm_i915_gem_object *obj,
 709                       struct drm_i915_gem_pwrite *args,
 710                       struct drm_file *file)
 711 {
 712         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 713         ssize_t remain;
 714         loff_t offset;
 715         char __user *user_data;
 716         int shmem_page_offset, page_length, ret = 0;
 717         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 718         int hit_slowpath = 0;
 719         int needs_clflush_after = 0;
 720         int needs_clflush_before = 0;
 721         int release_page;
 722
 723         user_data = (char __user *) (uintptr_t) args->data_ptr;
 724         remain = args->size;
 725
 726         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 727
 728         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 729                 /* If we're not in the cpu write domain, set ourself into the gtt
 730                  * write domain and manually flush cachelines (if required). This
 731                  * optimizes for the case when the gpu will use the data
 732                  * right away and we therefore have to clflush anyway. */
 733                 if (obj->cache_level == I915_CACHE_NONE)
 734                         needs_clflush_after = 1;
 735                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
 736                 if (ret)
 737                         return ret;
 738         }
 739         /* The same trick applies to invalidating partially written cachelines
 740          * before writing.  */
 741         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
 742             && obj->cache_level == I915_CACHE_NONE)
 743                 needs_clflush_before = 1;
 744
 745         offset = args->offset;
 746         obj->dirty = 1;
 747
 748         while (remain > 0) {
 749                 struct page *page;
 750                 int partial_cacheline_write;
 751
 752                 /* Operation in this page
 753                  *
 754                  * shmem_page_offset = offset within page in shmem file
 755                  * page_length = bytes to copy for this page
 756                  */
 757                 shmem_page_offset = offset_in_page(offset);
 758
 759                 page_length = remain;
 760                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
 761                         page_length = PAGE_SIZE - shmem_page_offset;
 762
 763                 /* If we don't overwrite a cacheline completely we need to be
 764                  * careful to have up-to-date data by first clflushing. Don't
 765                  * overcomplicate things and flush the entire range being
                      * written. The test below is true when either the start
                      * offset or the length is not a multiple of the clflush size. */
 766                 partial_cacheline_write = needs_clflush_before &&
 767                         ((shmem_page_offset | page_length)
 768                                 & (boot_cpu_data.x86_clflush_size - 1));
 769
                     /* Use the object's page array when present; otherwise read
                      * the page from the shmem mapping, which yields a page
                      * reference that is released again below. */
 770                 if (obj->pages) {
 771                         page = obj->pages[offset >> PAGE_SHIFT];
 772                         release_page = 0;
 773                 } else {
 774                         page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 775                         if (IS_ERR(page)) {
 776                                 ret = PTR_ERR(page);
 777                                 goto out;
 778                         }
 779                         release_page = 1;
 780                 }
 781
                     /* Swizzling applies per page: only when the object needs it
                      * and bit 17 of the page's physical address is set. */
 782                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 783                         (page_to_phys(page) & (1 << 17)) != 0;
 784
 785                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 786                                         user_data, page_do_bit17_swizzling,
 787                                         partial_cacheline_write,
 788                                         needs_clflush_after);
 789                 if (ret == 0)
 790                         goto next_page;
 791
                     /* Fast path failed: hold an extra page reference and retry
                      * with struct_mutex dropped. */
 792                 hit_slowpath = 1;
 793                 page_cache_get(page);
 794                 mutex_unlock(&dev->struct_mutex);
 795
 796                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 797                                         user_data, page_do_bit17_swizzling,
 798                                         partial_cacheline_write,
 799                                         needs_clflush_after);
 800
 801                 mutex_lock(&dev->struct_mutex);
 802                 page_cache_release(page);
 803 next_page:
 804                 set_page_dirty(page);
 805                 mark_page_accessed(page);
 806                 if (release_page)
 807                         page_cache_release(page);
 808
                     /* Any non-zero copy result is reported as -EFAULT. */
 809                 if (ret) {
 810                         ret = -EFAULT;
 811                         goto out;
 812                 }
 813
 814                 remain -= page_length;
 815                 user_data += page_length;
 816                 offset += page_length;
 817         }
 818
 819 out:
 820         if (hit_slowpath) {
 821                 /* Fixup: Kill any reinstated backing storage pages */
 822                 if (obj->madv == __I915_MADV_PURGED)
 823                         i915_gem_object_truncate(obj);
 824                 /* and flush dirty cachelines in case the object isn't in the cpu write
 825                  * domain anymore. */
 826                 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 827                         i915_gem_clflush_object(obj);
 828                         intel_gtt_chipset_flush();
 829                 }
 830         }
 831
               /* Pair the per-page cacheline flushes with a chipset flush. */
 832         if (needs_clflush_after)
 833                 intel_gtt_chipset_flush();
 834
 835         return ret;
 836 }
 837
 838 /**
 839  * Writes data to the object referenced by handle.
 840  *
 841  * On error, the contents of the buffer that were to be modified are undefined.
 842  */
 843 int
 844 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 845                       struct drm_file *file)
 846 {
 847         struct drm_i915_gem_pwrite *args = data;
 848         struct drm_i915_gem_object *obj;
 849         int ret;
 850
 851         if (args->size == 0)
 852                 return 0;
 853
 854         if (!access_ok(VERIFY_READ,
 855                        (char __user *)(uintptr_t)args->data_ptr,
 856                        args->size))
 857                 return -EFAULT;
 858
 859         ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
 860                                            args->size);
 861         if (ret)
 862                 return -EFAULT;
 863
 864         ret = i915_mutex_lock_interruptible(dev);
 865         if (ret)
 866                 return ret;
 867
 868         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 869         if (&obj->base == NULL) {
 870                 ret = -ENOENT;
 871                 goto unlock;
 872         }
 873
 874         /* Bounds check destination. */
 875         if (args->offset > obj->base.size ||
 876             args->size > obj->base.size - args->offset) {
 877                 ret = -EINVAL;
 878                 goto out;
 879         }
 880
 881         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 882
 883         ret = -EFAULT;
 884         /* We can only do the GTT pwrite on untiled buffers, as otherwise
 885          * it would end up going through the fenced access, and we'll get
 886          * different detiling behavior between reading and writing.
 887          * pread/pwrite currently are reading and writing from the CPU
 888          * perspective, requiring manual detiling by the client.
 889          */
 890         if (obj->phys_obj) {
 891                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
 892                 goto out;
 893         }
 894
 895         if (obj->gtt_space &&
 896             obj->cache_level == I915_CACHE_NONE &&
 897             obj->tiling_mode == I915_TILING_NONE &&
 898             obj->map_and_fenceable &&
 899             obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 900                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
 901                 /* Note that the gtt paths might fail with non-page-backed user
 902                  * pointers (e.g. gtt mappings when moving data between
 903                  * textures). Fallback to the shmem path in that case. */
 904         }
 905
 906         if (ret == -EFAULT)
 907                 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 908
 909 out:
 910         drm_gem_object_unreference(&obj->base);
 911 unlock:
 912         mutex_unlock(&dev->struct_mutex);
 913         return ret;
 914 }
 915
 916 /**
 917  * Called when user space prepares to use an object with the CPU, either
 918  * through the mmap ioctl's mapping or a GTT mapping.
 919  */
 920 int
 921 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 922                           struct drm_file *file)
 923 {
 924         struct drm_i915_gem_set_domain *args = data;
 925         struct drm_i915_gem_object *obj;
 926         uint32_t read_domains = args->read_domains;
 927         uint32_t write_domain = args->write_domain;
 928         int ret;
 929
 930         if (!(dev->driver->driver_features & DRIVER_GEM))
 931                 return -ENODEV;
 932
 933         /* Only handle setting domains to types used by the CPU. */
 934         if (write_domain & I915_GEM_GPU_DOMAINS)
 935                 return -EINVAL;
 936
 937         if (read_domains & I915_GEM_GPU_DOMAINS)
 938                 return -EINVAL;
 939
 940         /* Having something in the write domain implies it's in the read
 941          * domain, and only that read domain.  Enforce that in the request.
 942          */
 943         if (write_domain != 0 && read_domains != write_domain)
 944                 return -EINVAL;
 945
 946         ret = i915_mutex_lock_interruptible(dev);
 947         if (ret)
 948                 return ret;
 949
 950         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 951         if (&obj->base == NULL) {
 952                 ret = -ENOENT;
 953                 goto unlock;
 954         }
 955
 956         if (read_domains & I915_GEM_DOMAIN_GTT) {
 957                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 958
 959                 /* Silently promote "you're not bound, there was nothing to do"
 960                  * to success, since the client was just asking us to
 961                  * make sure everything was done.
 962                  */
 963                 if (ret == -EINVAL)
 964                         ret = 0;
 965         } else {
 966                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 967         }
 968
 969         drm_gem_object_unreference(&obj->base);
 970 unlock:
 971         mutex_unlock(&dev->struct_mutex);
 972         return ret;
 973 }
 974
 975 /**
 976  * Called when user space has done writes to this buffer
 977  */
 978 int
 979 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 980                          struct drm_file *file)
 981 {
 982         struct drm_i915_gem_sw_finish *args = data;
 983         struct drm_i915_gem_object *obj;
 984         int ret = 0;
 985
 986         if (!(dev->driver->driver_features & DRIVER_GEM))
 987                 return -ENODEV;
 988
 989         ret = i915_mutex_lock_interruptible(dev);
 990         if (ret)
 991                 return ret;
 992
 993         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 994         if (&obj->base == NULL) {
 995                 ret = -ENOENT;
 996                 goto unlock;
 997         }
 998
 999         /* Pinned buffers may be scanout, so flush the cache */
1000         if (obj->pin_count)
1001                 i915_gem_object_flush_cpu_write_domain(obj);
1002
1003         drm_gem_object_unreference(&obj->base);
1004 unlock:
1005         mutex_unlock(&dev->struct_mutex);
1006         return ret;
1007 }
1008
1009 /**
1010  * Maps the contents of an object, returning the address it is mapped
1011  * into.
1012  *
1013  * While the mapping holds a reference on the contents of the object, it doesn't
1014  * imply a ref on the object itself.
1015  */
1016 int
1017 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1018                     struct drm_file *file)
1019 {
1020         struct drm_i915_gem_mmap *args = data;
1021         struct drm_gem_object *obj;
1022         unsigned long addr;
1023
1024         if (!(dev->driver->driver_features & DRIVER_GEM))
1025                 return -ENODEV;
1026
1027         obj = drm_gem_object_lookup(dev, file, args->handle);
1028         if (obj == NULL)
1029                 return -ENOENT;
1030
1031         down_write(&current->mm->mmap_sem);
1032         addr = do_mmap(obj->filp, 0, args->size,
1033                        PROT_READ | PROT_WRITE, MAP_SHARED,
1034                        args->offset);
1035         up_write(&current->mm->mmap_sem);
1036         drm_gem_object_unreference_unlocked(obj);
1037         if (IS_ERR((void *)addr))
1038                 return addr;
1039
1040         args->addr_ptr = (uint64_t) addr;
1041
1042         return 0;
1043 }
1044
1045 /**
1046  * i915_gem_fault - fault a page into the GTT
1047  * vma: VMA in question
1048  * vmf: fault info
1049  *
1050  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1051  * from userspace.  The fault handler takes care of binding the object to
1052  * the GTT (if needed), allocating and programming a fence register (again,
1053  * only if needed based on whether the old reg is still valid or the object
1054  * is tiled) and inserting a new PTE into the faulting process.
1055  *
1056  * Note that the faulting process may involve evicting existing objects
1057  * from the GTT and/or fence registers to make room.  So performance may
1058  * suffer if the GTT working set is large or there are few fence registers
1059  * left.
1060  */
1061 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1062 {
1063         struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1064         struct drm_device *dev = obj->base.dev;
1065         drm_i915_private_t *dev_priv = dev->dev_private;
1066         pgoff_t page_offset;
1067         unsigned long pfn;
1068         int ret = 0;
1069         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1070
1071         /* We don't use vmf->pgoff since that has the fake offset */
1072         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1073                 PAGE_SHIFT;
1074
1075         ret = i915_mutex_lock_interruptible(dev);
1076         if (ret)
1077                 goto out;
1078
1079         trace_i915_gem_object_fault(obj, page_offset, true, write);
1080
1081         /* Now bind it into the GTT if needed */
1082         if (!obj->map_and_fenceable) {
1083                 ret = i915_gem_object_unbind(obj);
1084                 if (ret)
1085                         goto unlock;
1086         }
1087         if (!obj->gtt_space) {
1088                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1089                 if (ret)
1090                         goto unlock;
1091
1092                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1093                 if (ret)
1094                         goto unlock;
1095         }
1096
1097         if (!obj->has_global_gtt_mapping)
1098                 i915_gem_gtt_bind_object(obj, obj->cache_level);
1099
1100         ret = i915_gem_object_get_fence(obj);
1101         if (ret)
1102                 goto unlock;
1103
1104         if (i915_gem_object_is_inactive(obj))
1105                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1106
1107         obj->fault_mappable = true;
1108
1109         pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1110                 page_offset;
1111
1112         /* Finally, remap it using the new GTT offset */
1113         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1114 unlock:
1115         mutex_unlock(&dev->struct_mutex);
1116 out:
1117         switch (ret) {
1118         case -EIO:
1119         case -EAGAIN:
1120                 /* Give the error handler a chance to run and move the
1121                  * objects off the GPU active list. Next time we service the
1122                  * fault, we should be able to transition the page into the
1123                  * GTT without touching the GPU (and so avoid further
1124                  * EIO/EGAIN). If the GPU is wedged, then there is no issue
1125                  * with coherency, just lost writes.
1126                  */
1127                 set_need_resched();
1128         case 0:
1129         case -ERESTARTSYS:
1130         case -EINTR:
1131                 return VM_FAULT_NOPAGE;
1132         case -ENOMEM:
1133                 return VM_FAULT_OOM;
1134         default:
1135                 return VM_FAULT_SIGBUS;
1136         }
1137 }
1138
1139 /**
1140  * i915_gem_release_mmap - remove physical page mappings
1141  * @obj: obj in question
1142  *
1143  * Preserve the reservation of the mmapping with the DRM core code, but
1144  * relinquish ownership of the pages back to the system.
1145  *
1146  * It is vital that we remove the page mapping if we have mapped a tiled
1147  * object through the GTT and then lose the fence register due to
1148  * resource pressure. Similarly if the object has been moved out of the
1149  * aperture, than pages mapped into userspace must be revoked. Removing the
1150  * mapping will then trigger a page fault on the next user access, allowing
1151  * fixup by i915_gem_fault().
1152  */
1153 void
1154 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1155 {
1156         if (!obj->fault_mappable)
1157                 return;
1158
1159         if (obj->base.dev->dev_mapping)
1160                 unmap_mapping_range(obj->base.dev->dev_mapping,
1161                                     (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1162                                     obj->base.size, 1);
1163
1164         obj->fault_mappable = false;
1165 }
1166
1167 static uint32_t
1168 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1169 {
1170         uint32_t gtt_size;
1171
1172         if (INTEL_INFO(dev)->gen >= 4 ||
1173             tiling_mode == I915_TILING_NONE)
1174                 return size;
1175
1176         /* Previous chips need a power-of-two fence region when tiling */
1177         if (INTEL_INFO(dev)->gen == 3)
1178                 gtt_size = 1024*1024;
1179         else
1180                 gtt_size = 512*1024;
1181
1182         while (gtt_size < size)
1183                 gtt_size <<= 1;
1184
1185         return gtt_size;
1186 }
1187
1188 /**
1189  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1190  * @obj: object to check
1191  *
1192  * Return the required GTT alignment for an object, taking into account
1193  * potential fence register mapping.
1194  */
1195 static uint32_t
1196 i915_gem_get_gtt_alignment(struct drm_device *dev,
1197                            uint32_t size,
1198                            int tiling_mode)
1199 {
1200         /*
1201          * Minimum alignment is 4k (GTT page size), but might be greater
1202          * if a fence register is needed for the object.
1203          */
1204         if (INTEL_INFO(dev)->gen >= 4 ||
1205             tiling_mode == I915_TILING_NONE)
1206                 return 4096;
1207
1208         /*
1209          * Previous chips need to be aligned to the size of the smallest
1210          * fence register that can contain the object.
1211          */
1212         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1213 }
1214
1215 /**
1216  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1217  *                                       unfenced object
1218  * @dev: the device
1219  * @size: size of the object
1220  * @tiling_mode: tiling mode of the object
1221  *
1222  * Return the required GTT alignment for an object, only taking into account
1223  * unfenced tiled surface requirements.
1224  */
1225 uint32_t
1226 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1227                                     uint32_t size,
1228                                     int tiling_mode)
1229 {
1230         /*
1231          * Minimum alignment is 4k (GTT page size) for sane hw.
1232          */
1233         if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1234             tiling_mode == I915_TILING_NONE)
1235                 return 4096;
1236
1237         /* Previous hardware however needs to be aligned to a power-of-two
1238          * tile height. The simplest method for determining this is to reuse
1239          * the power-of-tile object size.
1240          */
1241         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1242 }
1243
1244 int
1245 i915_gem_mmap_gtt(struct drm_file *file,
1246                   struct drm_device *dev,
1247                   uint32_t handle,
1248                   uint64_t *offset)
1249 {
1250         struct drm_i915_private *dev_priv = dev->dev_private;
1251         struct drm_i915_gem_object *obj;
1252         int ret;
1253
1254         if (!(dev->driver->driver_features & DRIVER_GEM))
1255                 return -ENODEV;
1256
1257         ret = i915_mutex_lock_interruptible(dev);
1258         if (ret)
1259                 return ret;
1260
1261         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1262         if (&obj->base == NULL) {
1263                 ret = -ENOENT;
1264                 goto unlock;
1265         }
1266
1267         if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1268                 ret = -E2BIG;
1269                 goto out;
1270         }
1271
1272         if (obj->madv != I915_MADV_WILLNEED) {
1273                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1274                 ret = -EINVAL;
1275                 goto out;
1276         }
1277
1278         if (!obj->base.map_list.map) {
1279                 ret = drm_gem_create_mmap_offset(&obj->base);
1280                 if (ret)
1281                         goto out;
1282         }
1283
1284         *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1285
1286 out:
1287         drm_gem_object_unreference(&obj->base);
1288 unlock:
1289         mutex_unlock(&dev->struct_mutex);
1290         return ret;
1291 }
1292
1293 /**
1294  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1295  * @dev: DRM device
1296  * @data: GTT mapping ioctl data
1297  * @file: GEM object info
1298  *
1299  * Simply returns the fake offset to userspace so it can mmap it.
1300  * The mmap call will end up in drm_gem_mmap(), which will set things
1301  * up so we can get faults in the handler above.
1302  *
1303  * The fault handler will take care of binding the object into the GTT
1304  * (since it may have been evicted to make room for something), allocating
1305  * a fence register, and mapping the appropriate aperture address into
1306  * userspace.
1307  */
1308 int
1309 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1310                         struct drm_file *file)
1311 {
1312         struct drm_i915_gem_mmap_gtt *args = data;
1313
1314         if (!(dev->driver->driver_features & DRIVER_GEM))
1315                 return -ENODEV;
1316
1317         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1318 }
1319
1320
1321 static int
1322 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1323                               gfp_t gfpmask)
1324 {
1325         int page_count, i;
1326         struct address_space *mapping;
1327         struct inode *inode;
1328         struct page *page;
1329
1330         /* Get the list of pages out of our struct file.  They'll be pinned
1331          * at this point until we release them.
1332          */
1333         page_count = obj->base.size / PAGE_SIZE;
1334         BUG_ON(obj->pages != NULL);
1335         obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1336         if (obj->pages == NULL)
1337                 return -ENOMEM;
1338
1339         inode = obj->base.filp->f_path.dentry->d_inode;
1340         mapping = inode->i_mapping;
1341         gfpmask |= mapping_gfp_mask(mapping);
1342
1343         for (i = 0; i < page_count; i++) {
1344                 page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
1345                 if (IS_ERR(page))
1346                         goto err_pages;
1347
1348                 obj->pages[i] = page;
1349         }
1350
1351         if (i915_gem_object_needs_bit17_swizzle(obj))
1352                 i915_gem_object_do_bit_17_swizzle(obj);
1353
1354         return 0;
1355
1356 err_pages:
1357         while (i--)
1358                 page_cache_release(obj->pages[i]);
1359
1360         drm_free_large(obj->pages);
1361         obj->pages = NULL;
1362         return PTR_ERR(page);
1363 }
1364
1365 static void
1366 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1367 {
1368         int page_count = obj->base.size / PAGE_SIZE;
1369         int i;
1370
1371         BUG_ON(obj->madv == __I915_MADV_PURGED);
1372
1373         if (i915_gem_object_needs_bit17_swizzle(obj))
1374                 i915_gem_object_save_bit_17_swizzle(obj);
1375
1376         if (obj->madv == I915_MADV_DONTNEED)
1377                 obj->dirty = 0;
1378
1379         for (i = 0; i < page_count; i++) {
1380                 if (obj->dirty)
1381                         set_page_dirty(obj->pages[i]);
1382
1383                 if (obj->madv == I915_MADV_WILLNEED)
1384                         mark_page_accessed(obj->pages[i]);
1385
1386                 page_cache_release(obj->pages[i]);
1387         }
1388         obj->dirty = 0;
1389
1390         drm_free_large(obj->pages);
1391         obj->pages = NULL;
1392 }
1393
1394 void
1395 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1396                                struct intel_ring_buffer *ring,
1397                                u32 seqno)
1398 {
1399         struct drm_device *dev = obj->base.dev;
1400         struct drm_i915_private *dev_priv = dev->dev_private;
1401
1402         BUG_ON(ring == NULL);
1403         obj->ring = ring;
1404
1405         /* Add a reference if we're newly entering the active list. */
1406         if (!obj->active) {
1407                 drm_gem_object_reference(&obj->base);
1408                 obj->active = 1;
1409         }
1410
1411         /* Move from whatever list we were on to the tail of execution. */
1412         list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1413         list_move_tail(&obj->ring_list, &ring->active_list);
1414
1415         obj->last_rendering_seqno = seqno;
1416
1417         if (obj->fenced_gpu_access) {
1418                 obj->last_fenced_seqno = seqno;
1419
1420                 /* Bump MRU to take account of the delayed flush */
1421                 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1422                         struct drm_i915_fence_reg *reg;
1423
1424                         reg = &dev_priv->fence_regs[obj->fence_reg];
1425                         list_move_tail(&reg->lru_list,
1426                                        &dev_priv->mm.fence_list);
1427                 }
1428         }
1429 }
1430
1431 static void
1432 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1433 {
1434         list_del_init(&obj->ring_list);
1435         obj->last_rendering_seqno = 0;
1436         obj->last_fenced_seqno = 0;
1437 }
1438
1439 static void
1440 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1441 {
1442         struct drm_device *dev = obj->base.dev;
1443         drm_i915_private_t *dev_priv = dev->dev_private;
1444
1445         BUG_ON(!obj->active);
1446         list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1447
1448         i915_gem_object_move_off_active(obj);
1449 }
1450
1451 static void
1452 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1453 {
1454         struct drm_device *dev = obj->base.dev;
1455         struct drm_i915_private *dev_priv = dev->dev_private;
1456
1457         if (obj->pin_count != 0)
1458                 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1459         else
1460                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1461
1462         BUG_ON(!list_empty(&obj->gpu_write_list));
1463         BUG_ON(!obj->active);
1464         obj->ring = NULL;
1465
1466         i915_gem_object_move_off_active(obj);
1467         obj->fenced_gpu_access = false;
1468
1469         obj->active = 0;
1470         obj->pending_gpu_write = false;
1471         drm_gem_object_unreference(&obj->base);
1472
1473         WARN_ON(i915_verify_lists(dev));
1474 }
1475
1476 /* Immediately discard the backing storage */
1477 static void
1478 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1479 {
1480         struct inode *inode;
1481
1482         /* Our goal here is to return as much of the memory as
1483          * is possible back to the system as we are called from OOM.
1484          * To do this we must instruct the shmfs to drop all of its
1485          * backing pages, *now*.
1486          */
1487         inode = obj->base.filp->f_path.dentry->d_inode;
1488         shmem_truncate_range(inode, 0, (loff_t)-1);
1489
1490         if (obj->base.map_list.map)
1491                 drm_gem_free_mmap_offset(&obj->base);
1492
1493         obj->madv = __I915_MADV_PURGED;
1494 }
1495
1496 static inline int
1497 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1498 {
1499         return obj->madv == I915_MADV_DONTNEED;
1500 }
1501
1502 static void
1503 i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
1504                                uint32_t flush_domains)
1505 {
1506         struct drm_i915_gem_object *obj, *next;
1507
1508         list_for_each_entry_safe(obj, next,
1509                                  &ring->gpu_write_list,
1510                                  gpu_write_list) {
1511                 if (obj->base.write_domain & flush_domains) {
1512                         uint32_t old_write_domain = obj->base.write_domain;
1513
1514                         obj->base.write_domain = 0;
1515                         list_del_init(&obj->gpu_write_list);
1516                         i915_gem_object_move_to_active(obj, ring,
1517                                                        i915_gem_next_request_seqno(ring));
1518
1519                         trace_i915_gem_object_change_domain(obj,
1520                                                             obj->base.read_domains,
1521                                                             old_write_domain);
1522                 }
1523         }
1524 }
1525
1526 static u32
1527 i915_gem_get_seqno(struct drm_device *dev)
1528 {
1529         drm_i915_private_t *dev_priv = dev->dev_private;
1530         u32 seqno = dev_priv->next_seqno;
1531
1532         /* reserve 0 for non-seqno */
1533         if (++dev_priv->next_seqno == 0)
1534                 dev_priv->next_seqno = 1;
1535
1536         return seqno;
1537 }
1538
1539 u32
1540 i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
1541 {
1542         if (ring->outstanding_lazy_request == 0)
1543                 ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
1544
1545         return ring->outstanding_lazy_request;
1546 }
1547
1548 int
1549 i915_add_request(struct intel_ring_buffer *ring,
1550                  struct drm_file *file,
1551                  struct drm_i915_gem_request *request)
1552 {
1553         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1554         uint32_t seqno;
1555         u32 request_ring_position;
1556         int was_empty;
1557         int ret;
1558
1559         BUG_ON(request == NULL);
1560         seqno = i915_gem_next_request_seqno(ring);
1561
1562         /* Record the position of the start of the request so that
1563          * should we detect the updated seqno part-way through the
1564          * GPU processing the request, we never over-estimate the
1565          * position of the head.
1566          */
1567         request_ring_position = intel_ring_get_tail(ring);
1568
1569         ret = ring->add_request(ring, &seqno);
1570         if (ret)
1571             return ret;
1572
1573         trace_i915_gem_request_add(ring, seqno);
1574
1575         request->seqno = seqno;
1576         request->ring = ring;
1577         request->tail = request_ring_position;
1578         request->emitted_jiffies = jiffies;
1579         was_empty = list_empty(&ring->request_list);
1580         list_add_tail(&request->list, &ring->request_list);
1581
1582         if (file) {
1583                 struct drm_i915_file_private *file_priv = file->driver_priv;
1584
1585                 spin_lock(&file_priv->mm.lock);
1586                 request->file_priv = file_priv;
1587                 list_add_tail(&request->client_list,
1588                               &file_priv->mm.request_list);
1589                 spin_unlock(&file_priv->mm.lock);
1590         }
1591
1592         ring->outstanding_lazy_request = 0;
1593
1594         if (!dev_priv->mm.suspended) {
1595                 if (i915_enable_hangcheck) {
1596                         mod_timer(&dev_priv->hangcheck_timer,
1597                                   jiffies +
1598                                   msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1599                 }
1600                 if (was_empty)
1601                         queue_delayed_work(dev_priv->wq,
1602                                            &dev_priv->mm.retire_work, HZ);
1603         }
1604         return 0;
1605 }
1606
1607 static inline void
1608 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1609 {
1610         struct drm_i915_file_private *file_priv = request->file_priv;
1611
1612         if (!file_priv)
1613                 return;
1614
1615         spin_lock(&file_priv->mm.lock);
1616         if (request->file_priv) {
1617                 list_del(&request->client_list);
1618                 request->file_priv = NULL;
1619         }
1620         spin_unlock(&file_priv->mm.lock);
1621 }
1622
1623 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1624                                       struct intel_ring_buffer *ring)
1625 {
1626         while (!list_empty(&ring->request_list)) {
1627                 struct drm_i915_gem_request *request;
1628
1629                 request = list_first_entry(&ring->request_list,
1630                                            struct drm_i915_gem_request,
1631                                            list);
1632
1633                 list_del(&request->list);
1634                 i915_gem_request_remove_from_client(request);
1635                 kfree(request);
1636         }
1637
1638         while (!list_empty(&ring->active_list)) {
1639                 struct drm_i915_gem_object *obj;
1640
1641                 obj = list_first_entry(&ring->active_list,
1642                                        struct drm_i915_gem_object,
1643                                        ring_list);
1644
1645                 obj->base.write_domain = 0;
1646                 list_del_init(&obj->gpu_write_list);
1647                 i915_gem_object_move_to_inactive(obj);
1648         }
1649 }
1650
1651 static void i915_gem_reset_fences(struct drm_device *dev)
1652 {
1653         struct drm_i915_private *dev_priv = dev->dev_private;
1654         int i;
1655
1656         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1657                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1658                 struct drm_i915_gem_object *obj = reg->obj;
1659
1660                 if (!obj)
1661                         continue;
1662
1663                 if (obj->tiling_mode)
1664                         i915_gem_release_mmap(obj);
1665
1666                 reg->obj->fence_reg = I915_FENCE_REG_NONE;
1667                 reg->obj->fenced_gpu_access = false;
1668                 reg->obj->last_fenced_seqno = 0;
1669                 i915_gem_clear_fence_reg(dev, reg);
1670         }
1671 }
1672
1673 void i915_gem_reset(struct drm_device *dev)
1674 {
1675         struct drm_i915_private *dev_priv = dev->dev_private;
1676         struct drm_i915_gem_object *obj;
1677         int i;
1678
1679         for (i = 0; i < I915_NUM_RINGS; i++)
1680                 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
1681
1682         /* Remove anything from the flushing lists. The GPU cache is likely
1683          * to be lost on reset along with the data, so simply move the
1684          * lost bo to the inactive list.
1685          */
1686         while (!list_empty(&dev_priv->mm.flushing_list)) {
1687                 obj = list_first_entry(&dev_priv->mm.flushing_list,
1688                                       struct drm_i915_gem_object,
1689                                       mm_list);
1690
1691                 obj->base.write_domain = 0;
1692                 list_del_init(&obj->gpu_write_list);
1693                 i915_gem_object_move_to_inactive(obj);
1694         }
1695
1696         /* Move everything out of the GPU domains to ensure we do any
1697          * necessary invalidation upon reuse.
1698          */
1699         list_for_each_entry(obj,
1700                             &dev_priv->mm.inactive_list,
1701                             mm_list)
1702         {
1703                 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1704         }
1705
1706         /* The fence registers are invalidated so clear them out */
1707         i915_gem_reset_fences(dev);
1708 }
1709
1710 /**
1711  * This function clears the request list as sequence numbers are passed.
1712  */
1713 void
1714 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1715 {
1716         uint32_t seqno;
1717         int i;
1718
1719         if (list_empty(&ring->request_list))
1720                 return;
1721
1722         WARN_ON(i915_verify_lists(ring->dev));
1723
1724         seqno = ring->get_seqno(ring);
1725
1726         for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1727                 if (seqno >= ring->sync_seqno[i])
1728                         ring->sync_seqno[i] = 0;
1729
1730         while (!list_empty(&ring->request_list)) {
1731                 struct drm_i915_gem_request *request;
1732
1733                 request = list_first_entry(&ring->request_list,
1734                                            struct drm_i915_gem_request,
1735                                            list);
1736
1737                 if (!i915_seqno_passed(seqno, request->seqno))
1738                         break;
1739
1740                 trace_i915_gem_request_retire(ring, request->seqno);
1741                 /* We know the GPU must have read the request to have
1742                  * sent us the seqno + interrupt, so use the position
1743                  * of tail of the request to update the last known position
1744                  * of the GPU head.
1745                  */
1746                 ring->last_retired_head = request->tail;
1747
1748                 list_del(&request->list);
1749                 i915_gem_request_remove_from_client(request);
1750                 kfree(request);
1751         }
1752
1753         /* Move any buffers on the active list that are no longer referenced
1754          * by the ringbuffer to the flushing/inactive lists as appropriate.
1755          */
1756         while (!list_empty(&ring->active_list)) {
1757                 struct drm_i915_gem_object *obj;
1758
1759                 obj = list_first_entry(&ring->active_list,
1760                                       struct drm_i915_gem_object,
1761                                       ring_list);
1762
1763                 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1764                         break;
1765
1766                 if (obj->base.write_domain != 0)
1767                         i915_gem_object_move_to_flushing(obj);
1768                 else
1769                         i915_gem_object_move_to_inactive(obj);
1770         }
1771
1772         if (unlikely(ring->trace_irq_seqno &&
1773                      i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1774                 ring->irq_put(ring);
1775                 ring->trace_irq_seqno = 0;
1776         }
1777
1778         WARN_ON(i915_verify_lists(ring->dev));
1779 }
1780
1781 void
1782 i915_gem_retire_requests(struct drm_device *dev)
1783 {
1784         drm_i915_private_t *dev_priv = dev->dev_private;
1785         int i;
1786
1787         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1788             struct drm_i915_gem_object *obj, *next;
1789
1790             /* We must be careful that during unbind() we do not
1791              * accidentally infinitely recurse into retire requests.
1792              * Currently:
1793              *   retire -> free -> unbind -> wait -> retire_ring
1794              */
1795             list_for_each_entry_safe(obj, next,
1796                                      &dev_priv->mm.deferred_free_list,
1797                                      mm_list)
1798                     i915_gem_free_object_tail(obj);
1799         }
1800
1801         for (i = 0; i < I915_NUM_RINGS; i++)
1802                 i915_gem_retire_requests_ring(&dev_priv->ring[i]);
1803 }
1804
1805 static void
1806 i915_gem_retire_work_handler(struct work_struct *work)
1807 {
1808         drm_i915_private_t *dev_priv;
1809         struct drm_device *dev;
1810         bool idle;
1811         int i;
1812
1813         dev_priv = container_of(work, drm_i915_private_t,
1814                                 mm.retire_work.work);
1815         dev = dev_priv->dev;
1816
1817         /* Come back later if the device is busy... */
1818         if (!mutex_trylock(&dev->struct_mutex)) {
1819                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1820                 return;
1821         }
1822
1823         i915_gem_retire_requests(dev);
1824
1825         /* Send a periodic flush down the ring so we don't hold onto GEM
1826          * objects indefinitely.
1827          */
1828         idle = true;
1829         for (i = 0; i < I915_NUM_RINGS; i++) {
1830                 struct intel_ring_buffer *ring = &dev_priv->ring[i];
1831
1832                 if (!list_empty(&ring->gpu_write_list)) {
1833                         struct drm_i915_gem_request *request;
1834                         int ret;
1835
1836                         ret = i915_gem_flush_ring(ring,
1837                                                   0, I915_GEM_GPU_DOMAINS);
1838                         request = kzalloc(sizeof(*request), GFP_KERNEL);
1839                         if (ret || request == NULL ||
1840                             i915_add_request(ring, NULL, request))
1841                             kfree(request);
1842                 }
1843
1844                 idle &= list_empty(&ring->request_list);
1845         }
1846
1847         if (!dev_priv->mm.suspended && !idle)
1848                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1849
1850         mutex_unlock(&dev->struct_mutex);
1851 }
1852
1853 /**
1854  * Waits for a sequence number to be signaled, and cleans up the
1855  * request and object lists appropriately for that event.
1856  */
1857 int
1858 i915_wait_request(struct intel_ring_buffer *ring,
1859                   uint32_t seqno,
1860                   bool do_retire)
1861 {
1862         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1863         u32 ier;
1864         int ret = 0;
1865
1866         BUG_ON(seqno == 0);
1867
1868         if (atomic_read(&dev_priv->mm.wedged)) {
1869                 struct completion *x = &dev_priv->error_completion;
1870                 bool recovery_complete;
1871                 unsigned long flags;
1872
1873                 /* Give the error handler a chance to run. */
1874                 spin_lock_irqsave(&x->wait.lock, flags);
1875                 recovery_complete = x->done > 0;
1876                 spin_unlock_irqrestore(&x->wait.lock, flags);
1877
1878                 return recovery_complete ? -EIO : -EAGAIN;
1879         }
1880
1881         if (seqno == ring->outstanding_lazy_request) {
1882                 struct drm_i915_gem_request *request;
1883
1884                 request = kzalloc(sizeof(*request), GFP_KERNEL);
1885                 if (request == NULL)
1886                         return -ENOMEM;
1887
1888                 ret = i915_add_request(ring, NULL, request);
1889                 if (ret) {
1890                         kfree(request);
1891                         return ret;
1892                 }
1893
1894                 seqno = request->seqno;
1895         }
1896
1897         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
1898                 if (HAS_PCH_SPLIT(ring->dev))
1899                         ier = I915_READ(DEIER) | I915_READ(GTIER);
1900                 else if (IS_VALLEYVIEW(ring->dev))
1901                         ier = I915_READ(GTIER) | I915_READ(VLV_IER);
1902                 else
1903                         ier = I915_READ(IER);
1904                 if (!ier) {
1905                         DRM_ERROR("something (likely vbetool) disabled "
1906                                   "interrupts, re-enabling\n");
1907                         ring->dev->driver->irq_preinstall(ring->dev);
1908                         ring->dev->driver->irq_postinstall(ring->dev);
1909                 }
1910
1911                 trace_i915_gem_request_wait_begin(ring, seqno);
1912
1913                 ring->waiting_seqno = seqno;
1914                 if (ring->irq_get(ring)) {
1915                         if (dev_priv->mm.interruptible)
1916                                 ret = wait_event_interruptible(ring->irq_queue,
1917                                                                i915_seqno_passed(ring->get_seqno(ring), seqno)
1918                                                                || atomic_read(&dev_priv->mm.wedged));
1919                         else
1920                                 wait_event(ring->irq_queue,
1921                                            i915_seqno_passed(ring->get_seqno(ring), seqno)
1922                                            || atomic_read(&dev_priv->mm.wedged));
1923
1924                         ring->irq_put(ring);
1925                 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
1926                                                              seqno) ||
1927                                            atomic_read(&dev_priv->mm.wedged), 3000))
1928                         ret = -EBUSY;
1929                 ring->waiting_seqno = 0;
1930
1931                 trace_i915_gem_request_wait_end(ring, seqno);
1932         }
1933         if (atomic_read(&dev_priv->mm.wedged))
1934                 ret = -EAGAIN;
1935
1936         /* Directly dispatch request retiring.  While we have the work queue
1937          * to handle this, the waiter on a request often wants an associated
1938          * buffer to have made it to the inactive list, and we would need
1939          * a separate wait queue to handle that.
1940          */
1941         if (ret == 0 && do_retire)
1942                 i915_gem_retire_requests_ring(ring);
1943
1944         return ret;
1945 }
1946
1947 /**
1948  * Ensures that all rendering to the object has completed and the object is
1949  * safe to unbind from the GTT or access from the CPU.
1950  */
1951 int
1952 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
1953 {
1954         int ret;
1955
1956         /* This function only exists to support waiting for existing rendering,
1957          * not for emitting required flushes.
1958          */
1959         BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
1960
1961         /* If there is rendering queued on the buffer being evicted, wait for
1962          * it.
1963          */
1964         if (obj->active) {
1965                 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
1966                                         true);
1967                 if (ret)
1968                         return ret;
1969         }
1970
1971         return 0;
1972 }
1973
1974 /**
1975  * i915_gem_object_sync - sync an object to a ring.
1976  *
1977  * @obj: object which may be in use on another ring.
1978  * @to: ring we wish to use the object on. May be NULL.
1979  *
1980  * This code is meant to abstract object synchronization with the GPU.
1981  * Calling with NULL implies synchronizing the object with the CPU
1982  * rather than a particular GPU ring.
1983  *
1984  * Returns 0 if successful, else propagates up the lower layer error.
1985  */
1986 int
1987 i915_gem_object_sync(struct drm_i915_gem_object *obj,
1988                      struct intel_ring_buffer *to)
1989 {
1990         struct intel_ring_buffer *from = obj->ring;
1991         u32 seqno;
1992         int ret, idx;
1993
1994         if (from == NULL || to == from)
1995                 return 0;
1996
1997         if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
1998                 return i915_gem_object_wait_rendering(obj);
1999
2000         idx = intel_ring_sync_index(from, to);
2001
2002         seqno = obj->last_rendering_seqno;
2003         if (seqno <= from->sync_seqno[idx])
2004                 return 0;
2005
2006         if (seqno == from->outstanding_lazy_request) {
2007                 struct drm_i915_gem_request *request;
2008
2009                 request = kzalloc(sizeof(*request), GFP_KERNEL);
2010                 if (request == NULL)
2011                         return -ENOMEM;
2012
2013                 ret = i915_add_request(from, NULL, request);
2014                 if (ret) {
2015                         kfree(request);
2016                         return ret;
2017                 }
2018
2019                 seqno = request->seqno;
2020         }
2021
2022
2023         ret = to->sync_to(to, from, seqno);
2024         if (!ret)
2025                 from->sync_seqno[idx] = seqno;
2026
2027         return ret;
2028 }
2029
2030 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2031 {
2032         u32 old_write_domain, old_read_domains;
2033
2034         /* Act a barrier for all accesses through the GTT */
2035         mb();
2036
2037         /* Force a pagefault for domain tracking on next user access */
2038         i915_gem_release_mmap(obj);
2039
2040         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2041                 return;
2042
2043         old_read_domains = obj->base.read_domains;
2044         old_write_domain = obj->base.write_domain;
2045
2046         obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2047         obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2048
2049         trace_i915_gem_object_change_domain(obj,
2050                                             old_read_domains,
2051                                             old_write_domain);
2052 }
2053
2054 /**
2055  * Unbinds an object from the GTT aperture.
2056  */
2057 int
2058 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2059 {
2060         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2061         int ret = 0;
2062
2063         if (obj->gtt_space == NULL)
2064                 return 0;
2065
2066         if (obj->pin_count != 0) {
2067                 DRM_ERROR("Attempting to unbind pinned buffer\n");
2068                 return -EINVAL;
2069         }
2070
2071         ret = i915_gem_object_finish_gpu(obj);
2072         if (ret == -ERESTARTSYS)
2073                 return ret;
2074         /* Continue on if we fail due to EIO, the GPU is hung so we
2075          * should be safe and we need to cleanup or else we might
2076          * cause memory corruption through use-after-free.
2077          */
2078
2079         i915_gem_object_finish_gtt(obj);
2080
2081         /* Move the object to the CPU domain to ensure that
2082          * any possible CPU writes while it's not in the GTT
2083          * are flushed when we go to remap it.
2084          */
2085         if (ret == 0)
2086                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2087         if (ret == -ERESTARTSYS)
2088                 return ret;
2089         if (ret) {
2090                 /* In the event of a disaster, abandon all caches and
2091                  * hope for the best.
2092                  */
2093                 i915_gem_clflush_object(obj);
2094                 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2095         }
2096
2097         /* release the fence reg _after_ flushing */
2098         ret = i915_gem_object_put_fence(obj);
2099         if (ret == -ERESTARTSYS)
2100                 return ret;
2101
2102         trace_i915_gem_object_unbind(obj);
2103
2104         if (obj->has_global_gtt_mapping)
2105                 i915_gem_gtt_unbind_object(obj);
2106         if (obj->has_aliasing_ppgtt_mapping) {
2107                 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2108                 obj->has_aliasing_ppgtt_mapping = 0;
2109         }
2110         i915_gem_gtt_finish_object(obj);
2111
2112         i915_gem_object_put_pages_gtt(obj);
2113
2114         list_del_init(&obj->gtt_list);
2115         list_del_init(&obj->mm_list);
2116         /* Avoid an unnecessary call to unbind on rebind. */
2117         obj->map_and_fenceable = true;
2118
2119         drm_mm_put_block(obj->gtt_space);
2120         obj->gtt_space = NULL;
2121         obj->gtt_offset = 0;
2122
2123         if (i915_gem_object_is_purgeable(obj))
2124                 i915_gem_object_truncate(obj);
2125
2126         return ret;
2127 }
2128
2129 int
2130 i915_gem_flush_ring(struct intel_ring_buffer *ring,
2131                     uint32_t invalidate_domains,
2132                     uint32_t flush_domains)
2133 {
2134         int ret;
2135
2136         if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2137                 return 0;
2138
2139         trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2140
2141         ret = ring->flush(ring, invalidate_domains, flush_domains);
2142         if (ret)
2143                 return ret;
2144
2145         if (flush_domains & I915_GEM_GPU_DOMAINS)
2146                 i915_gem_process_flushing_list(ring, flush_domains);
2147
2148         return 0;
2149 }
2150
2151 static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
2152 {
2153         int ret;
2154
2155         if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2156                 return 0;
2157
2158         if (!list_empty(&ring->gpu_write_list)) {
2159                 ret = i915_gem_flush_ring(ring,
2160                                     I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2161                 if (ret)
2162                         return ret;
2163         }
2164
2165         return i915_wait_request(ring, i915_gem_next_request_seqno(ring),
2166                                  do_retire);
2167 }
2168
2169 int i915_gpu_idle(struct drm_device *dev, bool do_retire)
2170 {
2171         drm_i915_private_t *dev_priv = dev->dev_private;
2172         int ret, i;
2173
2174         /* Flush everything onto the inactive list. */
2175         for (i = 0; i < I915_NUM_RINGS; i++) {
2176                 ret = i915_ring_idle(&dev_priv->ring[i], do_retire);
2177                 if (ret)
2178                         return ret;
2179         }
2180
2181         return 0;
2182 }
2183
2184 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2185                                         struct drm_i915_gem_object *obj)
2186 {
2187         drm_i915_private_t *dev_priv = dev->dev_private;
2188         uint64_t val;
2189
2190         if (obj) {
2191                 u32 size = obj->gtt_space->size;
2192
2193                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2194                                  0xfffff000) << 32;
2195                 val |= obj->gtt_offset & 0xfffff000;
2196                 val |= (uint64_t)((obj->stride / 128) - 1) <<
2197                         SANDYBRIDGE_FENCE_PITCH_SHIFT;
2198
2199                 if (obj->tiling_mode == I915_TILING_Y)
2200                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2201                 val |= I965_FENCE_REG_VALID;
2202         } else
2203                 val = 0;
2204
2205         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2206         POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
2207 }
2208
2209 static void i965_write_fence_reg(struct drm_device *dev, int reg,
2210                                  struct drm_i915_gem_object *obj)
2211 {
2212         drm_i915_private_t *dev_priv = dev->dev_private;
2213         uint64_t val;
2214
2215         if (obj) {
2216                 u32 size = obj->gtt_space->size;
2217
2218                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2219                                  0xfffff000) << 32;
2220                 val |= obj->gtt_offset & 0xfffff000;
2221                 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2222                 if (obj->tiling_mode == I915_TILING_Y)
2223                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2224                 val |= I965_FENCE_REG_VALID;
2225         } else
2226                 val = 0;
2227
2228         I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2229         POSTING_READ(FENCE_REG_965_0 + reg * 8);
2230 }
2231
2232 static void i915_write_fence_reg(struct drm_device *dev, int reg,
2233                                  struct drm_i915_gem_object *obj)
2234 {
2235         drm_i915_private_t *dev_priv = dev->dev_private;
2236         u32 val;
2237
2238         if (obj) {
2239                 u32 size = obj->gtt_space->size;
2240                 int pitch_val;
2241                 int tile_width;
2242
2243                 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2244                      (size & -size) != size ||
2245                      (obj->gtt_offset & (size - 1)),
2246                      "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2247                      obj->gtt_offset, obj->map_and_fenceable, size);
2248
2249                 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2250                         tile_width = 128;
2251                 else
2252                         tile_width = 512;
2253
2254                 /* Note: pitch better be a power of two tile widths */
2255                 pitch_val = obj->stride / tile_width;
2256                 pitch_val = ffs(pitch_val) - 1;
2257
2258                 val = obj->gtt_offset;
2259                 if (obj->tiling_mode == I915_TILING_Y)
2260                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2261                 val |= I915_FENCE_SIZE_BITS(size);
2262                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2263                 val |= I830_FENCE_REG_VALID;
2264         } else
2265                 val = 0;
2266
2267         if (reg < 8)
2268                 reg = FENCE_REG_830_0 + reg * 4;
2269         else
2270                 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2271
2272         I915_WRITE(reg, val);
2273         POSTING_READ(reg);
2274 }
2275
2276 static void i830_write_fence_reg(struct drm_device *dev, int reg,
2277                                 struct drm_i915_gem_object *obj)
2278 {
2279         drm_i915_private_t *dev_priv = dev->dev_private;
2280         uint32_t val;
2281
2282         if (obj) {
2283                 u32 size = obj->gtt_space->size;
2284                 uint32_t pitch_val;
2285
2286                 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2287                      (size & -size) != size ||
2288                      (obj->gtt_offset & (size - 1)),
2289                      "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2290                      obj->gtt_offset, size);
2291
2292                 pitch_val = obj->stride / 128;
2293                 pitch_val = ffs(pitch_val) - 1;
2294
2295                 val = obj->gtt_offset;
2296                 if (obj->tiling_mode == I915_TILING_Y)
2297                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2298                 val |= I830_FENCE_SIZE_BITS(size);
2299                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2300                 val |= I830_FENCE_REG_VALID;
2301         } else
2302                 val = 0;
2303
2304         I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2305         POSTING_READ(FENCE_REG_830_0 + reg * 4);
2306 }
2307
2308 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2309                                  struct drm_i915_gem_object *obj)
2310 {
2311         switch (INTEL_INFO(dev)->gen) {
2312         case 7:
2313         case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2314         case 5:
2315         case 4: i965_write_fence_reg(dev, reg, obj); break;
2316         case 3: i915_write_fence_reg(dev, reg, obj); break;
2317         case 2: i830_write_fence_reg(dev, reg, obj); break;
2318         default: break;
2319         }
2320 }
2321
2322 static inline int fence_number(struct drm_i915_private *dev_priv,
2323                                struct drm_i915_fence_reg *fence)
2324 {
2325         return fence - dev_priv->fence_regs;
2326 }
2327
2328 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2329                                          struct drm_i915_fence_reg *fence,
2330                                          bool enable)
2331 {
2332         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2333         int reg = fence_number(dev_priv, fence);
2334
2335         i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2336
2337         if (enable) {
2338                 obj->fence_reg = reg;
2339                 fence->obj = obj;
2340                 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2341         } else {
2342                 obj->fence_reg = I915_FENCE_REG_NONE;
2343                 fence->obj = NULL;
2344                 list_del_init(&fence->lru_list);
2345         }
2346 }
2347
2348 static int
2349 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2350 {
2351         int ret;
2352
2353         if (obj->fenced_gpu_access) {
2354                 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2355                         ret = i915_gem_flush_ring(obj->ring,
2356                                                   0, obj->base.write_domain);
2357                         if (ret)
2358                                 return ret;
2359                 }
2360
2361                 obj->fenced_gpu_access = false;
2362         }
2363
2364         if (obj->last_fenced_seqno) {
2365                 ret = i915_wait_request(obj->ring,
2366                                         obj->last_fenced_seqno,
2367                                         true);
2368                 if (ret)
2369                         return ret;
2370
2371                 obj->last_fenced_seqno = 0;
2372         }
2373
2374         /* Ensure that all CPU reads are completed before installing a fence
2375          * and all writes before removing the fence.
2376          */
2377         if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2378                 mb();
2379
2380         return 0;
2381 }
2382
2383 int
2384 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2385 {
2386         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2387         int ret;
2388
2389         ret = i915_gem_object_flush_fence(obj);
2390         if (ret)
2391                 return ret;
2392
2393         if (obj->fence_reg == I915_FENCE_REG_NONE)
2394                 return 0;
2395
2396         i915_gem_object_update_fence(obj,
2397                                      &dev_priv->fence_regs[obj->fence_reg],
2398                                      false);
2399         i915_gem_object_fence_lost(obj);
2400
2401         return 0;
2402 }
2403
2404 static struct drm_i915_fence_reg *
2405 i915_find_fence_reg(struct drm_device *dev)
2406 {
2407         struct drm_i915_private *dev_priv = dev->dev_private;
2408         struct drm_i915_fence_reg *reg, *avail;
2409         int i;
2410
2411         /* First try to find a free reg */
2412         avail = NULL;
2413         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2414                 reg = &dev_priv->fence_regs[i];
2415                 if (!reg->obj)
2416                         return reg;
2417
2418                 if (!reg->pin_count)
2419                         avail = reg;
2420         }
2421
2422         if (avail == NULL)
2423                 return NULL;
2424
2425         /* None available, try to steal one or wait for a user to finish */
2426         list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2427                 if (reg->pin_count)
2428                         continue;
2429
2430                 return reg;
2431         }
2432
2433         return NULL;
2434 }
2435
2436 /**
2437  * i915_gem_object_get_fence - set up fencing for an object
2438  * @obj: object to map through a fence reg
2439  *
2440  * When mapping objects through the GTT, userspace wants to be able to write
2441  * to them without having to worry about swizzling if the object is tiled.
2442  * This function walks the fence regs looking for a free one for @obj,
2443  * stealing one if it can't find any.
2444  *
2445  * It then sets up the reg based on the object's properties: address, pitch
2446  * and tiling format.
2447  *
2448  * For an untiled surface, this removes any existing fence.
2449  */
2450 int
2451 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2452 {
2453         struct drm_device *dev = obj->base.dev;
2454         struct drm_i915_private *dev_priv = dev->dev_private;
2455         struct drm_i915_fence_reg *reg;
2456         int ret;
2457
2458         if (obj->tiling_mode == I915_TILING_NONE)
2459                 return i915_gem_object_put_fence(obj);
2460
2461         /* Just update our place in the LRU if our fence is getting reused. */
2462         if (obj->fence_reg != I915_FENCE_REG_NONE) {
2463                 reg = &dev_priv->fence_regs[obj->fence_reg];
2464                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2465
2466                 if (obj->tiling_changed) {
2467                         ret = i915_gem_object_flush_fence(obj);
2468                         if (ret)
2469                                 return ret;
2470
2471                         goto update;
2472                 }
2473
2474                 return 0;
2475         }
2476
2477         reg = i915_find_fence_reg(dev);
2478         if (reg == NULL)
2479                 return -EDEADLK;
2480
2481         ret = i915_gem_object_flush_fence(obj);
2482         if (ret)
2483                 return ret;
2484
2485         if (reg->obj) {
2486                 struct drm_i915_gem_object *old = reg->obj;
2487
2488                 drm_gem_object_reference(&old->base);
2489
2490                 if (old->tiling_mode)
2491                         i915_gem_release_mmap(old);
2492
2493                 ret = i915_gem_object_flush_fence(old);
2494                 if (ret) {
2495                         drm_gem_object_unreference(&old->base);
2496                         return ret;
2497                 }
2498
2499                 old->fence_reg = I915_FENCE_REG_NONE;
2500                 old->last_fenced_seqno = 0;
2501
2502                 drm_gem_object_unreference(&old->base);
2503         }
2504
2505         reg->obj = obj;
2506         list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2507         obj->fence_reg = reg - dev_priv->fence_regs;
2508
2509 update:
2510         obj->tiling_changed = false;
2511         i915_gem_write_fence(dev, reg - dev_priv->fence_regs, obj);
2512         return 0;
2513 }
2514
2515 /**
2516  * i915_gem_clear_fence_reg - clear out fence register info
2517  * @obj: object to clear
2518  *
2519  * Zeroes out the fence register itself and clears out the associated
2520  * data structures in dev_priv and obj.
2521  */
2522 static void
2523 i915_gem_clear_fence_reg(struct drm_device *dev,
2524                          struct drm_i915_fence_reg *reg)
2525 {
2526         drm_i915_private_t *dev_priv = dev->dev_private;
2527         uint32_t fence_reg = reg - dev_priv->fence_regs;
2528
2529         switch (INTEL_INFO(dev)->gen) {
2530         case 7:
2531         case 6:
2532                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2533                 break;
2534         case 5:
2535         case 4:
2536                 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2537                 break;
2538         case 3:
2539                 if (fence_reg >= 8)
2540                         fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2541                 else
2542         case 2:
2543                         fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2544
2545                 I915_WRITE(fence_reg, 0);
2546                 break;
2547         }
2548
2549         list_del_init(&reg->lru_list);
2550         reg->obj = NULL;
2551         reg->pin_count = 0;
2552 }
2553
2554 /**
2555  * Finds free space in the GTT aperture and binds the object there.
2556  */
2557 static int
2558 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2559                             unsigned alignment,
2560                             bool map_and_fenceable)
2561 {
2562         struct drm_device *dev = obj->base.dev;
2563         drm_i915_private_t *dev_priv = dev->dev_private;
2564         struct drm_mm_node *free_space;
2565         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2566         u32 size, fence_size, fence_alignment, unfenced_alignment;
2567         bool mappable, fenceable;
2568         int ret;
2569
2570         if (obj->madv != I915_MADV_WILLNEED) {
2571                 DRM_ERROR("Attempting to bind a purgeable object\n");
2572                 return -EINVAL;
2573         }
2574
2575         fence_size = i915_gem_get_gtt_size(dev,
2576                                            obj->base.size,
2577                                            obj->tiling_mode);
2578         fence_alignment = i915_gem_get_gtt_alignment(dev,
2579                                                      obj->base.size,
2580                                                      obj->tiling_mode);
2581         unfenced_alignment =
2582                 i915_gem_get_unfenced_gtt_alignment(dev,
2583                                                     obj->base.size,
2584                                                     obj->tiling_mode);
2585
2586         if (alignment == 0)
2587                 alignment = map_and_fenceable ? fence_alignment :
2588                                                 unfenced_alignment;
2589         if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2590                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2591                 return -EINVAL;
2592         }
2593
2594         size = map_and_fenceable ? fence_size : obj->base.size;
2595
2596         /* If the object is bigger than the entire aperture, reject it early
2597          * before evicting everything in a vain attempt to find space.
2598          */
2599         if (obj->base.size >
2600             (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2601                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2602                 return -E2BIG;
2603         }
2604
2605  search_free:
2606         if (map_and_fenceable)
2607                 free_space =
2608                         drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2609                                                     size, alignment, 0,
2610                                                     dev_priv->mm.gtt_mappable_end,
2611                                                     0);
2612         else
2613                 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2614                                                 size, alignment, 0);
2615
2616         if (free_space != NULL) {
2617                 if (map_and_fenceable)
2618                         obj->gtt_space =
2619                                 drm_mm_get_block_range_generic(free_space,
2620                                                                size, alignment, 0,
2621                                                                dev_priv->mm.gtt_mappable_end,
2622                                                                0);
2623                 else
2624                         obj->gtt_space =
2625                                 drm_mm_get_block(free_space, size, alignment);
2626         }
2627         if (obj->gtt_space == NULL) {
2628                 /* If the gtt is empty and we're still having trouble
2629                  * fitting our object in, we're out of memory.
2630                  */
2631                 ret = i915_gem_evict_something(dev, size, alignment,
2632                                                map_and_fenceable);
2633                 if (ret)
2634                         return ret;
2635
2636                 goto search_free;
2637         }
2638
2639         ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2640         if (ret) {
2641                 drm_mm_put_block(obj->gtt_space);
2642                 obj->gtt_space = NULL;
2643
2644                 if (ret == -ENOMEM) {
2645                         /* first try to reclaim some memory by clearing the GTT */
2646                         ret = i915_gem_evict_everything(dev, false);
2647                         if (ret) {
2648                                 /* now try to shrink everyone else */
2649                                 if (gfpmask) {
2650                                         gfpmask = 0;
2651                                         goto search_free;
2652                                 }
2653
2654                                 return -ENOMEM;
2655                         }
2656
2657                         goto search_free;
2658                 }
2659
2660                 return ret;
2661         }
2662
2663         ret = i915_gem_gtt_prepare_object(obj);
2664         if (ret) {
2665                 i915_gem_object_put_pages_gtt(obj);
2666                 drm_mm_put_block(obj->gtt_space);
2667                 obj->gtt_space = NULL;
2668
2669                 if (i915_gem_evict_everything(dev, false))
2670                         return ret;
2671
2672                 goto search_free;
2673         }
2674
2675         if (!dev_priv->mm.aliasing_ppgtt)
2676                 i915_gem_gtt_bind_object(obj, obj->cache_level);
2677
2678         list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2679         list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2680
2681         /* Assert that the object is not currently in any GPU domain. As it
2682          * wasn't in the GTT, there shouldn't be any way it could have been in
2683          * a GPU cache
2684          */
2685         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2686         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2687
2688         obj->gtt_offset = obj->gtt_space->start;
2689
2690         fenceable =
2691                 obj->gtt_space->size == fence_size &&
2692                 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2693
2694         mappable =
2695                 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2696
2697         obj->map_and_fenceable = mappable && fenceable;
2698
2699         trace_i915_gem_object_bind(obj, map_and_fenceable);
2700         return 0;
2701 }
2702
2703 void
2704 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2705 {
2706         /* If we don't have a page list set up, then we're not pinned
2707          * to GPU, and we can ignore the cache flush because it'll happen
2708          * again at bind time.
2709          */
2710         if (obj->pages == NULL)
2711                 return;
2712
2713         /* If the GPU is snooping the contents of the CPU cache,
2714          * we do not need to manually clear the CPU cache lines.  However,
2715          * the caches are only snooped when the render cache is
2716          * flushed/invalidated.  As we always have to emit invalidations
2717          * and flushes when moving into and out of the RENDER domain, correct
2718          * snooping behaviour occurs naturally as the result of our domain
2719          * tracking.
2720          */
2721         if (obj->cache_level != I915_CACHE_NONE)
2722                 return;
2723
2724         trace_i915_gem_object_clflush(obj);
2725
2726         drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2727 }
2728
2729 /** Flushes any GPU write domain for the object if it's dirty. */
2730 static int
2731 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2732 {
2733         if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2734                 return 0;
2735
2736         /* Queue the GPU write cache flushing we need. */
2737         return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2738 }
2739
2740 /** Flushes the GTT write domain for the object if it's dirty. */
2741 static void
2742 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2743 {
2744         uint32_t old_write_domain;
2745
2746         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2747                 return;
2748
2749         /* No actual flushing is required for the GTT write domain.  Writes
2750          * to it immediately go to main memory as far as we know, so there's
2751          * no chipset flush.  It also doesn't land in render cache.
2752          *
2753          * However, we do have to enforce the order so that all writes through
2754          * the GTT land before any writes to the device, such as updates to
2755          * the GATT itself.
2756          */
2757         wmb();
2758
2759         old_write_domain = obj->base.write_domain;
2760         obj->base.write_domain = 0;
2761
2762         trace_i915_gem_object_change_domain(obj,
2763                                             obj->base.read_domains,
2764                                             old_write_domain);
2765 }
2766
2767 /** Flushes the CPU write domain for the object if it's dirty. */
2768 static void
2769 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2770 {
2771         uint32_t old_write_domain;
2772
2773         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2774                 return;
2775
2776         i915_gem_clflush_object(obj);
2777         intel_gtt_chipset_flush();
2778         old_write_domain = obj->base.write_domain;
2779         obj->base.write_domain = 0;
2780
2781         trace_i915_gem_object_change_domain(obj,
2782                                             obj->base.read_domains,
2783                                             old_write_domain);
2784 }
2785
2786 /**
2787  * Moves a single object to the GTT read, and possibly write domain.
2788  *
2789  * This function returns when the move is complete, including waiting on
2790  * flushes to occur.
2791  */
2792 int
2793 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2794 {
2795         uint32_t old_write_domain, old_read_domains;
2796         int ret;
2797
2798         /* Not valid to be called on unbound objects. */
2799         if (obj->gtt_space == NULL)
2800                 return -EINVAL;
2801
2802         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2803                 return 0;
2804
2805         ret = i915_gem_object_flush_gpu_write_domain(obj);
2806         if (ret)
2807                 return ret;
2808
2809         if (obj->pending_gpu_write || write) {
2810                 ret = i915_gem_object_wait_rendering(obj);
2811                 if (ret)
2812                         return ret;
2813         }
2814
2815         i915_gem_object_flush_cpu_write_domain(obj);
2816
2817         old_write_domain = obj->base.write_domain;
2818         old_read_domains = obj->base.read_domains;
2819
2820         /* It should now be out of any other write domains, and we can update
2821          * the domain values for our changes.
2822          */
2823         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2824         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2825         if (write) {
2826                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2827                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2828                 obj->dirty = 1;
2829         }
2830
2831         trace_i915_gem_object_change_domain(obj,
2832                                             old_read_domains,
2833                                             old_write_domain);
2834
2835         return 0;
2836 }
2837
2838 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2839                                     enum i915_cache_level cache_level)
2840 {
2841         struct drm_device *dev = obj->base.dev;
2842         drm_i915_private_t *dev_priv = dev->dev_private;
2843         int ret;
2844
2845         if (obj->cache_level == cache_level)
2846                 return 0;
2847
2848         if (obj->pin_count) {
2849                 DRM_DEBUG("can not change the cache level of pinned objects\n");
2850                 return -EBUSY;
2851         }
2852
2853         if (obj->gtt_space) {
2854                 ret = i915_gem_object_finish_gpu(obj);
2855                 if (ret)
2856                         return ret;
2857
2858                 i915_gem_object_finish_gtt(obj);
2859
2860                 /* Before SandyBridge, you could not use tiling or fence
2861                  * registers with snooped memory, so relinquish any fences
2862                  * currently pointing to our region in the aperture.
2863                  */
2864                 if (INTEL_INFO(obj->base.dev)->gen < 6) {
2865                         ret = i915_gem_object_put_fence(obj);
2866                         if (ret)
2867                                 return ret;
2868                 }
2869
2870                 if (obj->has_global_gtt_mapping)
2871                         i915_gem_gtt_bind_object(obj, cache_level);
2872                 if (obj->has_aliasing_ppgtt_mapping)
2873                         i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2874                                                obj, cache_level);
2875         }
2876
2877         if (cache_level == I915_CACHE_NONE) {
2878                 u32 old_read_domains, old_write_domain;
2879
2880                 /* If we're coming from LLC cached, then we haven't
2881                  * actually been tracking whether the data is in the
2882                  * CPU cache or not, since we only allow one bit set
2883                  * in obj->write_domain and have been skipping the clflushes.
2884                  * Just set it to the CPU cache for now.
2885                  */
2886                 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
2887                 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
2888
2889                 old_read_domains = obj->base.read_domains;
2890                 old_write_domain = obj->base.write_domain;
2891
2892                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2893                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2894
2895                 trace_i915_gem_object_change_domain(obj,
2896                                                     old_read_domains,
2897                                                     old_write_domain);
2898         }
2899
2900         obj->cache_level = cache_level;
2901         return 0;
2902 }
2903
2904 /*
2905  * Prepare buffer for display plane (scanout, cursors, etc).
2906  * Can be called from an uninterruptible phase (modesetting) and allows
2907  * any flushes to be pipelined (for pageflips).
2908  */
2909 int
2910 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2911                                      u32 alignment,
2912                                      struct intel_ring_buffer *pipelined)
2913 {
2914         u32 old_read_domains, old_write_domain;
2915         int ret;
2916
2917         ret = i915_gem_object_flush_gpu_write_domain(obj);
2918         if (ret)
2919                 return ret;
2920
2921         if (pipelined != obj->ring) {
2922                 ret = i915_gem_object_sync(obj, pipelined);
2923                 if (ret)
2924                         return ret;
2925         }
2926
2927         /* The display engine is not coherent with the LLC cache on gen6.  As
2928          * a result, we make sure that the pinning that is about to occur is
2929          * done with uncached PTEs. This is lowest common denominator for all
2930          * chipsets.
2931          *
2932          * However for gen6+, we could do better by using the GFDT bit instead
2933          * of uncaching, which would allow us to flush all the LLC-cached data
2934          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2935          */
2936         ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2937         if (ret)
2938                 return ret;
2939
2940         /* As the user may map the buffer once pinned in the display plane
2941          * (e.g. libkms for the bootup splash), we have to ensure that we
2942          * always use map_and_fenceable for all scanout buffers.
2943          */
2944         ret = i915_gem_object_pin(obj, alignment, true);
2945         if (ret)
2946                 return ret;
2947
2948         i915_gem_object_flush_cpu_write_domain(obj);
2949
2950         old_write_domain = obj->base.write_domain;
2951         old_read_domains = obj->base.read_domains;
2952
2953         /* It should now be out of any other write domains, and we can update
2954          * the domain values for our changes.
2955          */
2956         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2957         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2958
2959         trace_i915_gem_object_change_domain(obj,
2960                                             old_read_domains,
2961                                             old_write_domain);
2962
2963         return 0;
2964 }
2965
2966 int
2967 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
2968 {
2969         int ret;
2970
2971         if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
2972                 return 0;
2973
2974         if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2975                 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2976                 if (ret)
2977                         return ret;
2978         }
2979
2980         ret = i915_gem_object_wait_rendering(obj);
2981         if (ret)
2982                 return ret;
2983
2984         /* Ensure that we invalidate the GPU's caches and TLBs. */
2985         obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2986         return 0;
2987 }
2988
2989 /**
2990  * Moves a single object to the CPU read, and possibly write domain.
2991  *
2992  * This function returns when the move is complete, including waiting on
2993  * flushes to occur.
2994  */
2995 int
2996 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2997 {
2998         uint32_t old_write_domain, old_read_domains;
2999         int ret;
3000
3001         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3002                 return 0;
3003
3004         ret = i915_gem_object_flush_gpu_write_domain(obj);
3005         if (ret)
3006                 return ret;
3007
3008         if (write || obj->pending_gpu_write) {
3009                 ret = i915_gem_object_wait_rendering(obj);
3010                 if (ret)
3011                         return ret;
3012         }
3013
3014         i915_gem_object_flush_gtt_write_domain(obj);
3015
3016         old_write_domain = obj->base.write_domain;
3017         old_read_domains = obj->base.read_domains;
3018
3019         /* Flush the CPU cache if it's still invalid. */
3020         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3021                 i915_gem_clflush_object(obj);
3022
3023                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3024         }
3025
3026         /* It should now be out of any other write domains, and we can update
3027          * the domain values for our changes.
3028          */
3029         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3030
3031         /* If we're writing through the CPU, then the GPU read domains will
3032          * need to be invalidated at next use.
3033          */
3034         if (write) {
3035                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3036                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3037         }
3038
3039         trace_i915_gem_object_change_domain(obj,
3040                                             old_read_domains,
3041                                             old_write_domain);
3042
3043         return 0;
3044 }
3045
3046 /* Throttle our rendering by waiting until the ring has completed our requests
3047  * emitted over 20 msec ago.
3048  *
3049  * Note that if we were to use the current jiffies each time around the loop,
3050  * we wouldn't escape the function with any frames outstanding if the time to
3051  * render a frame was over 20ms.
3052  *
3053  * This should get us reasonable parallelism between CPU and GPU but also
3054  * relatively low latency when blocking on a particular request to finish.
3055  */
3056 static int
3057 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3058 {
3059         struct drm_i915_private *dev_priv = dev->dev_private;
3060         struct drm_i915_file_private *file_priv = file->driver_priv;
3061         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3062         struct drm_i915_gem_request *request;
3063         struct intel_ring_buffer *ring = NULL;
3064         u32 seqno = 0;
3065         int ret;
3066
3067         if (atomic_read(&dev_priv->mm.wedged))
3068                 return -EIO;
3069
3070         spin_lock(&file_priv->mm.lock);
3071         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3072                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3073                         break;
3074
3075                 ring = request->ring;
3076                 seqno = request->seqno;
3077         }
3078         spin_unlock(&file_priv->mm.lock);
3079
3080         if (seqno == 0)
3081                 return 0;
3082
3083         ret = 0;
3084         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3085                 /* And wait for the seqno passing without holding any locks and
3086                  * causing extra latency for others. This is safe as the irq
3087                  * generation is designed to be run atomically and so is
3088                  * lockless.
3089                  */
3090                 if (ring->irq_get(ring)) {
3091                         ret = wait_event_interruptible(ring->irq_queue,
3092                                                        i915_seqno_passed(ring->get_seqno(ring), seqno)
3093                                                        || atomic_read(&dev_priv->mm.wedged));
3094                         ring->irq_put(ring);
3095
3096                         if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3097                                 ret = -EIO;
3098                 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
3099                                                              seqno) ||
3100                                     atomic_read(&dev_priv->mm.wedged), 3000)) {
3101                         ret = -EBUSY;
3102                 }
3103         }
3104
3105         if (ret == 0)
3106                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3107
3108         return ret;
3109 }
3110
3111 int
3112 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3113                     uint32_t alignment,
3114                     bool map_and_fenceable)
3115 {
3116         struct drm_device *dev = obj->base.dev;
3117         struct drm_i915_private *dev_priv = dev->dev_private;
3118         int ret;
3119
3120         BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3121         WARN_ON(i915_verify_lists(dev));
3122
3123         if (obj->gtt_space != NULL) {
3124                 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3125                     (map_and_fenceable && !obj->map_and_fenceable)) {
3126                         WARN(obj->pin_count,
3127                              "bo is already pinned with incorrect alignment:"
3128                              " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3129                              " obj->map_and_fenceable=%d\n",
3130                              obj->gtt_offset, alignment,
3131                              map_and_fenceable,
3132                              obj->map_and_fenceable);
3133                         ret = i915_gem_object_unbind(obj);
3134                         if (ret)
3135                                 return ret;
3136                 }
3137         }
3138
3139         if (obj->gtt_space == NULL) {
3140                 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3141                                                   map_and_fenceable);
3142                 if (ret)
3143                         return ret;
3144         }
3145
3146         if (!obj->has_global_gtt_mapping && map_and_fenceable)
3147                 i915_gem_gtt_bind_object(obj, obj->cache_level);
3148
3149         if (obj->pin_count++ == 0) {
3150                 if (!obj->active)
3151                         list_move_tail(&obj->mm_list,
3152                                        &dev_priv->mm.pinned_list);
3153         }
3154         obj->pin_mappable |= map_and_fenceable;
3155
3156         WARN_ON(i915_verify_lists(dev));
3157         return 0;
3158 }
3159
3160 void
3161 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3162 {
3163         struct drm_device *dev = obj->base.dev;
3164         drm_i915_private_t *dev_priv = dev->dev_private;
3165
3166         WARN_ON(i915_verify_lists(dev));
3167         BUG_ON(obj->pin_count == 0);
3168         BUG_ON(obj->gtt_space == NULL);
3169
3170         if (--obj->pin_count == 0) {
3171                 if (!obj->active)
3172                         list_move_tail(&obj->mm_list,
3173                                        &dev_priv->mm.inactive_list);
3174                 obj->pin_mappable = false;
3175         }
3176         WARN_ON(i915_verify_lists(dev));
3177 }
3178
3179 int
3180 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3181                    struct drm_file *file)
3182 {
3183         struct drm_i915_gem_pin *args = data;
3184         struct drm_i915_gem_object *obj;
3185         int ret;
3186
3187         ret = i915_mutex_lock_interruptible(dev);
3188         if (ret)
3189                 return ret;
3190
3191         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3192         if (&obj->base == NULL) {
3193                 ret = -ENOENT;
3194                 goto unlock;
3195         }
3196
3197         if (obj->madv != I915_MADV_WILLNEED) {
3198                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3199                 ret = -EINVAL;
3200                 goto out;
3201         }
3202
3203         if (obj->pin_filp != NULL && obj->pin_filp != file) {
3204                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3205                           args->handle);
3206                 ret = -EINVAL;
3207                 goto out;
3208         }
3209
3210         obj->user_pin_count++;
3211         obj->pin_filp = file;
3212         if (obj->user_pin_count == 1) {
3213                 ret = i915_gem_object_pin(obj, args->alignment, true);
3214                 if (ret)
3215                         goto out;
3216         }
3217
3218         /* XXX - flush the CPU caches for pinned objects
3219          * as the X server doesn't manage domains yet
3220          */
3221         i915_gem_object_flush_cpu_write_domain(obj);
3222         args->offset = obj->gtt_offset;
3223 out:
3224         drm_gem_object_unreference(&obj->base);
3225 unlock:
3226         mutex_unlock(&dev->struct_mutex);
3227         return ret;
3228 }
3229
3230 int
3231 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3232                      struct drm_file *file)
3233 {
3234         struct drm_i915_gem_pin *args = data;
3235         struct drm_i915_gem_object *obj;
3236         int ret;
3237
3238         ret = i915_mutex_lock_interruptible(dev);
3239         if (ret)
3240                 return ret;
3241
3242         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3243         if (&obj->base == NULL) {
3244                 ret = -ENOENT;
3245                 goto unlock;
3246         }
3247
3248         if (obj->pin_filp != file) {
3249                 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3250                           args->handle);
3251                 ret = -EINVAL;
3252                 goto out;
3253         }
3254         obj->user_pin_count--;
3255         if (obj->user_pin_count == 0) {
3256                 obj->pin_filp = NULL;
3257                 i915_gem_object_unpin(obj);
3258         }
3259
3260 out:
3261         drm_gem_object_unreference(&obj->base);
3262 unlock:
3263         mutex_unlock(&dev->struct_mutex);
3264         return ret;
3265 }
3266
3267 int
3268 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3269                     struct drm_file *file)
3270 {
3271         struct drm_i915_gem_busy *args = data;
3272         struct drm_i915_gem_object *obj;
3273         int ret;
3274
3275         ret = i915_mutex_lock_interruptible(dev);
3276         if (ret)
3277                 return ret;
3278
3279         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3280         if (&obj->base == NULL) {
3281                 ret = -ENOENT;
3282                 goto unlock;
3283         }
3284
3285         /* Count all active objects as busy, even if they are currently not used
3286          * by the gpu. Users of this interface expect objects to eventually
3287          * become non-busy without any further actions, therefore emit any
3288          * necessary flushes here.
3289          */
3290         args->busy = obj->active;
3291         if (args->busy) {
3292                 /* Unconditionally flush objects, even when the gpu still uses this
3293                  * object. Userspace calling this function indicates that it wants to
3294                  * use this buffer rather sooner than later, so issuing the required
3295                  * flush earlier is beneficial.
3296                  */
3297                 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3298                         ret = i915_gem_flush_ring(obj->ring,
3299                                                   0, obj->base.write_domain);
3300                 } else if (obj->ring->outstanding_lazy_request ==
3301                            obj->last_rendering_seqno) {
3302                         struct drm_i915_gem_request *request;
3303
3304                         /* This ring is not being cleared by active usage,
3305                          * so emit a request to do so.
3306                          */
3307                         request = kzalloc(sizeof(*request), GFP_KERNEL);
3308                         if (request) {
3309                                 ret = i915_add_request(obj->ring, NULL, request);
3310                                 if (ret)
3311                                         kfree(request);
3312                         } else
3313                                 ret = -ENOMEM;
3314                 }
3315
3316                 /* Update the active list for the hardware's current position.
3317                  * Otherwise this only updates on a delayed timer or when irqs
3318                  * are actually unmasked, and our working set ends up being
3319                  * larger than required.
3320                  */
3321                 i915_gem_retire_requests_ring(obj->ring);
3322
3323                 args->busy = obj->active;
3324         }
3325
3326         drm_gem_object_unreference(&obj->base);
3327 unlock:
3328         mutex_unlock(&dev->struct_mutex);
3329         return ret;
3330 }
3331
/*
 * Throttle ioctl entry point: forwards to i915_gem_ring_throttle() for the
 * calling file and returns its result. The ioctl payload @data is unused.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv)
{
        int ret = i915_gem_ring_throttle(dev, file_priv);

        return ret;
}
3338
3339 int
3340 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3341                        struct drm_file *file_priv)
3342 {
3343         struct drm_i915_gem_madvise *args = data;
3344         struct drm_i915_gem_object *obj;
3345         int ret;
3346
3347         switch (args->madv) {
3348         case I915_MADV_DONTNEED:
3349         case I915_MADV_WILLNEED:
3350             break;
3351         default:
3352             return -EINVAL;
3353         }
3354
3355         ret = i915_mutex_lock_interruptible(dev);
3356         if (ret)
3357                 return ret;
3358
3359         obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3360         if (&obj->base == NULL) {
3361                 ret = -ENOENT;
3362                 goto unlock;
3363         }
3364
3365         if (obj->pin_count) {
3366                 ret = -EINVAL;
3367                 goto out;
3368         }
3369
3370         if (obj->madv != __I915_MADV_PURGED)
3371                 obj->madv = args->madv;
3372
3373         /* if the object is no longer bound, discard its backing storage */
3374         if (i915_gem_object_is_purgeable(obj) &&
3375             obj->gtt_space == NULL)
3376                 i915_gem_object_truncate(obj);
3377
3378         args->retained = obj->madv != __I915_MADV_PURGED;
3379
3380 out:
3381         drm_gem_object_unreference(&obj->base);
3382 unlock:
3383         mutex_unlock(&dev->struct_mutex);
3384         return ret;
3385 }
3386
3387 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3388                                                   size_t size)
3389 {
3390         struct drm_i915_private *dev_priv = dev->dev_private;
3391         struct drm_i915_gem_object *obj;
3392         struct address_space *mapping;
3393
3394         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3395         if (obj == NULL)
3396                 return NULL;
3397
3398         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3399                 kfree(obj);
3400                 return NULL;
3401         }
3402
3403         mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3404         mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3405
3406         i915_gem_info_add_obj(dev_priv, size);
3407
3408         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3409         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3410
3411         if (HAS_LLC(dev)) {
3412                 /* On some devices, we can have the GPU use the LLC (the CPU
3413                  * cache) for about a 10% performance improvement
3414                  * compared to uncached.  Graphics requests other than
3415                  * display scanout are coherent with the CPU in
3416                  * accessing this cache.  This means in this mode we
3417                  * don't need to clflush on the CPU side, and on the
3418                  * GPU side we only need to flush internal caches to
3419                  * get data visible to the CPU.
3420                  *
3421                  * However, we maintain the display planes as UC, and so
3422                  * need to rebind when first used as such.
3423                  */
3424                 obj->cache_level = I915_CACHE_LLC;
3425         } else
3426                 obj->cache_level = I915_CACHE_NONE;
3427
3428         obj->base.driver_private = NULL;
3429         obj->fence_reg = I915_FENCE_REG_NONE;
3430         INIT_LIST_HEAD(&obj->mm_list);
3431         INIT_LIST_HEAD(&obj->gtt_list);
3432         INIT_LIST_HEAD(&obj->ring_list);
3433         INIT_LIST_HEAD(&obj->exec_list);
3434         INIT_LIST_HEAD(&obj->gpu_write_list);
3435         obj->madv = I915_MADV_WILLNEED;
3436         /* Avoid an unnecessary call to unbind on the first bind. */
3437         obj->map_and_fenceable = true;
3438
3439         return obj;
3440 }
3441
/*
 * GEM init_object hook. It unconditionally hits BUG(); the return statement
 * only satisfies the int signature.
 * NOTE(review): presumably unreachable because objects are set up through
 * i915_gem_alloc_object() instead - confirm against the driver hook table.
 */
int i915_gem_init_object(struct drm_gem_object *obj)
{
        BUG();

        /* Not reached. */
        return 0;
}
3448
3449 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
3450 {
3451         struct drm_device *dev = obj->base.dev;
3452         drm_i915_private_t *dev_priv = dev->dev_private;
3453         int ret;
3454
3455         ret = i915_gem_object_unbind(obj);
3456         if (ret == -ERESTARTSYS) {
3457                 list_move(&obj->mm_list,
3458                           &dev_priv->mm.deferred_free_list);
3459                 return;
3460         }
3461
3462         trace_i915_gem_object_destroy(obj);
3463
3464         if (obj->base.map_list.map)
3465                 drm_gem_free_mmap_offset(&obj->base);
3466
3467         drm_gem_object_release(&obj->base);
3468         i915_gem_info_remove_obj(dev_priv, obj->base.size);
3469
3470         kfree(obj->bit_17);
3471         kfree(obj);
3472 }
3473
3474 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3475 {
3476         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3477         struct drm_device *dev = obj->base.dev;
3478
3479         while (obj->pin_count > 0)
3480                 i915_gem_object_unpin(obj);
3481
3482         if (obj->phys_obj)
3483                 i915_gem_detach_phys_object(dev, obj);
3484
3485         i915_gem_free_object_tail(obj);
3486 }
3487
3488 int
3489 i915_gem_idle(struct drm_device *dev)
3490 {
3491         drm_i915_private_t *dev_priv = dev->dev_private;
3492         int ret;
3493
3494         mutex_lock(&dev->struct_mutex);
3495
3496         if (dev_priv->mm.suspended) {
3497                 mutex_unlock(&dev->struct_mutex);
3498                 return 0;
3499         }
3500
3501         ret = i915_gpu_idle(dev, true);
3502         if (ret) {
3503                 mutex_unlock(&dev->struct_mutex);
3504                 return ret;
3505         }
3506
3507         /* Under UMS, be paranoid and evict. */
3508         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3509                 ret = i915_gem_evict_inactive(dev, false);
3510                 if (ret) {
3511                         mutex_unlock(&dev->struct_mutex);
3512                         return ret;
3513                 }
3514         }
3515
3516         i915_gem_reset_fences(dev);
3517
3518         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
3519          * We need to replace this with a semaphore, or something.
3520          * And not confound mm.suspended!
3521          */
3522         dev_priv->mm.suspended = 1;
3523         del_timer_sync(&dev_priv->hangcheck_timer);
3524
3525         i915_kernel_lost_context(dev);
3526         i915_gem_cleanup_ringbuffer(dev);
3527
3528         mutex_unlock(&dev->struct_mutex);
3529
3530         /* Cancel the retire work handler, which should be idle now. */
3531         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3532
3533         return 0;
3534 }
3535
3536 void i915_gem_init_swizzling(struct drm_device *dev)
3537 {
3538         drm_i915_private_t *dev_priv = dev->dev_private;
3539
3540         if (INTEL_INFO(dev)->gen < 5 ||
3541             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3542                 return;
3543
3544         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3545                                  DISP_TILE_SURFACE_SWIZZLING);
3546
3547         if (IS_GEN5(dev))
3548                 return;
3549
3550         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3551         if (IS_GEN6(dev))
3552                 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
3553         else
3554                 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
3555 }
3556
3557 void i915_gem_init_ppgtt(struct drm_device *dev)
3558 {
3559         drm_i915_private_t *dev_priv = dev->dev_private;
3560         uint32_t pd_offset;
3561         struct intel_ring_buffer *ring;
3562         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
3563         uint32_t __iomem *pd_addr;
3564         uint32_t pd_entry;
3565         int i;
3566
3567         if (!dev_priv->mm.aliasing_ppgtt)
3568                 return;
3569
3570
3571         pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
3572         for (i = 0; i < ppgtt->num_pd_entries; i++) {
3573                 dma_addr_t pt_addr;
3574
3575                 if (dev_priv->mm.gtt->needs_dmar)
3576                         pt_addr = ppgtt->pt_dma_addr[i];
3577                 else
3578                         pt_addr = page_to_phys(ppgtt->pt_pages[i]);
3579
3580                 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
3581                 pd_entry |= GEN6_PDE_VALID;
3582
3583                 writel(pd_entry, pd_addr + i);
3584         }
3585         readl(pd_addr);
3586
3587         pd_offset = ppgtt->pd_offset;
3588         pd_offset /= 64; /* in cachelines, */
3589         pd_offset <<= 16;
3590
3591         if (INTEL_INFO(dev)->gen == 6) {
3592                 uint32_t ecochk, gab_ctl, ecobits;
3593
3594                 ecobits = I915_READ(GAC_ECO_BITS);
3595                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
3596
3597                 gab_ctl = I915_READ(GAB_CTL);
3598                 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
3599
3600                 ecochk = I915_READ(GAM_ECOCHK);
3601                 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
3602                                        ECOCHK_PPGTT_CACHE64B);
3603                 I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));
3604         } else if (INTEL_INFO(dev)->gen >= 7) {
3605                 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
3606                 /* GFX_MODE is per-ring on gen7+ */
3607         }
3608
3609         for (i = 0; i < I915_NUM_RINGS; i++) {
3610                 ring = &dev_priv->ring[i];
3611
3612                 if (INTEL_INFO(dev)->gen >= 7)
3613                         I915_WRITE(RING_MODE_GEN7(ring),
3614                                    GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));
3615
3616                 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
3617                 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
3618         }
3619 }
3620
3621 int
3622 i915_gem_init_hw(struct drm_device *dev)
3623 {
3624         drm_i915_private_t *dev_priv = dev->dev_private;
3625         int ret;
3626
3627         i915_gem_init_swizzling(dev);
3628
3629         ret = intel_init_render_ring_buffer(dev);
3630         if (ret)
3631                 return ret;
3632
3633         if (HAS_BSD(dev)) {
3634                 ret = intel_init_bsd_ring_buffer(dev);
3635                 if (ret)
3636                         goto cleanup_render_ring;
3637         }
3638
3639         if (HAS_BLT(dev)) {
3640                 ret = intel_init_blt_ring_buffer(dev);
3641                 if (ret)
3642                         goto cleanup_bsd_ring;
3643         }
3644
3645         dev_priv->next_seqno = 1;
3646
3647         i915_gem_init_ppgtt(dev);
3648
3649         return 0;
3650
3651 cleanup_bsd_ring:
3652         intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3653 cleanup_render_ring:
3654         intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3655         return ret;
3656 }
3657
3658 void
3659 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3660 {
3661         drm_i915_private_t *dev_priv = dev->dev_private;
3662         int i;
3663
3664         for (i = 0; i < I915_NUM_RINGS; i++)
3665                 intel_cleanup_ring_buffer(&dev_priv->ring[i]);
3666 }
3667
3668 int
3669 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3670                        struct drm_file *file_priv)
3671 {
3672         drm_i915_private_t *dev_priv = dev->dev_private;
3673         int ret, i;
3674
3675         if (drm_core_check_feature(dev, DRIVER_MODESET))
3676                 return 0;
3677
3678         if (atomic_read(&dev_priv->mm.wedged)) {
3679                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3680                 atomic_set(&dev_priv->mm.wedged, 0);
3681         }
3682
3683         mutex_lock(&dev->struct_mutex);
3684         dev_priv->mm.suspended = 0;
3685
3686         ret = i915_gem_init_hw(dev);
3687         if (ret != 0) {
3688                 mutex_unlock(&dev->struct_mutex);
3689                 return ret;
3690         }
3691
3692         BUG_ON(!list_empty(&dev_priv->mm.active_list));
3693         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3694         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3695         for (i = 0; i < I915_NUM_RINGS; i++) {
3696                 BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3697                 BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3698         }
3699         mutex_unlock(&dev->struct_mutex);
3700
3701         ret = drm_irq_install(dev);
3702         if (ret)
3703                 goto cleanup_ringbuffer;
3704
3705         return 0;
3706
3707 cleanup_ringbuffer:
3708         mutex_lock(&dev->struct_mutex);
3709         i915_gem_cleanup_ringbuffer(dev);
3710         dev_priv->mm.suspended = 1;
3711         mutex_unlock(&dev->struct_mutex);
3712
3713         return ret;
3714 }
3715
3716 int
3717 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3718                        struct drm_file *file_priv)
3719 {
3720         if (drm_core_check_feature(dev, DRIVER_MODESET))
3721                 return 0;
3722
3723         drm_irq_uninstall(dev);
3724         return i915_gem_idle(dev);
3725 }
3726
3727 void
3728 i915_gem_lastclose(struct drm_device *dev)
3729 {
3730         int ret;
3731
3732         if (drm_core_check_feature(dev, DRIVER_MODESET))
3733                 return;
3734
3735         ret = i915_gem_idle(dev);
3736         if (ret)
3737                 DRM_ERROR("failed to idle hardware: %d\n", ret);
3738 }
3739
3740 static void
3741 init_ring_lists(struct intel_ring_buffer *ring)
3742 {
3743         INIT_LIST_HEAD(&ring->active_list);
3744         INIT_LIST_HEAD(&ring->request_list);
3745         INIT_LIST_HEAD(&ring->gpu_write_list);
3746 }
3747
3748 void
3749 i915_gem_load(struct drm_device *dev)
3750 {
3751         int i;
3752         drm_i915_private_t *dev_priv = dev->dev_private;
3753
3754         INIT_LIST_HEAD(&dev_priv->mm.active_list);
3755         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3756         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3757         INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
3758         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3759         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
3760         INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3761         for (i = 0; i < I915_NUM_RINGS; i++)
3762                 init_ring_lists(&dev_priv->ring[i]);
3763         for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3764                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3765         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3766                           i915_gem_retire_work_handler);
3767         init_completion(&dev_priv->error_completion);
3768
3769         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3770         if (IS_GEN3(dev)) {
3771                 u32 tmp = I915_READ(MI_ARB_STATE);
3772                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3773                         /* arb state is a masked write, so set bit + bit in mask */
3774                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3775                         I915_WRITE(MI_ARB_STATE, tmp);
3776                 }
3777         }
3778
3779         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3780
3781         /* Old X drivers will take 0-2 for front, back, depth buffers */
3782         if (!drm_core_check_feature(dev, DRIVER_MODESET))
3783                 dev_priv->fence_reg_start = 3;
3784
3785         if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3786                 dev_priv->num_fence_regs = 16;
3787         else
3788                 dev_priv->num_fence_regs = 8;
3789
3790         /* Initialize fence registers to zero */
3791         for (i = 0; i < dev_priv->num_fence_regs; i++) {
3792                 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
3793         }
3794
3795         i915_gem_detect_bit_6_swizzle(dev);
3796         init_waitqueue_head(&dev_priv->pending_flip_queue);
3797
3798         dev_priv->mm.interruptible = true;
3799
3800         dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3801         dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3802         register_shrinker(&dev_priv->mm.inactive_shrinker);
3803 }
3804
3805 /*
3806  * Create a physically contiguous memory object for this object
3807  * e.g. for cursor + overlay regs
3808  */
3809 static int i915_gem_init_phys_object(struct drm_device *dev,
3810                                      int id, int size, int align)
3811 {
3812         drm_i915_private_t *dev_priv = dev->dev_private;
3813         struct drm_i915_gem_phys_object *phys_obj;
3814         int ret;
3815
3816         if (dev_priv->mm.phys_objs[id - 1] || !size)
3817                 return 0;
3818
3819         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
3820         if (!phys_obj)
3821                 return -ENOMEM;
3822
3823         phys_obj->id = id;
3824
3825         phys_obj->handle = drm_pci_alloc(dev, size, align);
3826         if (!phys_obj->handle) {
3827                 ret = -ENOMEM;
3828                 goto kfree_obj;
3829         }
3830 #ifdef CONFIG_X86
3831         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3832 #endif
3833
3834         dev_priv->mm.phys_objs[id - 1] = phys_obj;
3835
3836         return 0;
3837 kfree_obj:
3838         kfree(phys_obj);
3839         return ret;
3840 }
3841
3842 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3843 {
3844         drm_i915_private_t *dev_priv = dev->dev_private;
3845         struct drm_i915_gem_phys_object *phys_obj;
3846
3847         if (!dev_priv->mm.phys_objs[id - 1])
3848                 return;
3849
3850         phys_obj = dev_priv->mm.phys_objs[id - 1];
3851         if (phys_obj->cur_obj) {
3852                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3853         }
3854
3855 #ifdef CONFIG_X86
3856         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3857 #endif
3858         drm_pci_free(dev, phys_obj->handle);
3859         kfree(phys_obj);
3860         dev_priv->mm.phys_objs[id - 1] = NULL;
3861 }
3862
3863 void i915_gem_free_all_phys_object(struct drm_device *dev)
3864 {
3865         int i;
3866
3867         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3868                 i915_gem_free_phys_object(dev, i);
3869 }
3870
3871 void i915_gem_detach_phys_object(struct drm_device *dev,
3872                                  struct drm_i915_gem_object *obj)
3873 {
3874         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3875         char *vaddr;
3876         int i;
3877         int page_count;
3878
3879         if (!obj->phys_obj)
3880                 return;
3881         vaddr = obj->phys_obj->handle->vaddr;
3882
3883         page_count = obj->base.size / PAGE_SIZE;
3884         for (i = 0; i < page_count; i++) {
3885                 struct page *page = shmem_read_mapping_page(mapping, i);
3886                 if (!IS_ERR(page)) {
3887                         char *dst = kmap_atomic(page);
3888                         memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
3889                         kunmap_atomic(dst);
3890
3891                         drm_clflush_pages(&page, 1);
3892
3893                         set_page_dirty(page);
3894                         mark_page_accessed(page);
3895                         page_cache_release(page);
3896                 }
3897         }
3898         intel_gtt_chipset_flush();
3899
3900         obj->phys_obj->cur_obj = NULL;
3901         obj->phys_obj = NULL;
3902 }
3903
3904 int
3905 i915_gem_attach_phys_object(struct drm_device *dev,
3906                             struct drm_i915_gem_object *obj,
3907                             int id,
3908                             int align)
3909 {
3910         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3911         drm_i915_private_t *dev_priv = dev->dev_private;
3912         int ret = 0;
3913         int page_count;
3914         int i;
3915
3916         if (id > I915_MAX_PHYS_OBJECT)
3917                 return -EINVAL;
3918
3919         if (obj->phys_obj) {
3920                 if (obj->phys_obj->id == id)
3921                         return 0;
3922                 i915_gem_detach_phys_object(dev, obj);
3923         }
3924
3925         /* create a new object */
3926         if (!dev_priv->mm.phys_objs[id - 1]) {
3927                 ret = i915_gem_init_phys_object(dev, id,
3928                                                 obj->base.size, align);
3929                 if (ret) {
3930                         DRM_ERROR("failed to init phys object %d size: %zu\n",
3931                                   id, obj->base.size);
3932                         return ret;
3933                 }
3934         }
3935
3936         /* bind to the object */
3937         obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3938         obj->phys_obj->cur_obj = obj;
3939
3940         page_count = obj->base.size / PAGE_SIZE;
3941
3942         for (i = 0; i < page_count; i++) {
3943                 struct page *page;
3944                 char *dst, *src;
3945
3946                 page = shmem_read_mapping_page(mapping, i);
3947                 if (IS_ERR(page))
3948                         return PTR_ERR(page);
3949
3950                 src = kmap_atomic(page);
3951                 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
3952                 memcpy(dst, src, PAGE_SIZE);
3953                 kunmap_atomic(src);
3954
3955                 mark_page_accessed(page);
3956                 page_cache_release(page);
3957         }
3958
3959         return 0;
3960 }
3961
3962 static int
3963 i915_gem_phys_pwrite(struct drm_device *dev,
3964                      struct drm_i915_gem_object *obj,
3965                      struct drm_i915_gem_pwrite *args,
3966                      struct drm_file *file_priv)
3967 {
3968         void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
3969         char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
3970
3971         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
3972                 unsigned long unwritten;
3973
3974                 /* The physical object once assigned is fixed for the lifetime
3975                  * of the obj, so we can safely drop the lock and continue
3976                  * to access vaddr.
3977                  */
3978                 mutex_unlock(&dev->struct_mutex);
3979                 unwritten = copy_from_user(vaddr, user_data, args->size);
3980                 mutex_lock(&dev->struct_mutex);
3981                 if (unwritten)
3982                         return -EFAULT;
3983         }
3984
3985         intel_gtt_chipset_flush();
3986         return 0;
3987 }
3988
3989 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
3990 {
3991         struct drm_i915_file_private *file_priv = file->driver_priv;
3992
3993         /* Clean up our request list when the client is going away, so that
3994          * later retire_requests won't dereference our soon-to-be-gone
3995          * file_priv.
3996          */
3997         spin_lock(&file_priv->mm.lock);
3998         while (!list_empty(&file_priv->mm.request_list)) {
3999                 struct drm_i915_gem_request *request;
4000
4001                 request = list_first_entry(&file_priv->mm.request_list,
4002                                            struct drm_i915_gem_request,
4003                                            client_list);
4004                 list_del(&request->client_list);
4005                 request->file_priv = NULL;
4006         }
4007         spin_unlock(&file_priv->mm.lock);
4008 }
4009
4010 static int
4011 i915_gpu_is_active(struct drm_device *dev)
4012 {
4013         drm_i915_private_t *dev_priv = dev->dev_private;
4014         int lists_empty;
4015
4016         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4017                       list_empty(&dev_priv->mm.active_list);
4018
4019         return !lists_empty;
4020 }
4021
4022 static int
4023 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4024 {
4025         struct drm_i915_private *dev_priv =
4026                 container_of(shrinker,
4027                              struct drm_i915_private,
4028                              mm.inactive_shrinker);
4029         struct drm_device *dev = dev_priv->dev;
4030         struct drm_i915_gem_object *obj, *next;
4031         int nr_to_scan = sc->nr_to_scan;
4032         int cnt;
4033
4034         if (!mutex_trylock(&dev->struct_mutex))
4035                 return 0;
4036
4037         /* "fast-path" to count number of available objects */
4038         if (nr_to_scan == 0) {
4039                 cnt = 0;
4040                 list_for_each_entry(obj,
4041                                     &dev_priv->mm.inactive_list,
4042                                     mm_list)
4043                         cnt++;
4044                 mutex_unlock(&dev->struct_mutex);
4045                 return cnt / 100 * sysctl_vfs_cache_pressure;
4046         }
4047
4048 rescan:
4049         /* first scan for clean buffers */
4050         i915_gem_retire_requests(dev);
4051
4052         list_for_each_entry_safe(obj, next,
4053                                  &dev_priv->mm.inactive_list,
4054                                  mm_list) {
4055                 if (i915_gem_object_is_purgeable(obj)) {
4056                         if (i915_gem_object_unbind(obj) == 0 &&
4057                             --nr_to_scan == 0)
4058                                 break;
4059                 }
4060         }
4061
4062         /* second pass, evict/count anything still on the inactive list */
4063         cnt = 0;
4064         list_for_each_entry_safe(obj, next,
4065                                  &dev_priv->mm.inactive_list,
4066                                  mm_list) {
4067                 if (nr_to_scan &&
4068                     i915_gem_object_unbind(obj) == 0)
4069                         nr_to_scan--;
4070                 else
4071                         cnt++;
4072         }
4073
4074         if (nr_to_scan && i915_gpu_is_active(dev)) {
4075                 /*
4076                  * We are desperate for pages, so as a last resort, wait
4077                  * for the GPU to finish and discard whatever we can.
4078                  * This has a dramatic impact to reduce the number of
4079                  * OOM-killer events whilst running the GPU aggressively.
4080                  */
4081                 if (i915_gpu_idle(dev, true) == 0)
4082                         goto rescan;
4083         }
4084         mutex_unlock(&dev->struct_mutex);
4085         return cnt / 100 * sysctl_vfs_cache_pressure;
4086 }