]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blobdiff - drivers/gpu/drm/i915/intel_display.c
drm/i915: kill STANDARD/CURSOR plane screams
[mirror_ubuntu-artful-kernel.git] / drivers / gpu / drm / i915 / intel_display.c
index 40018388660c005fa8e8bf78dc08819bdd7386a3..fd140c3984080f69504d27028f0320fbc3265464 100644 (file)
 #include <linux/reservation.h>
 #include <linux/dma-buf.h>
 
+static bool is_mmio_work(struct intel_flip_work *work)
+{
+       return work->mmio_work.func;
+}
+
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
        DRM_FORMAT_C8,
@@ -118,9 +123,7 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc);
 static void intel_modeset_setup_hw_state(struct drm_device *dev);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 static int ilk_max_pixel_rate(struct drm_atomic_state *state);
-static void intel_modeset_verify_crtc(struct drm_crtc *crtc,
-                                     struct drm_crtc_state *old_state,
-                                     struct drm_crtc_state *new_state);
+static int broxton_calc_cdclk(int max_pixclk);
 
 struct intel_limit {
        struct {
@@ -2525,6 +2528,20 @@ out_unref_obj:
        return false;
 }
 
+/* Update plane->state->fb to match plane->fb after driver-internal updates */
+static void
+update_state_fb(struct drm_plane *plane)
+{
+       if (plane->fb == plane->state->fb)
+               return;
+
+       if (plane->state->fb)
+               drm_framebuffer_unreference(plane->state->fb);
+       plane->state->fb = plane->fb;
+       if (plane->state->fb)
+               drm_framebuffer_reference(plane->state->fb);
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
                             struct intel_initial_plane_config *plane_config)
@@ -3097,6 +3114,14 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        return -ENODEV;
 }
 
+static void intel_complete_page_flips(struct drm_i915_private *dev_priv)
+{
+       struct intel_crtc *crtc;
+
+       for_each_intel_crtc(dev_priv->dev, crtc)
+               intel_finish_page_flip_cs(dev_priv, crtc->pipe);
+}
+
 static void intel_update_primary_planes(struct drm_device *dev)
 {
        struct drm_crtc *crtc;
@@ -3137,6 +3162,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+       /*
+        * Flips in the rings will be nuked by the reset,
+        * so complete all pending flips so that user space
+        * will get its events and not get stuck.
+        */
+       intel_complete_page_flips(dev_priv);
+
        /* no reset support for gen2 */
        if (IS_GEN2(dev_priv))
                return;
@@ -3179,7 +3211,20 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
-       return !list_empty_careful(&to_intel_crtc(crtc)->flip_work);
+       struct drm_device *dev = crtc->dev;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       unsigned reset_counter;
+       bool pending;
+
+       reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
+       if (intel_crtc->reset_counter != reset_counter)
+               return false;
+
+       spin_lock_irq(&dev->event_lock);
+       pending = to_intel_crtc(crtc)->flip_work != NULL;
+       spin_unlock_irq(&dev->event_lock);
+
+       return pending;
 }
 
 static void intel_update_pipe_config(struct intel_crtc *crtc,
@@ -3755,7 +3800,7 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev)
                if (atomic_read(&crtc->unpin_work_count) == 0)
                        continue;
 
-               if (!list_empty_careful(&crtc->flip_work))
+               if (crtc->flip_work)
                        intel_wait_for_vblank(dev, crtc->pipe);
 
                return true;
@@ -3764,30 +3809,23 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev)
        return false;
 }
 
-static void page_flip_completed(struct intel_crtc *intel_crtc, struct intel_flip_work *work)
+static void page_flip_completed(struct intel_crtc *intel_crtc)
 {
        struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-       struct drm_plane_state *new_plane_state;
-       struct drm_plane *primary = intel_crtc->base.primary;
+       struct intel_flip_work *work = intel_crtc->flip_work;
+
+       intel_crtc->flip_work = NULL;
 
        if (work->event)
                drm_crtc_send_vblank_event(&intel_crtc->base, work->event);
 
        drm_crtc_vblank_put(&intel_crtc->base);
 
-       new_plane_state = &work->old_plane_state[0]->base;
-       if (work->num_planes >= 1 &&
-           new_plane_state->plane == primary &&
-           new_plane_state->fb)
-               trace_i915_flip_complete(intel_crtc->plane,
-                                        intel_fb_obj(new_plane_state->fb));
-
-       if (work->can_async_unpin) {
-               list_del_init(&work->head);
-               wake_up_all(&dev_priv->pending_flip_queue);
-       }
-
+       wake_up_all(&dev_priv->pending_flip_queue);
        queue_work(dev_priv->wq, &work->unpin_work);
+
+       trace_i915_flip_complete(intel_crtc->plane,
+                                work->pending_flip_obj);
 }
 
 static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
@@ -3806,7 +3844,18 @@ static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
        if (ret < 0)
                return ret;
 
-       WARN(ret == 0, "Stuck page flip\n");
+       if (ret == 0) {
+               struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+               struct intel_flip_work *work;
+
+               spin_lock_irq(&dev->event_lock);
+               work = intel_crtc->flip_work;
+               if (work && !is_mmio_work(work)) {
+                       WARN_ONCE(1, "Removing stuck page flip\n");
+                       page_flip_completed(intel_crtc);
+               }
+               spin_unlock_irq(&dev->event_lock);
+       }
 
        return 0;
 }
@@ -4229,8 +4278,9 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
        struct intel_crtc *intel_crtc = to_intel_crtc(state->base.crtc);
        const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
-       DRM_DEBUG_KMS("Updating scaler for [CRTC:%i] scaler_user index %u.%u\n",
-                     intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX);
+       DRM_DEBUG_KMS("Updating scaler for [CRTC:%d:%s] scaler_user index %u.%u\n",
+                     intel_crtc->base.base.id, intel_crtc->base.name,
+                     intel_crtc->pipe, SKL_CRTC_INDEX);
 
        return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
                &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
@@ -4260,9 +4310,9 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 
        bool force_detach = !fb || !plane_state->visible;
 
-       DRM_DEBUG_KMS("Updating scaler for [PLANE:%d] scaler_user index %u.%u\n",
-                     intel_plane->base.base.id, intel_crtc->pipe,
-                     drm_plane_index(&intel_plane->base));
+       DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n",
+                     intel_plane->base.base.id, intel_plane->base.name,
+                     intel_crtc->pipe, drm_plane_index(&intel_plane->base));
 
        ret = skl_update_scaler(crtc_state, force_detach,
                                drm_plane_index(&intel_plane->base),
@@ -4278,8 +4328,9 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 
        /* check colorkey */
        if (plane_state->ckey.flags != I915_SET_COLORKEY_NONE) {
-               DRM_DEBUG_KMS("[PLANE:%d] scaling with color key not allowed",
-                             intel_plane->base.base.id);
+               DRM_DEBUG_KMS("[PLANE:%d:%s] scaling with color key not allowed",
+                             intel_plane->base.base.id,
+                             intel_plane->base.name);
                return -EINVAL;
        }
 
@@ -4298,8 +4349,9 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
        case DRM_FORMAT_VYUY:
                break;
        default:
-               DRM_DEBUG_KMS("[PLANE:%d] FB:%d unsupported scaling format 0x%x\n",
-                       intel_plane->base.base.id, fb->base.id, fb->pixel_format);
+               DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n",
+                             intel_plane->base.base.id, intel_plane->base.name,
+                             fb->base.id, fb->pixel_format);
                return -EINVAL;
        }
 
@@ -4537,6 +4589,39 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
        }
 }
 
+static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
+{
+       struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+       struct drm_atomic_state *old_state = old_crtc_state->base.state;
+       struct intel_crtc_state *pipe_config =
+               to_intel_crtc_state(crtc->base.state);
+       struct drm_device *dev = crtc->base.dev;
+       struct drm_plane *primary = crtc->base.primary;
+       struct drm_plane_state *old_pri_state =
+               drm_atomic_get_existing_plane_state(old_state, primary);
+
+       intel_frontbuffer_flip(dev, pipe_config->fb_bits);
+
+       crtc->wm.cxsr_allowed = true;
+
+       if (pipe_config->update_wm_post && pipe_config->base.active)
+               intel_update_watermarks(&crtc->base);
+
+       if (old_pri_state) {
+               struct intel_plane_state *primary_state =
+                       to_intel_plane_state(primary->state);
+               struct intel_plane_state *old_primary_state =
+                       to_intel_plane_state(old_pri_state);
+
+               intel_fbc_post_update(crtc);
+
+               if (primary_state->visible &&
+                   (needs_modeset(&pipe_config->base) ||
+                    !old_primary_state->visible))
+                       intel_post_enable_primary(&crtc->base);
+       }
+}
+
 static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
 {
        struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
@@ -4556,7 +4641,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
                struct intel_plane_state *old_primary_state =
                        to_intel_plane_state(old_pri_state);
 
-               intel_fbc_pre_update(crtc, pipe_config, primary_state);
+               intel_fbc_pre_update(crtc);
 
                if (old_primary_state->visible &&
                    (modeset || !primary_state->visible))
@@ -5146,21 +5231,18 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc,
        struct drm_i915_private *dev_priv = crtc->dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        enum intel_display_power_domain domain;
-       unsigned long domains, new_domains, old_domains, ms_domain = 0;
+       unsigned long domains, new_domains, old_domains;
 
        old_domains = intel_crtc->enabled_power_domains;
        intel_crtc->enabled_power_domains = new_domains =
                get_crtc_power_domains(crtc, crtc_state);
 
-       if (needs_modeset(&crtc_state->base))
-               ms_domain = BIT(POWER_DOMAIN_MODESET);
-
-       domains = (new_domains & ~old_domains) | ms_domain;
+       domains = new_domains & ~old_domains;
 
        for_each_power_domain(domain, domains)
                intel_display_power_get(dev_priv, domain);
 
-       return (old_domains & ~new_domains) | ms_domain;
+       return old_domains & ~new_domains;
 }
 
 static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
@@ -5187,21 +5269,34 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
                return max_cdclk_freq*90/100;
 }
 
+static int skl_calc_cdclk(int max_pixclk, int vco);
+
 static void intel_update_max_cdclk(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
                u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK;
+               int max_cdclk, vco;
+
+               vco = dev_priv->skl_preferred_vco_freq;
+               WARN_ON(vco != 8100000 && vco != 8640000);
 
+               /*
+                * Use the lower (vco 8640) cdclk values as a
+                * first guess. skl_calc_cdclk() will correct it
+                * if the preferred vco is 8100 instead.
+                */
                if (limit == SKL_DFSM_CDCLK_LIMIT_675)
-                       dev_priv->max_cdclk_freq = 675000;
+                       max_cdclk = 617143;
                else if (limit == SKL_DFSM_CDCLK_LIMIT_540)
-                       dev_priv->max_cdclk_freq = 540000;
+                       max_cdclk = 540000;
                else if (limit == SKL_DFSM_CDCLK_LIMIT_450)
-                       dev_priv->max_cdclk_freq = 450000;
+                       max_cdclk = 432000;
                else
-                       dev_priv->max_cdclk_freq = 337500;
+                       max_cdclk = 308571;
+
+               dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco);
        } else if (IS_BROXTON(dev)) {
                dev_priv->max_cdclk_freq = 624000;
        } else if (IS_BROADWELL(dev))  {
@@ -5242,8 +5337,14 @@ static void intel_update_cdclk(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
-       DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n",
-                        dev_priv->cdclk_freq);
+
+       if (INTEL_GEN(dev_priv) >= 9)
+               DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n",
+                                dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco,
+                                dev_priv->cdclk_pll.ref);
+       else
+               DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n",
+                                dev_priv->cdclk_freq);
 
        /*
         * 9:0 CMBUS [sic] CDCLK frequency (cdfreq):
@@ -5253,9 +5354,6 @@ static void intel_update_cdclk(struct drm_device *dev)
         */
        if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000));
-
-       if (dev_priv->max_cdclk_freq == 0)
-               intel_update_max_cdclk(dev);
 }
 
 /* convert from kHz to .1 fixpoint MHz with -1MHz offset */
@@ -5264,51 +5362,93 @@ static int skl_cdclk_decimal(int cdclk)
        return DIV_ROUND_CLOSEST(cdclk - 1000, 500);
 }
 
-static void broxton_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
+static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk)
 {
-       uint32_t divider;
-       uint32_t ratio;
-       uint32_t current_cdclk;
-       int ret;
+       int ratio;
+
+       if (cdclk == dev_priv->cdclk_pll.ref)
+               return 0;
 
-       /* frequency = 19.2MHz * ratio / 2 / div{1,1.5,2,4} */
        switch (cdclk) {
+       default:
+               MISSING_CASE(cdclk);
        case 144000:
+       case 288000:
+       case 384000:
+       case 576000:
+               ratio = 60;
+               break;
+       case 624000:
+               ratio = 65;
+               break;
+       }
+
+       return dev_priv->cdclk_pll.ref * ratio;
+}
+
+static void bxt_de_pll_disable(struct drm_i915_private *dev_priv)
+{
+       I915_WRITE(BXT_DE_PLL_ENABLE, 0);
+
+       /* Timeout 200us */
+       if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1))
+               DRM_ERROR("timeout waiting for DE PLL unlock\n");
+
+       dev_priv->cdclk_pll.vco = 0;
+}
+
+static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco)
+{
+       int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref);
+       u32 val;
+
+       val = I915_READ(BXT_DE_PLL_CTL);
+       val &= ~BXT_DE_PLL_RATIO_MASK;
+       val |= BXT_DE_PLL_RATIO(ratio);
+       I915_WRITE(BXT_DE_PLL_CTL, val);
+
+       I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE);
+
+       /* Timeout 200us */
+       if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1))
+               DRM_ERROR("timeout waiting for DE PLL lock\n");
+
+       dev_priv->cdclk_pll.vco = vco;
+}
+
+static void broxton_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
+{
+       u32 val, divider;
+       int vco, ret;
+
+       vco = bxt_de_pll_vco(dev_priv, cdclk);
+
+       DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco);
+
+       /* cdclk = vco / 2 / div{1,1.5,2,4} */
+       switch (DIV_ROUND_CLOSEST(vco, cdclk)) {
+       case 8:
                divider = BXT_CDCLK_CD2X_DIV_SEL_4;
-               ratio = BXT_DE_PLL_RATIO(60);
                break;
-       case 288000:
+       case 4:
                divider = BXT_CDCLK_CD2X_DIV_SEL_2;
-               ratio = BXT_DE_PLL_RATIO(60);
                break;
-       case 384000:
+       case 3:
                divider = BXT_CDCLK_CD2X_DIV_SEL_1_5;
-               ratio = BXT_DE_PLL_RATIO(60);
-               break;
-       case 576000:
-               divider = BXT_CDCLK_CD2X_DIV_SEL_1;
-               ratio = BXT_DE_PLL_RATIO(60);
                break;
-       case 624000:
+       case 2:
                divider = BXT_CDCLK_CD2X_DIV_SEL_1;
-               ratio = BXT_DE_PLL_RATIO(65);
-               break;
-       case 19200:
-               /*
-                * Bypass frequency with DE PLL disabled. Init ratio, divider
-                * to suppress GCC warning.
-                */
-               ratio = 0;
-               divider = 0;
                break;
        default:
-               DRM_ERROR("unsupported CDCLK freq %d", cdclk);
+               WARN_ON(cdclk != dev_priv->cdclk_pll.ref);
+               WARN_ON(vco != 0);
 
-               return;
+               divider = BXT_CDCLK_CD2X_DIV_SEL_1;
+               break;
        }
 
-       mutex_lock(&dev_priv->rps.hw_lock);
        /* Inform power controller of upcoming frequency change */
+       mutex_lock(&dev_priv->rps.hw_lock);
        ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
                                      0x80000000);
        mutex_unlock(&dev_priv->rps.hw_lock);
@@ -5319,52 +5459,26 @@ static void broxton_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
                return;
        }
 
-       current_cdclk = I915_READ(CDCLK_CTL) & CDCLK_FREQ_DECIMAL_MASK;
-       /* convert from .1 fixpoint MHz with -1MHz offset to kHz */
-       current_cdclk = current_cdclk * 500 + 1000;
+       if (dev_priv->cdclk_pll.vco != 0 &&
+           dev_priv->cdclk_pll.vco != vco)
+               bxt_de_pll_disable(dev_priv);
+
+       if (dev_priv->cdclk_pll.vco != vco)
+               bxt_de_pll_enable(dev_priv, vco);
 
+       val = divider | skl_cdclk_decimal(cdclk);
        /*
-        * DE PLL has to be disabled when
-        * - setting to 19.2MHz (bypass, PLL isn't used)
-        * - before setting to 624MHz (PLL needs toggling)
-        * - before setting to any frequency from 624MHz (PLL needs toggling)
-        */
-       if (cdclk == 19200 || cdclk == 624000 ||
-           current_cdclk == 624000) {
-               I915_WRITE(BXT_DE_PLL_ENABLE, ~BXT_DE_PLL_PLL_ENABLE);
-               /* Timeout 200us */
-               if (wait_for(!(I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK),
-                            1))
-                       DRM_ERROR("timout waiting for DE PLL unlock\n");
-       }
-
-       if (cdclk != 19200) {
-               uint32_t val;
-
-               val = I915_READ(BXT_DE_PLL_CTL);
-               val &= ~BXT_DE_PLL_RATIO_MASK;
-               val |= ratio;
-               I915_WRITE(BXT_DE_PLL_CTL, val);
-
-               I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE);
-               /* Timeout 200us */
-               if (wait_for(I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK, 1))
-                       DRM_ERROR("timeout waiting for DE PLL lock\n");
-
-               val = divider | skl_cdclk_decimal(cdclk);
-               /*
-                * FIXME if only the cd2x divider needs changing, it could be done
-                * without shutting off the pipe (if only one pipe is active).
-                */
-               val |= BXT_CDCLK_CD2X_PIPE_NONE;
-               /*
-                * Disable SSA Precharge when CD clock frequency < 500 MHz,
-                * enable otherwise.
-                */
-               if (cdclk >= 500000)
-                       val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
-               I915_WRITE(CDCLK_CTL, val);
-       }
+        * FIXME if only the cd2x divider needs changing, it could be done
+        * without shutting off the pipe (if only one pipe is active).
+        */
+       val |= BXT_CDCLK_CD2X_PIPE_NONE;
+       /*
+        * Disable SSA Precharge when CD clock frequency < 500 MHz,
+        * enable otherwise.
+        */
+       if (cdclk >= 500000)
+               val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
+       I915_WRITE(CDCLK_CTL, val);
 
        mutex_lock(&dev_priv->rps.hw_lock);
        ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
@@ -5380,114 +5494,155 @@ static void broxton_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
        intel_update_cdclk(dev_priv->dev);
 }
 
-static bool broxton_cdclk_is_enabled(struct drm_i915_private *dev_priv)
+static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv)
 {
-       if (!(I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_PLL_ENABLE))
-               return false;
+       u32 cdctl, expected;
 
-       /* TODO: Check for a valid CDCLK rate */
+       intel_update_cdclk(dev_priv->dev);
 
-       if (!(I915_READ(DBUF_CTL) & DBUF_POWER_REQUEST)) {
-               DRM_DEBUG_DRIVER("CDCLK enabled, but DBUF power not requested\n");
+       if (dev_priv->cdclk_pll.vco == 0 ||
+           dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref)
+               goto sanitize;
 
-               return false;
-       }
+       /* DPLL okay; verify the cdclock
+        *
+        * Some BIOS versions leave an incorrect decimal frequency value and
+        * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4,
+        * so sanitize this register.
+        */
+       cdctl = I915_READ(CDCLK_CTL);
+       /*
+        * Let's ignore the pipe field, since BIOS could have configured the
+        * dividers both synching to an active pipe, or asynchronously
+        * (PIPE_NONE).
+        */
+       cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE;
 
-       if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) {
-               DRM_DEBUG_DRIVER("CDCLK enabled, but DBUF power hasn't settled\n");
+       expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) |
+                  skl_cdclk_decimal(dev_priv->cdclk_freq);
+       /*
+        * Disable SSA Precharge when CD clock frequency < 500 MHz,
+        * enable otherwise.
+        */
+       if (dev_priv->cdclk_freq >= 500000)
+               expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
 
-               return false;
-       }
+       if (cdctl == expected)
+               /* All well; nothing to sanitize */
+               return;
 
-       return true;
-}
+sanitize:
+       DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n");
 
-bool broxton_cdclk_verify_state(struct drm_i915_private *dev_priv)
-{
-       return broxton_cdclk_is_enabled(dev_priv);
+       /* force cdclk programming */
+       dev_priv->cdclk_freq = 0;
+
+       /* force full PLL disable + enable */
+       dev_priv->cdclk_pll.vco = -1;
 }
 
 void broxton_init_cdclk(struct drm_i915_private *dev_priv)
 {
-       /* check if cd clock is enabled */
-       if (broxton_cdclk_is_enabled(dev_priv)) {
-               DRM_DEBUG_KMS("CDCLK already enabled, won't reprogram it\n");
-               return;
-       }
+       bxt_sanitize_cdclk(dev_priv);
 
-       DRM_DEBUG_KMS("CDCLK not enabled, enabling it\n");
+       if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0)
+               return;
 
        /*
         * FIXME:
         * - The initial CDCLK needs to be read from VBT.
         *   Need to make this change after VBT has changes for BXT.
-        * - check if setting the max (or any) cdclk freq is really necessary
-        *   here, it belongs to modeset time
         */
-       broxton_set_cdclk(dev_priv, 624000);
-
-       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
-       POSTING_READ(DBUF_CTL);
-
-       udelay(10);
-
-       if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
-               DRM_ERROR("DBuf power enable timeout!\n");
+       broxton_set_cdclk(dev_priv, broxton_calc_cdclk(0));
 }
 
 void broxton_uninit_cdclk(struct drm_i915_private *dev_priv)
 {
-       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
-       POSTING_READ(DBUF_CTL);
+       broxton_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref);
+}
+
+static int skl_calc_cdclk(int max_pixclk, int vco)
+{
+       if (vco == 8640000) {
+               if (max_pixclk > 540000)
+                       return 617143;
+               else if (max_pixclk > 432000)
+                       return 540000;
+               else if (max_pixclk > 308571)
+                       return 432000;
+               else
+                       return 308571;
+       } else {
+               if (max_pixclk > 540000)
+                       return 675000;
+               else if (max_pixclk > 450000)
+                       return 540000;
+               else if (max_pixclk > 337500)
+                       return 450000;
+               else
+                       return 337500;
+       }
+}
 
-       udelay(10);
+static void
+skl_dpll0_update(struct drm_i915_private *dev_priv)
+{
+       u32 val;
 
-       if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
-               DRM_ERROR("DBuf power disable timeout!\n");
+       dev_priv->cdclk_pll.ref = 24000;
+       dev_priv->cdclk_pll.vco = 0;
 
-       /* Set minimum (bypass) frequency, in effect turning off the DE PLL */
-       broxton_set_cdclk(dev_priv, 19200);
-}
+       val = I915_READ(LCPLL1_CTL);
+       if ((val & LCPLL_PLL_ENABLE) == 0)
+               return;
 
-static const struct skl_cdclk_entry {
-       unsigned int freq;
-       unsigned int vco;
-} skl_cdclk_frequencies[] = {
-       { .freq = 308570, .vco = 8640 },
-       { .freq = 337500, .vco = 8100 },
-       { .freq = 432000, .vco = 8640 },
-       { .freq = 450000, .vco = 8100 },
-       { .freq = 540000, .vco = 8100 },
-       { .freq = 617140, .vco = 8640 },
-       { .freq = 675000, .vco = 8100 },
-};
+       if (WARN_ON((val & LCPLL_PLL_LOCK) == 0))
+               return;
 
-static unsigned int skl_cdclk_get_vco(unsigned int freq)
-{
-       unsigned int i;
+       val = I915_READ(DPLL_CTRL1);
 
-       for (i = 0; i < ARRAY_SIZE(skl_cdclk_frequencies); i++) {
-               const struct skl_cdclk_entry *e = &skl_cdclk_frequencies[i];
+       if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) |
+                           DPLL_CTRL1_SSC(SKL_DPLL0) |
+                           DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) !=
+                   DPLL_CTRL1_OVERRIDE(SKL_DPLL0)))
+               return;
 
-               if (e->freq == freq)
-                       return e->vco;
+       switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) {
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0):
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0):
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0):
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0):
+               dev_priv->cdclk_pll.vco = 8100000;
+               break;
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0):
+       case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0):
+               dev_priv->cdclk_pll.vco = 8640000;
+               break;
+       default:
+               MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0));
+               break;
        }
+}
+
+void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco)
+{
+       bool changed = dev_priv->skl_preferred_vco_freq != vco;
+
+       dev_priv->skl_preferred_vco_freq = vco;
 
-       return 8100;
+       if (changed)
+               intel_update_max_cdclk(dev_priv->dev);
 }
 
 static void
 skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
 {
-       int min_cdclk;
+       int min_cdclk = skl_calc_cdclk(0, vco);
        u32 val;
 
-       /* select the minimum CDCLK before enabling DPLL 0 */
-       if (vco == 8640)
-               min_cdclk = 308570;
-       else
-               min_cdclk = 337500;
+       WARN_ON(vco != 8100000 && vco != 8640000);
 
+       /* select the minimum CDCLK before enabling DPLL 0 */
        val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk);
        I915_WRITE(CDCLK_CTL, val);
        POSTING_READ(CDCLK_CTL);
@@ -5499,14 +5654,14 @@ skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
         * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640.
         * The modeset code is responsible for the selection of the exact link
         * rate later on, with the constraint of choosing a frequency that
-        * works with required_vco.
+        * works with vco.
         */
        val = I915_READ(DPLL_CTRL1);
 
        val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) |
                 DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0));
        val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0);
-       if (vco == 8640)
+       if (vco == 8640000)
                val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080,
                                            SKL_DPLL0);
        else
@@ -5520,6 +5675,11 @@ skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
 
        if (wait_for(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK, 5))
                DRM_ERROR("DPLL0 not locked\n");
+
+       dev_priv->cdclk_pll.vco = vco;
+
+       /* We'll want to keep using the current vco from now on. */
+       skl_set_preferred_cdclk_vco(dev_priv, vco);
 }
 
 static void
@@ -5528,6 +5688,8 @@ skl_dpll0_disable(struct drm_i915_private *dev_priv)
        I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
        if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
                DRM_ERROR("Couldn't disable DPLL0\n");
+
+       dev_priv->cdclk_pll.vco = 0;
 }
 
 static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv)
@@ -5557,12 +5719,14 @@ static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
        return false;
 }
 
-static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
+static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco)
 {
        struct drm_device *dev = dev_priv->dev;
        u32 freq_select, pcu_ack;
 
-       DRM_DEBUG_DRIVER("Changing CDCLK to %dKHz\n", cdclk);
+       WARN_ON((cdclk == 24000) != (vco == 0));
+
+       DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco);
 
        if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) {
                DRM_ERROR("failed to inform PCU about cdclk change\n");
@@ -5580,19 +5744,26 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
                freq_select = CDCLK_FREQ_540;
                pcu_ack = 2;
                break;
-       case 308570:
+       case 308571:
        case 337500:
        default:
                freq_select = CDCLK_FREQ_337_308;
                pcu_ack = 0;
                break;
-       case 617140:
+       case 617143:
        case 675000:
                freq_select = CDCLK_FREQ_675_617;
                pcu_ack = 3;
                break;
        }
 
+       if (dev_priv->cdclk_pll.vco != 0 &&
+           dev_priv->cdclk_pll.vco != vco)
+               skl_dpll0_disable(dev_priv);
+
+       if (dev_priv->cdclk_pll.vco != vco)
+               skl_dpll0_enable(dev_priv, vco);
+
        I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk));
        POSTING_READ(CDCLK_CTL);
 
@@ -5604,49 +5775,41 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
        intel_update_cdclk(dev);
 }
 
+static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv);
+
 void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
 {
-       /* disable DBUF power */
-       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
-       POSTING_READ(DBUF_CTL);
-
-       udelay(10);
-
-       if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
-               DRM_ERROR("DBuf power disable timeout\n");
-
-       skl_dpll0_disable(dev_priv);
+       skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0);
 }
 
 void skl_init_cdclk(struct drm_i915_private *dev_priv)
 {
-       unsigned int vco;
-
-       /* DPLL0 not enabled (happens on early BIOS versions) */
-       if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE)) {
-               /* enable DPLL0 */
-               vco = skl_cdclk_get_vco(dev_priv->skl_boot_cdclk);
-               skl_dpll0_enable(dev_priv, vco);
-       }
+       int cdclk, vco;
 
-       /* set CDCLK to the frequency the BIOS chose */
-       skl_set_cdclk(dev_priv, dev_priv->skl_boot_cdclk);
+       skl_sanitize_cdclk(dev_priv);
 
-       /* enable DBUF power */
-       I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
-       POSTING_READ(DBUF_CTL);
+       if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) {
+               /*
+                * Use the current vco as our initial
+                * guess as to what the preferred vco is.
+                */
+               if (dev_priv->skl_preferred_vco_freq == 0)
+                       skl_set_preferred_cdclk_vco(dev_priv,
+                                                   dev_priv->cdclk_pll.vco);
+               return;
+       }
 
-       udelay(10);
+       vco = dev_priv->skl_preferred_vco_freq;
+       if (vco == 0)
+               vco = 8100000;
+       cdclk = skl_calc_cdclk(0, vco);
 
-       if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
-               DRM_ERROR("DBuf power enable timeout\n");
+       skl_set_cdclk(dev_priv, cdclk, vco);
 }
 
-int skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
+static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
 {
-       uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
-       uint32_t cdctl = I915_READ(CDCLK_CTL);
-       int freq = dev_priv->skl_boot_cdclk;
+       uint32_t cdctl, expected;
 
        /*
         * check if the pre-os initialized the display
@@ -5656,8 +5819,10 @@ int skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
        if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0)
                goto sanitize;
 
+       intel_update_cdclk(dev_priv->dev);
        /* Is PLL enabled and locked ? */
-       if (!((lcpll1 & LCPLL_PLL_ENABLE) && (lcpll1 & LCPLL_PLL_LOCK)))
+       if (dev_priv->cdclk_pll.vco == 0 ||
+           dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref)
                goto sanitize;
 
        /* DPLL okay; verify the cdclock
@@ -5666,19 +5831,20 @@ int skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
         * decimal part is programmed wrong from BIOS where pre-os does not
         * enable display. Verify the same as well.
         */
-       if (cdctl == ((cdctl & CDCLK_FREQ_SEL_MASK) | skl_cdclk_decimal(freq)))
+       cdctl = I915_READ(CDCLK_CTL);
+       expected = (cdctl & CDCLK_FREQ_SEL_MASK) |
+               skl_cdclk_decimal(dev_priv->cdclk_freq);
+       if (cdctl == expected)
                /* All well; nothing to sanitize */
-               return false;
+               return;
+
 sanitize:
-       /*
-        * As of now initialize with max cdclk till
-        * we get dynamic cdclk support
-        * */
-       dev_priv->skl_boot_cdclk = dev_priv->max_cdclk_freq;
-       skl_init_cdclk(dev_priv);
+       DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n");
 
-       /* we did have to sanitize */
-       return true;
+       /* force cdclk programming */
+       dev_priv->cdclk_freq = 0;
+       /* force full PLL disable + enable */
+       dev_priv->cdclk_pll.vco = -1;
 }
 
 /* Adjust CDclk dividers to allow high res or save power if possible */
@@ -5820,10 +5986,6 @@ static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv,
 
 static int broxton_calc_cdclk(int max_pixclk)
 {
-       /*
-        * FIXME:
-        * - set 19.2MHz bypass frequency if there are no active pipes
-        */
        if (max_pixclk > 576000)
                return 624000;
        else if (max_pixclk > 384000)
@@ -6154,7 +6316,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
                return;
 
        if (to_intel_plane_state(crtc->primary->state)->visible) {
-               WARN_ON(list_empty(&intel_crtc->flip_work));
+               WARN_ON(intel_crtc->flip_work);
 
                intel_pre_disable_primary_noatomic(crtc);
 
@@ -6164,8 +6326,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 
        dev_priv->display.crtc_disable(crtc);
 
-       DRM_DEBUG_KMS("[CRTC:%d] hw state adjusted, was enabled, now disabled\n",
-                     crtc->base.id);
+       DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
+                     crtc->base.id, crtc->name);
 
        WARN_ON(drm_atomic_set_mode_for_crtc(crtc->state, NULL) < 0);
        crtc->state->active = false;
@@ -6206,12 +6368,6 @@ int intel_display_suspend(struct drm_device *dev)
                DRM_ERROR("Suspending crtc's failed with %i\n", ret);
        else
                dev_priv->modeset_restore_state = state;
-
-       /*
-        * Make sure all unpin_work completes before returning.
-        */
-       flush_workqueue(dev_priv->wq);
-
        return ret;
 }
 
@@ -6225,10 +6381,9 @@ void intel_encoder_destroy(struct drm_encoder *encoder)
 
 /* Cross check the actual hw state with our own modeset state tracking (and it's
  * internal consistency). */
-static void intel_connector_verify_state(struct intel_connector *connector,
-                                        struct drm_connector_state *conn_state)
+static void intel_connector_verify_state(struct intel_connector *connector)
 {
-       struct drm_crtc *crtc = conn_state->crtc;
+       struct drm_crtc *crtc = connector->base.state->crtc;
 
        DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
                      connector->base.base.id,
@@ -6236,6 +6391,7 @@ static void intel_connector_verify_state(struct intel_connector *connector,
 
        if (connector->get_hw_state(connector)) {
                struct intel_encoder *encoder = connector->encoder;
+               struct drm_connector_state *conn_state = connector->base.state;
 
                I915_STATE_WARN(!crtc,
                         "connector enabled without attached crtc\n");
@@ -6257,7 +6413,7 @@ static void intel_connector_verify_state(struct intel_connector *connector,
        } else {
                I915_STATE_WARN(crtc && crtc->state->active,
                        "attached crtc is active, but connector isn't\n");
-               I915_STATE_WARN(!crtc && conn_state->best_encoder,
+               I915_STATE_WARN(!crtc && connector->base.state->best_encoder,
                        "best encoder set without crtc!\n");
        }
 }
@@ -6471,10 +6627,10 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+       int clock_limit = dev_priv->max_dotclk_freq;
 
-       /* FIXME should check pixel clock limits on all platforms */
        if (INTEL_INFO(dev)->gen < 4) {
-               int clock_limit = dev_priv->max_cdclk_freq * 9 / 10;
+               clock_limit = dev_priv->max_cdclk_freq * 9 / 10;
 
                /*
                 * Enable double wide mode when the dot clock
@@ -6482,16 +6638,16 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
                 */
                if (intel_crtc_supports_double_wide(crtc) &&
                    adjusted_mode->crtc_clock > clock_limit) {
-                       clock_limit *= 2;
+                       clock_limit = dev_priv->max_dotclk_freq;
                        pipe_config->double_wide = true;
                }
+       }
 
-               if (adjusted_mode->crtc_clock > clock_limit) {
-                       DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n",
-                                     adjusted_mode->crtc_clock, clock_limit,
-                                     yesno(pipe_config->double_wide));
-                       return -EINVAL;
-               }
+       if (adjusted_mode->crtc_clock > clock_limit) {
+               DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n",
+                             adjusted_mode->crtc_clock, clock_limit,
+                             yesno(pipe_config->double_wide));
+               return -EINVAL;
        }
 
        /*
@@ -6523,76 +6679,98 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 static int skylake_get_display_clock_speed(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
-       uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
-       uint32_t cdctl = I915_READ(CDCLK_CTL);
-       uint32_t linkrate;
+       uint32_t cdctl;
 
-       if (!(lcpll1 & LCPLL_PLL_ENABLE))
-               return 24000; /* 24MHz is the cd freq with NSSC ref */
+       skl_dpll0_update(dev_priv);
 
-       if ((cdctl & CDCLK_FREQ_SEL_MASK) == CDCLK_FREQ_540)
-               return 540000;
+       if (dev_priv->cdclk_pll.vco == 0)
+               return dev_priv->cdclk_pll.ref;
 
-       linkrate = (I915_READ(DPLL_CTRL1) &
-                   DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) >> 1;
+       cdctl = I915_READ(CDCLK_CTL);
 
-       if (linkrate == DPLL_CTRL1_LINK_RATE_2160 ||
-           linkrate == DPLL_CTRL1_LINK_RATE_1080) {
-               /* vco 8640 */
+       if (dev_priv->cdclk_pll.vco == 8640000) {
                switch (cdctl & CDCLK_FREQ_SEL_MASK) {
                case CDCLK_FREQ_450_432:
                        return 432000;
                case CDCLK_FREQ_337_308:
-                       return 308570;
+                       return 308571;
+               case CDCLK_FREQ_540:
+                       return 540000;
                case CDCLK_FREQ_675_617:
-                       return 617140;
+                       return 617143;
                default:
-                       WARN(1, "Unknown cd freq selection\n");
+                       MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK);
                }
        } else {
-               /* vco 8100 */
                switch (cdctl & CDCLK_FREQ_SEL_MASK) {
                case CDCLK_FREQ_450_432:
                        return 450000;
                case CDCLK_FREQ_337_308:
                        return 337500;
+               case CDCLK_FREQ_540:
+                       return 540000;
                case CDCLK_FREQ_675_617:
                        return 675000;
                default:
-                       WARN(1, "Unknown cd freq selection\n");
+                       MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK);
                }
        }
 
-       /* error case, do as if DPLL0 isn't enabled */
-       return 24000;
+       return dev_priv->cdclk_pll.ref;
+}
+
+static void bxt_de_pll_update(struct drm_i915_private *dev_priv)
+{
+       u32 val;
+
+       dev_priv->cdclk_pll.ref = 19200;
+       dev_priv->cdclk_pll.vco = 0;
+
+       val = I915_READ(BXT_DE_PLL_ENABLE);
+       if ((val & BXT_DE_PLL_PLL_ENABLE) == 0)
+               return;
+
+       if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0))
+               return;
+
+       val = I915_READ(BXT_DE_PLL_CTL);
+       dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) *
+               dev_priv->cdclk_pll.ref;
 }
 
 static int broxton_get_display_clock_speed(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
-       uint32_t cdctl = I915_READ(CDCLK_CTL);
-       uint32_t pll_ratio = I915_READ(BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK;
-       uint32_t pll_enab = I915_READ(BXT_DE_PLL_ENABLE);
-       int cdclk;
+       u32 divider;
+       int div, vco;
 
-       if (!(pll_enab & BXT_DE_PLL_PLL_ENABLE))
-               return 19200;
+       bxt_de_pll_update(dev_priv);
+
+       vco = dev_priv->cdclk_pll.vco;
+       if (vco == 0)
+               return dev_priv->cdclk_pll.ref;
 
-       cdclk = 19200 * pll_ratio / 2;
+       divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK;
 
-       switch (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) {
+       switch (divider) {
        case BXT_CDCLK_CD2X_DIV_SEL_1:
-               return cdclk;  /* 576MHz or 624MHz */
+               div = 2;
+               break;
        case BXT_CDCLK_CD2X_DIV_SEL_1_5:
-               return cdclk * 2 / 3; /* 384MHz */
+               div = 3;
+               break;
        case BXT_CDCLK_CD2X_DIV_SEL_2:
-               return cdclk / 2; /* 288MHz */
+               div = 4;
+               break;
        case BXT_CDCLK_CD2X_DIV_SEL_4:
-               return cdclk / 4; /* 144MHz */
+               div = 8;
+               break;
+       default:
+               MISSING_CASE(divider);
+               return dev_priv->cdclk_pll.ref;
        }
 
-       /* error case, do as if DE PLL isn't enabled */
-       return 19200;
+       return DIV_ROUND_CLOSEST(vco, div);
 }
 
 static int broadwell_get_display_clock_speed(struct drm_device *dev)
@@ -8183,12 +8361,14 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_encoder *encoder;
+       int i;
        u32 val, final;
        bool has_lvds = false;
        bool has_cpu_edp = false;
        bool has_panel = false;
        bool has_ck505 = false;
        bool can_ssc = false;
+       bool using_ssc_source = false;
 
        /* We need to take the global config into account */
        for_each_intel_encoder(dev, encoder) {
@@ -8215,10 +8395,24 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
                can_ssc = true;
        }
 
-       DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d\n",
-                     has_panel, has_lvds, has_ck505);
+       /* Check if any DPLLs are using the SSC source */
+       for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+               u32 temp = I915_READ(PCH_DPLL(i));
 
-       /* Ironlake: try to setup display ref clock before DPLL
+               if (!(temp & DPLL_VCO_ENABLE))
+                       continue;
+
+               if ((temp & PLL_REF_INPUT_MASK) ==
+                   PLLB_REF_INPUT_SPREADSPECTRUMIN) {
+                       using_ssc_source = true;
+                       break;
+               }
+       }
+
+       DRM_DEBUG_KMS("has_panel %d has_lvds %d has_ck505 %d using_ssc_source %d\n",
+                     has_panel, has_lvds, has_ck505, using_ssc_source);
+
+       /* Ironlake: try to setup display ref clock before DPLL
         * enabling. This is only under driver's control after
         * PCH B stepping, previous chipset stepping should be
         * ignoring this setting.
@@ -8236,9 +8430,12 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
        else
                final |= DREF_NONSPREAD_SOURCE_ENABLE;
 
-       final &= ~DREF_SSC_SOURCE_MASK;
        final &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
-       final &= ~DREF_SSC1_ENABLE;
+
+       if (!using_ssc_source) {
+               final &= ~DREF_SSC_SOURCE_MASK;
+               final &= ~DREF_SSC1_ENABLE;
+       }
 
        if (has_panel) {
                final |= DREF_SSC_SOURCE_ENABLE;
@@ -8301,7 +8498,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
                POSTING_READ(PCH_DREF_CONTROL);
                udelay(200);
        } else {
-               DRM_DEBUG_KMS("Disabling SSC entirely\n");
+               DRM_DEBUG_KMS("Disabling CPU source output\n");
 
                val &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
 
@@ -8312,16 +8509,20 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
                POSTING_READ(PCH_DREF_CONTROL);
                udelay(200);
 
-               /* Turn off the SSC source */
-               val &= ~DREF_SSC_SOURCE_MASK;
-               val |= DREF_SSC_SOURCE_DISABLE;
+               if (!using_ssc_source) {
+                       DRM_DEBUG_KMS("Disabling SSC source\n");
 
-               /* Turn off SSC1 */
-               val &= ~DREF_SSC1_ENABLE;
+                       /* Turn off the SSC source */
+                       val &= ~DREF_SSC_SOURCE_MASK;
+                       val |= DREF_SSC_SOURCE_DISABLE;
 
-               I915_WRITE(PCH_DREF_CONTROL, val);
-               POSTING_READ(PCH_DREF_CONTROL);
-               udelay(200);
+                       /* Turn off SSC1 */
+                       val &= ~DREF_SSC1_ENABLE;
+
+                       I915_WRITE(PCH_DREF_CONTROL, val);
+                       POSTING_READ(PCH_DREF_CONTROL);
+                       udelay(200);
+               }
        }
 
        BUG_ON(val != final);
@@ -9647,6 +9848,47 @@ static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state)
        broadwell_set_cdclk(dev, req_cdclk);
 }
 
+static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
+{
+       struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+       struct drm_i915_private *dev_priv = to_i915(state->dev);
+       const int max_pixclk = ilk_max_pixel_rate(state);
+       int vco = intel_state->cdclk_pll_vco;
+       int cdclk;
+
+       /*
+        * FIXME should also account for plane ratio
+        * once 64bpp pixel formats are supported.
+        */
+       cdclk = skl_calc_cdclk(max_pixclk, vco);
+
+       /*
+        * FIXME move the cdclk calculation to
+        * compute_config() so we can fail gracefully.
+        */
+       if (cdclk > dev_priv->max_cdclk_freq) {
+               DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n",
+                         cdclk, dev_priv->max_cdclk_freq);
+               cdclk = dev_priv->max_cdclk_freq;
+       }
+
+       intel_state->cdclk = intel_state->dev_cdclk = cdclk;
+       if (!intel_state->active_crtcs)
+               intel_state->dev_cdclk = skl_calc_cdclk(0, vco);
+
+       return 0;
+}
+
+static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state)
+{
+       struct drm_i915_private *dev_priv = to_i915(old_state->dev);
+       struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state);
+       unsigned int req_cdclk = intel_state->dev_cdclk;
+       unsigned int req_vco = intel_state->cdclk_pll_vco;
+
+       skl_set_cdclk(dev_priv, req_cdclk, req_vco);
+}
+
 static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
                                      struct intel_crtc_state *crtc_state)
 {
@@ -10752,13 +10994,6 @@ void intel_mark_idle(struct drm_i915_private *dev_priv)
        intel_runtime_pm_put(dev_priv);
 }
 
-void intel_free_flip_work(struct intel_flip_work *work)
-{
-       kfree(work->old_connector_state);
-       kfree(work->new_connector_state);
-       kfree(work);
-}
-
 static void intel_crtc_destroy(struct drm_crtc *crtc)
 {
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -10766,293 +11001,645 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
        struct intel_flip_work *work;
 
        spin_lock_irq(&dev->event_lock);
-       while (!list_empty(&intel_crtc->flip_work)) {
-               work = list_first_entry(&intel_crtc->flip_work,
-                                       struct intel_flip_work, head);
-               list_del_init(&work->head);
-               spin_unlock_irq(&dev->event_lock);
+       work = intel_crtc->flip_work;
+       intel_crtc->flip_work = NULL;
+       spin_unlock_irq(&dev->event_lock);
 
+       if (work) {
                cancel_work_sync(&work->mmio_work);
                cancel_work_sync(&work->unpin_work);
-               intel_free_flip_work(work);
-
-               spin_lock_irq(&dev->event_lock);
+               kfree(work);
        }
-       spin_unlock_irq(&dev->event_lock);
 
        drm_crtc_cleanup(crtc);
 
        kfree(intel_crtc);
 }
 
-static void intel_crtc_post_flip_update(struct intel_flip_work *work,
-                                       struct drm_crtc *crtc)
+static void intel_unpin_work_fn(struct work_struct *__work)
 {
-       struct intel_crtc_state *crtc_state = work->new_crtc_state;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_flip_work *work =
+               container_of(__work, struct intel_flip_work, unpin_work);
+       struct intel_crtc *crtc = to_intel_crtc(work->crtc);
+       struct drm_device *dev = crtc->base.dev;
+       struct drm_plane *primary = crtc->base.primary;
 
-       if (crtc_state->disable_cxsr)
-               intel_crtc->wm.cxsr_allowed = true;
+       if (is_mmio_work(work))
+               flush_work(&work->mmio_work);
 
-       if (crtc_state->update_wm_post && crtc_state->base.active)
-               intel_update_watermarks(crtc);
+       mutex_lock(&dev->struct_mutex);
+       intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
+       drm_gem_object_unreference(&work->pending_flip_obj->base);
 
-       if (work->num_planes > 0 &&
-           work->old_plane_state[0]->base.plane == crtc->primary) {
-               struct intel_plane_state *plane_state =
-                       work->new_plane_state[0];
+       if (work->flip_queued_req)
+               i915_gem_request_assign(&work->flip_queued_req, NULL);
+       mutex_unlock(&dev->struct_mutex);
 
-               if (plane_state->visible &&
-                   (needs_modeset(&crtc_state->base) ||
-                    !work->old_plane_state[0]->visible))
-                       intel_post_enable_primary(crtc);
-       }
+       intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
+       intel_fbc_post_update(crtc);
+       drm_framebuffer_unreference(work->old_fb);
+
+       BUG_ON(atomic_read(&crtc->unpin_work_count) == 0);
+       atomic_dec(&crtc->unpin_work_count);
+
+       kfree(work);
 }
 
-static void intel_unpin_work_fn(struct work_struct *__work)
+/* Is 'a' after or equal to 'b'? */
+static bool g4x_flip_count_after_eq(u32 a, u32 b)
 {
-       struct intel_flip_work *work =
-               container_of(__work, struct intel_flip_work, unpin_work);
-       struct drm_crtc *crtc = work->old_crtc_state->base.crtc;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct drm_device *dev = crtc->dev;
+       return !((a - b) & 0x80000000);
+}
+
+static bool __pageflip_finished_cs(struct intel_crtc *crtc,
+                                  struct intel_flip_work *work)
+{
+       struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
+       unsigned reset_counter;
 
-       if (work->fb_bits)
-               intel_frontbuffer_flip_complete(dev, work->fb_bits);
+       reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+       if (crtc->reset_counter != reset_counter)
+               return true;
 
        /*
-        * Unless work->can_async_unpin is false, there's no way to ensure
-        * that work->new_crtc_state contains valid memory during unpin
-        * because intel_atomic_commit may free it before this runs.
+        * The relevant registers don't exist on pre-ctg.
+        * As the flip done interrupt doesn't trigger for mmio
+        * flips on gmch platforms, a flip count check isn't
+        * really needed there. But since ctg has the registers,
+        * include it in the check anyway.
         */
-       if (!work->can_async_unpin) {
-               intel_crtc_post_flip_update(work, crtc);
+       if (INTEL_INFO(dev)->gen < 5 && !IS_G4X(dev))
+               return true;
 
-               if (dev_priv->display.optimize_watermarks)
-                       dev_priv->display.optimize_watermarks(work->new_crtc_state);
-       }
+       /*
+        * BDW signals flip done immediately if the plane
+        * is disabled, even if the plane enable is already
+        * armed to occur at the next vblank :(
+        */
 
-       if (work->fb_bits & to_intel_plane(crtc->primary)->frontbuffer_bit)
-               intel_fbc_post_update(intel_crtc);
+       /*
+        * A DSPSURFLIVE check isn't enough in case the mmio and CS flips
+        * used the same base address. In that case the mmio flip might
+        * have completed, but the CS hasn't even executed the flip yet.
+        *
+        * A flip count check isn't enough as the CS might have updated
+        * the base address just after start of vblank, but before we
+        * managed to process the interrupt. This means we'd complete the
+        * CS flip too soon.
+        *
+        * Combining both checks should get us a good enough result. It may
+        * still happen that the CS flip has been executed, but has not
+        * yet actually completed. But in case the base address is the same
+        * anyway, we don't really care.
+        */
+       return (I915_READ(DSPSURFLIVE(crtc->plane)) & ~0xfff) ==
+               crtc->flip_work->gtt_offset &&
+               g4x_flip_count_after_eq(I915_READ(PIPE_FLIPCOUNT_G4X(crtc->pipe)),
+                                   crtc->flip_work->flip_count);
+}
 
-       if (work->put_power_domains)
-               modeset_put_power_domains(dev_priv, work->put_power_domains);
+static bool
+__pageflip_finished_mmio(struct intel_crtc *crtc,
+                              struct intel_flip_work *work)
+{
+       /*
+        * MMIO work completes when vblank is different from
+        * flip_queued_vblank.
+        *
+        * Reset counter value doesn't matter, this is handled by
+        * i915_wait_request finishing early, so no need to handle
+        * reset here.
+        */
+       return intel_crtc_get_vblank_counter(crtc) != work->flip_queued_vblank;
+}
 
-       /* Make sure mmio work is completely finished before freeing all state here. */
-       flush_work(&work->mmio_work);
 
-       if (!work->can_async_unpin &&
-           (work->new_crtc_state->update_pipe ||
-            needs_modeset(&work->new_crtc_state->base))) {
-               /* This must be called before work is unpinned for serialization. */
-               intel_modeset_verify_crtc(crtc, &work->old_crtc_state->base,
-                                         &work->new_crtc_state->base);
+static bool pageflip_finished(struct intel_crtc *crtc,
+                             struct intel_flip_work *work)
+{
+       if (!atomic_read(&work->pending))
+               return false;
 
-               for (i = 0; i < work->num_new_connectors; i++) {
-                       struct drm_connector_state *conn_state =
-                               work->new_connector_state[i];
-                       struct drm_connector *con = conn_state->connector;
+       smp_rmb();
 
-                       WARN_ON(!con);
+       if (is_mmio_work(work))
+               return __pageflip_finished_mmio(crtc, work);
+       else
+               return __pageflip_finished_cs(crtc, work);
+}
 
-                       intel_connector_verify_state(to_intel_connector(con),
-                                                    conn_state);
-               }
-       }
+void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe)
+{
+       struct drm_device *dev = dev_priv->dev;
+       struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_flip_work *work;
+       unsigned long flags;
 
-       for (i = 0; i < work->num_old_connectors; i++) {
-               struct drm_connector_state *old_con_state =
-                       work->old_connector_state[i];
-               struct drm_connector *con =
-                       old_con_state->connector;
+       /* Ignore early vblank irqs */
+       if (!crtc)
+               return;
 
-               con->funcs->atomic_destroy_state(con, old_con_state);
-       }
+       /*
+        * This is called both by irq handlers and the reset code (to complete
+        * lost pageflips) so needs the full irqsave spinlocks.
+        */
+       spin_lock_irqsave(&dev->event_lock, flags);
+       work = intel_crtc->flip_work;
 
-       if (!work->can_async_unpin || !list_empty(&work->head)) {
-               spin_lock_irq(&dev->event_lock);
-               WARN(list_empty(&work->head) != work->can_async_unpin,
-                    "[CRTC:%i] Pin work %p async %i with %i planes, active %i -> %i ms %i\n",
-                    crtc->base.id, work, work->can_async_unpin, work->num_planes,
-                    work->old_crtc_state->base.active, work->new_crtc_state->base.active,
-                    needs_modeset(&work->new_crtc_state->base));
+       if (work != NULL &&
+           !is_mmio_work(work) &&
+           pageflip_finished(intel_crtc, work))
+               page_flip_completed(intel_crtc);
 
-               if (!list_empty(&work->head))
-                       list_del(&work->head);
+       spin_unlock_irqrestore(&dev->event_lock, flags);
+}
 
-               wake_up_all(&dev_priv->pending_flip_queue);
-               spin_unlock_irq(&dev->event_lock);
-       }
+void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe)
+{
+       struct drm_device *dev = dev_priv->dev;
+       struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_flip_work *work;
+       unsigned long flags;
 
-       /* New crtc_state freed? */
-       if (work->free_new_crtc_state)
-               intel_crtc_destroy_state(crtc, &work->new_crtc_state->base);
+       /* Ignore early vblank irqs */
+       if (!crtc)
+               return;
 
-       intel_crtc_destroy_state(crtc, &work->old_crtc_state->base);
+       /*
+        * This is called both by irq handlers and the reset code (to complete
+        * lost pageflips) so needs the full irqsave spinlocks.
+        */
+       spin_lock_irqsave(&dev->event_lock, flags);
+       work = intel_crtc->flip_work;
 
-       for (i = 0; i < work->num_planes; i++) {
-               struct intel_plane_state *old_plane_state =
-                       work->old_plane_state[i];
-               struct drm_framebuffer *old_fb = old_plane_state->base.fb;
-               struct drm_plane *plane = old_plane_state->base.plane;
-               struct drm_i915_gem_request *req;
+       if (work != NULL &&
+           is_mmio_work(work) &&
+           pageflip_finished(intel_crtc, work))
+               page_flip_completed(intel_crtc);
+
+       spin_unlock_irqrestore(&dev->event_lock, flags);
+}
 
-               req = old_plane_state->wait_req;
-               old_plane_state->wait_req = NULL;
-               if (req)
-                       i915_gem_request_unreference(req);
+static inline void intel_mark_page_flip_active(struct intel_crtc *crtc,
+                                              struct intel_flip_work *work)
+{
+       work->flip_queued_vblank = intel_crtc_get_vblank_counter(crtc);
 
-               fence_put(old_plane_state->base.fence);
-               old_plane_state->base.fence = NULL;
+       /* Ensure that the work item is consistent when activating it ... */
+       smp_mb__before_atomic();
+       atomic_set(&work->pending, 1);
+}
 
-               if (old_fb &&
-                   (plane->type != DRM_PLANE_TYPE_CURSOR ||
-                    !INTEL_INFO(dev_priv)->cursor_needs_physical)) {
-                       mutex_lock(&dev->struct_mutex);
-                       intel_unpin_fb_obj(old_fb, old_plane_state->base.rotation);
-                       mutex_unlock(&dev->struct_mutex);
-               }
+static int intel_gen2_queue_flip(struct drm_device *dev,
+                                struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb,
+                                struct drm_i915_gem_object *obj,
+                                struct drm_i915_gem_request *req,
+                                uint32_t flags)
+{
+       struct intel_engine_cs *engine = req->engine;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       u32 flip_mask;
+       int ret;
 
-               intel_plane_destroy_state(plane, &old_plane_state->base);
-       }
+       ret = intel_ring_begin(req, 6);
+       if (ret)
+               return ret;
 
-       if (!WARN_ON(atomic_read(&intel_crtc->unpin_work_count) == 0))
-               atomic_dec(&intel_crtc->unpin_work_count);
+       /* Can't queue multiple flips, so wait for the previous
+        * one to finish before executing the next.
+        */
+       if (intel_crtc->plane)
+               flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+       else
+               flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+       intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
+       intel_ring_emit(engine, MI_NOOP);
+       intel_ring_emit(engine, MI_DISPLAY_FLIP |
+                       MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+       intel_ring_emit(engine, fb->pitches[0]);
+       intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+       intel_ring_emit(engine, 0); /* aux display base address, unused */
 
-       intel_free_flip_work(work);
+       return 0;
 }
 
+static int intel_gen3_queue_flip(struct drm_device *dev,
+                                struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb,
+                                struct drm_i915_gem_object *obj,
+                                struct drm_i915_gem_request *req,
+                                uint32_t flags)
+{
+       struct intel_engine_cs *engine = req->engine;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       u32 flip_mask;
+       int ret;
 
-static bool pageflip_finished(struct intel_crtc *crtc,
-                             struct intel_flip_work *work)
+       ret = intel_ring_begin(req, 6);
+       if (ret)
+               return ret;
+
+       if (intel_crtc->plane)
+               flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+       else
+               flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+       intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
+       intel_ring_emit(engine, MI_NOOP);
+       intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
+                       MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+       intel_ring_emit(engine, fb->pitches[0]);
+       intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+       intel_ring_emit(engine, MI_NOOP);
+
+       return 0;
+}
+
+static int intel_gen4_queue_flip(struct drm_device *dev,
+                                struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb,
+                                struct drm_i915_gem_object *obj,
+                                struct drm_i915_gem_request *req,
+                                uint32_t flags)
 {
-       if (!atomic_read(&work->pending))
-               return false;
+       struct intel_engine_cs *engine = req->engine;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       uint32_t pf, pipesrc;
+       int ret;
 
-       smp_rmb();
+       ret = intel_ring_begin(req, 4);
+       if (ret)
+               return ret;
 
-       /*
-        * MMIO work completes when vblank is different from
-        * flip_queued_vblank.
+       /* i965+ uses the linear or tiled offsets from the
+        * Display Registers (which do not change across a page-flip)
+        * so we need only reprogram the base address.
         */
-       return intel_crtc_get_vblank_counter(crtc) != work->flip_queued_vblank;
+       intel_ring_emit(engine, MI_DISPLAY_FLIP |
+                       MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+       intel_ring_emit(engine, fb->pitches[0]);
+       intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset |
+                       obj->tiling_mode);
+
+       /* XXX Enabling the panel-fitter across page-flip is so far
+        * untested on non-native modes, so ignore it for now.
+        * pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE;
+        */
+       pf = 0;
+       pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
+       intel_ring_emit(engine, pf | pipesrc);
+
+       return 0;
 }
 
-void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe)
+static int intel_gen6_queue_flip(struct drm_device *dev,
+                                struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb,
+                                struct drm_i915_gem_object *obj,
+                                struct drm_i915_gem_request *req,
+                                uint32_t flags)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+       struct intel_engine_cs *engine = req->engine;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct intel_flip_work *work;
-       unsigned long flags;
+       uint32_t pf, pipesrc;
+       int ret;
 
-       /* Ignore early vblank irqs */
-       if (!crtc)
-               return;
+       ret = intel_ring_begin(req, 4);
+       if (ret)
+               return ret;
+
+       intel_ring_emit(engine, MI_DISPLAY_FLIP |
+                       MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+       intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
+       intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+
+       /* Contrary to the suggestions in the documentation,
+        * "Enable Panel Fitter" does not seem to be required when page
+        * flipping with a non-native mode, and worse causes a normal
+        * modeset to fail.
+        * pf = I915_READ(PF_CTL(intel_crtc->pipe)) & PF_ENABLE;
+        */
+       pf = 0;
+       pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
+       intel_ring_emit(engine, pf | pipesrc);
+
+       return 0;
+}
+
+static int intel_gen7_queue_flip(struct drm_device *dev,
+                                struct drm_crtc *crtc,
+                                struct drm_framebuffer *fb,
+                                struct drm_i915_gem_object *obj,
+                                struct drm_i915_gem_request *req,
+                                uint32_t flags)
+{
+       struct intel_engine_cs *engine = req->engine;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       uint32_t plane_bit = 0;
+       int len, ret;
+
+       switch (intel_crtc->plane) {
+       case PLANE_A:
+               plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
+               break;
+       case PLANE_B:
+               plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B;
+               break;
+       case PLANE_C:
+               plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C;
+               break;
+       default:
+               WARN_ONCE(1, "unknown plane in flip command\n");
+               return -ENODEV;
+       }
+
+       len = 4;
+       if (engine->id == RCS) {
+               len += 6;
+               /*
+                * On Gen 8, SRM is now taking an extra dword to accommodate
+                * 48bits addresses, and we need a NOOP for the batch size to
+                * stay even.
+                */
+               if (IS_GEN8(dev))
+                       len += 2;
+       }
 
        /*
-        * This is called both by irq handlers and the reset code (to complete
-        * lost pageflips) so needs the full irqsave spinlocks.
+        * BSpec MI_DISPLAY_FLIP for IVB:
+        * "The full packet must be contained within the same cache line."
+        *
+        * Currently the LRI+SRM+MI_DISPLAY_FLIP all fit within the same
+        * cacheline, if we ever start emitting more commands before
+        * the MI_DISPLAY_FLIP we may need to first emit everything else,
+        * then do the cacheline alignment, and finally emit the
+        * MI_DISPLAY_FLIP.
         */
-       spin_lock_irqsave(&dev->event_lock, flags);
-       while (!list_empty(&intel_crtc->flip_work)) {
-               work = list_first_entry(&intel_crtc->flip_work,
-                                       struct intel_flip_work,
-                                       head);
+       ret = intel_ring_cacheline_align(req);
+       if (ret)
+               return ret;
 
-               if (!pageflip_finished(intel_crtc, work) ||
-                   work_busy(&work->unpin_work))
-                       break;
+       ret = intel_ring_begin(req, len);
+       if (ret)
+               return ret;
 
-               page_flip_completed(intel_crtc, work);
+       /* Unmask the flip-done completion message. Note that the bspec says that
+        * we should do this for both the BCS and RCS, and that we must not unmask
+        * more than one flip event at any time (or ensure that one flip message
+        * can be sent by waiting for flip-done prior to queueing new flips).
+        * Experimentation says that BCS works despite DERRMR masking all
+        * flip-done completion events and that unmasking all planes at once
+        * for the RCS also doesn't appear to drop events. Setting the DERRMR
+        * to zero does lead to lockups within MI_DISPLAY_FLIP.
+        */
+       if (engine->id == RCS) {
+               intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
+               intel_ring_emit_reg(engine, DERRMR);
+               intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+                                         DERRMR_PIPEB_PRI_FLIP_DONE |
+                                         DERRMR_PIPEC_PRI_FLIP_DONE));
+               if (IS_GEN8(dev))
+                       intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 |
+                                             MI_SRM_LRM_GLOBAL_GTT);
+               else
+                       intel_ring_emit(engine, MI_STORE_REGISTER_MEM |
+                                             MI_SRM_LRM_GLOBAL_GTT);
+               intel_ring_emit_reg(engine, DERRMR);
+               intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
+               if (IS_GEN8(dev)) {
+                       intel_ring_emit(engine, 0);
+                       intel_ring_emit(engine, MI_NOOP);
+               }
        }
-       spin_unlock_irqrestore(&dev->event_lock, flags);
+
+       intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
+       intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
+       intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset);
+       intel_ring_emit(engine, (MI_NOOP));
+
+       return 0;
+}
+
+static bool use_mmio_flip(struct intel_engine_cs *engine,
+                         struct drm_i915_gem_object *obj)
+{
+       /*
+        * This is not being used for older platforms, because
+        * non-availability of flip done interrupt forces us to use
+        * CS flips. Older platforms derive flip done using some clever
+        * tricks involving the flip_pending status bits and vblank irqs.
+        * So using MMIO flips there would disrupt this mechanism.
+        */
+
+       if (engine == NULL)
+               return true;
+
+       if (INTEL_GEN(engine->i915) < 5)
+               return false;
+
+       if (i915.use_mmio_flip < 0)
+               return false;
+       else if (i915.use_mmio_flip > 0)
+               return true;
+       else if (i915.enable_execlists)
+               return true;
+       else if (obj->base.dma_buf &&
+                !reservation_object_test_signaled_rcu(obj->base.dma_buf->resv,
+                                                      false))
+               return true;
+       else
+               return engine != i915_gem_request_get_engine(obj->last_write_req);
+}
+
+static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
+                            unsigned int rotation,
+                            struct intel_flip_work *work)
+{
+       struct drm_device *dev = intel_crtc->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
+       const enum pipe pipe = intel_crtc->pipe;
+       u32 ctl, stride, tile_height;
+
+       ctl = I915_READ(PLANE_CTL(pipe, 0));
+       ctl &= ~PLANE_CTL_TILED_MASK;
+       switch (fb->modifier[0]) {
+       case DRM_FORMAT_MOD_NONE:
+               break;
+       case I915_FORMAT_MOD_X_TILED:
+               ctl |= PLANE_CTL_TILED_X;
+               break;
+       case I915_FORMAT_MOD_Y_TILED:
+               ctl |= PLANE_CTL_TILED_Y;
+               break;
+       case I915_FORMAT_MOD_Yf_TILED:
+               ctl |= PLANE_CTL_TILED_YF;
+               break;
+       default:
+               MISSING_CASE(fb->modifier[0]);
+       }
+
+       /*
+        * The stride is either expressed as a multiple of 64 bytes chunks for
+        * linear buffers or in number of tiles for tiled buffers.
+        */
+       if (intel_rotation_90_or_270(rotation)) {
+               /* stride = Surface height in tiles */
+               tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
+               stride = DIV_ROUND_UP(fb->height, tile_height);
+       } else {
+               stride = fb->pitches[0] /
+                       intel_fb_stride_alignment(dev_priv, fb->modifier[0],
+                                                 fb->pixel_format);
+       }
+
+       /*
+        * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
+        * PLANE_SURF updates, the update is then guaranteed to be atomic.
+        */
+       I915_WRITE(PLANE_CTL(pipe, 0), ctl);
+       I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
+
+       I915_WRITE(PLANE_SURF(pipe, 0), work->gtt_offset);
+       POSTING_READ(PLANE_SURF(pipe, 0));
+}
+
+static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc,
+                            struct intel_flip_work *work)
+{
+       struct drm_device *dev = intel_crtc->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_framebuffer *intel_fb =
+               to_intel_framebuffer(intel_crtc->base.primary->fb);
+       struct drm_i915_gem_object *obj = intel_fb->obj;
+       i915_reg_t reg = DSPCNTR(intel_crtc->plane);
+       u32 dspcntr;
+
+       dspcntr = I915_READ(reg);
+
+       if (obj->tiling_mode != I915_TILING_NONE)
+               dspcntr |= DISPPLANE_TILED;
+       else
+               dspcntr &= ~DISPPLANE_TILED;
+
+       I915_WRITE(reg, dspcntr);
+
+       I915_WRITE(DSPSURF(intel_crtc->plane), work->gtt_offset);
+       POSTING_READ(DSPSURF(intel_crtc->plane));
 }
 
 static void intel_mmio_flip_work_func(struct work_struct *w)
 {
        struct intel_flip_work *work =
                container_of(w, struct intel_flip_work, mmio_work);
-       struct drm_crtc *crtc = work->old_crtc_state->base.crtc;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct intel_crtc_state *crtc_state = work->new_crtc_state;
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_i915_gem_request *req;
-       int i;
+       struct intel_crtc *crtc = to_intel_crtc(work->crtc);
+       struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+       struct intel_framebuffer *intel_fb =
+               to_intel_framebuffer(crtc->base.primary->fb);
+       struct drm_i915_gem_object *obj = intel_fb->obj;
+
+       if (work->flip_queued_req)
+               WARN_ON(__i915_wait_request(work->flip_queued_req,
+                                           false, NULL,
+                                           &dev_priv->rps.mmioflips));
+
+       /* For framebuffer backed by dmabuf, wait for fence */
+       if (obj->base.dma_buf)
+               WARN_ON(reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+                                                           false, false,
+                                                           MAX_SCHEDULE_TIMEOUT) < 0);
 
-       if (!needs_modeset(&crtc_state->base) && crtc_state->update_pipe) {
-               work->put_power_domains =
-                       modeset_get_crtc_power_domains(crtc, crtc_state);
-       }
+       intel_pipe_update_start(crtc);
 
-       for (i = 0; i < work->num_planes; i++) {
-               struct intel_plane_state *old_plane_state = work->old_plane_state[i];
+       if (INTEL_GEN(dev_priv) >= 9)
+               skl_do_mmio_flip(crtc, work->rotation, work);
+       else
+               /* use_mmio_flip() retricts MMIO flips to ilk+ */
+               ilk_do_mmio_flip(crtc, work);
 
-               /* For framebuffer backed by dmabuf, wait for fence */
-               if (old_plane_state->base.fence)
-                       WARN_ON(fence_wait(old_plane_state->base.fence, false) < 0);
+       intel_pipe_update_end(crtc, work);
+}
 
-               req = old_plane_state->wait_req;
-               if (!req)
-                       continue;
+static int intel_default_queue_flip(struct drm_device *dev,
+                                   struct drm_crtc *crtc,
+                                   struct drm_framebuffer *fb,
+                                   struct drm_i915_gem_object *obj,
+                                   struct drm_i915_gem_request *req,
+                                   uint32_t flags)
+{
+       return -ENODEV;
+}
 
-               WARN_ON(__i915_wait_request(req, false, NULL,
-                                           &dev_priv->rps.mmioflips));
-       }
+static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv,
+                                     struct intel_crtc *intel_crtc,
+                                     struct intel_flip_work *work)
+{
+       u32 addr, vblank;
+
+       if (!atomic_read(&work->pending))
+               return false;
 
-       intel_frontbuffer_flip_prepare(dev, crtc_state->fb_bits);
+       smp_rmb();
 
-       intel_pipe_update_start(intel_crtc);
-       if (!needs_modeset(&crtc_state->base)) {
-               if (crtc_state->base.color_mgmt_changed || crtc_state->update_pipe) {
-                       intel_color_set_csc(&crtc_state->base);
-                       intel_color_load_luts(&crtc_state->base);
-               }
+       vblank = intel_crtc_get_vblank_counter(intel_crtc);
+       if (work->flip_ready_vblank == 0) {
+               if (work->flip_queued_req &&
+                   !i915_gem_request_completed(work->flip_queued_req, true))
+                       return false;
 
-               if (crtc_state->update_pipe)
-                       intel_update_pipe_config(intel_crtc, work->old_crtc_state);
-               else if (INTEL_INFO(dev)->gen >= 9)
-                       skl_detach_scalers(intel_crtc);
+               work->flip_ready_vblank = vblank;
        }
 
-       for (i = 0; i < work->num_planes; i++) {
-               struct intel_plane_state *new_plane_state = work->new_plane_state[i];
-               struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane);
+       if (vblank - work->flip_ready_vblank < 3)
+               return false;
 
-               plane->update_plane(&plane->base, crtc_state, new_plane_state);
-       }
+       /* Potential stall - if we see that the flip has happened,
+        * assume a missed interrupt. */
+       if (INTEL_GEN(dev_priv) >= 4)
+               addr = I915_HI_DISPBASE(I915_READ(DSPSURF(intel_crtc->plane)));
+       else
+               addr = I915_READ(DSPADDR(intel_crtc->plane));
 
-       intel_pipe_update_end(intel_crtc, work);
+       /* There is a potential issue here with a false positive after a flip
+        * to the same address. We could address this by checking for a
+        * non-incrementing frame counter.
+        */
+       return addr == work->gtt_offset;
 }
 
-static struct fence *intel_get_excl_fence(struct drm_i915_gem_object *obj)
+void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe)
 {
-       struct reservation_object *resv;
-
-
-       if (!obj->base.dma_buf)
-               return NULL;
-
-       resv = obj->base.dma_buf->resv;
-
-       /* For framebuffer backed by dmabuf, wait for fence */
-       while (1) {
-               struct fence *fence_excl, *ret = NULL;
+       struct drm_device *dev = dev_priv->dev;
+       struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_flip_work *work;
 
-               rcu_read_lock();
+       WARN_ON(!in_interrupt());
 
-               fence_excl = rcu_dereference(resv->fence_excl);
-               if (fence_excl)
-                       ret = fence_get_rcu(fence_excl);
+       if (crtc == NULL)
+               return;
 
-               rcu_read_unlock();
+       spin_lock(&dev->event_lock);
+       work = intel_crtc->flip_work;
 
-               if (ret == fence_excl)
-                       return ret;
+       if (work != NULL && !is_mmio_work(work) &&
+           __pageflip_stall_check_cs(dev_priv, intel_crtc, work)) {
+               WARN_ONCE(1,
+                         "Kicking stuck page flip: queued at %d, now %d\n",
+                       work->flip_queued_vblank, intel_crtc_get_vblank_counter(intel_crtc));
+               page_flip_completed(intel_crtc);
+               work = NULL;
        }
+
+       if (work != NULL && !is_mmio_work(work) &&
+           intel_crtc_get_vblank_counter(intel_crtc) - work->flip_queued_vblank > 1)
+               intel_queue_rps_boost_for_request(work->flip_queued_req);
+       spin_unlock(&dev->event_lock);
 }
 
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
@@ -11062,20 +11649,17 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_plane_state *old_state, *new_state = NULL;
-       struct drm_crtc_state *new_crtc_state = NULL;
-       struct drm_framebuffer *old_fb = crtc->primary->state->fb;
+       struct drm_framebuffer *old_fb = crtc->primary->fb;
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        struct drm_plane *primary = crtc->primary;
+       enum pipe pipe = intel_crtc->pipe;
        struct intel_flip_work *work;
+       struct intel_engine_cs *engine;
+       bool mmio_flip;
+       struct drm_i915_gem_request *request = NULL;
        int ret;
 
-       old_state = crtc->primary->state;
-
-       if (!crtc->state->active)
-               return -EINVAL;
-
        /*
         * drm_mode_page_flip_ioctl() should already catch this, but double
         * check to be safe.  In the future we may enable pageflipping from
@@ -11085,7 +11669,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
                return -EBUSY;
 
        /* Can't change pixel format via MI display flips. */
-       if (fb->pixel_format != old_fb->pixel_format)
+       if (fb->pixel_format != crtc->primary->fb->pixel_format)
                return -EINVAL;
 
        /*
@@ -11093,131 +11677,205 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
         * Note that pitch changes could also affect these register.
         */
        if (INTEL_INFO(dev)->gen > 3 &&
-           (fb->offsets[0] != old_fb->offsets[0] ||
-            fb->pitches[0] != old_fb->pitches[0]))
+           (fb->offsets[0] != crtc->primary->fb->offsets[0] ||
+            fb->pitches[0] != crtc->primary->fb->pitches[0]))
                return -EINVAL;
 
-       work = kzalloc(sizeof(*work), GFP_KERNEL);
-       new_crtc_state = intel_crtc_duplicate_state(crtc);
-       new_state = intel_plane_duplicate_state(primary);
-
-       if (!work || !new_crtc_state || !new_state) {
-               ret = -ENOMEM;
-               goto cleanup;
-       }
+       if (i915_terminally_wedged(&dev_priv->gpu_error))
+               goto out_hang;
 
-       drm_framebuffer_unreference(new_state->fb);
-       drm_framebuffer_reference(fb);
-       new_state->fb = fb;
+       work = kzalloc(sizeof(*work), GFP_KERNEL);
+       if (work == NULL)
+               return -ENOMEM;
 
        work->event = event;
+       work->crtc = crtc;
+       work->old_fb = old_fb;
        INIT_WORK(&work->unpin_work, intel_unpin_work_fn);
-       INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-
-       work->new_crtc_state = to_intel_crtc_state(new_crtc_state);
-       work->old_crtc_state = intel_crtc->config;
-
-       work->fb_bits = to_intel_plane(primary)->frontbuffer_bit;
-       work->new_crtc_state->fb_bits = work->fb_bits;
 
-       work->can_async_unpin = true;
-       work->num_planes = 1;
-       work->old_plane_state[0] = to_intel_plane_state(old_state);
-       work->new_plane_state[0] = to_intel_plane_state(new_state);
-
-       /* Step 1: vblank waiting and workqueue throttling,
-        * similar to intel_atomic_prepare_commit
-        */
        ret = drm_crtc_vblank_get(crtc);
        if (ret)
-               goto cleanup;
+               goto free_work;
 
        /* We borrow the event spin lock for protecting flip_work */
        spin_lock_irq(&dev->event_lock);
-       if (!list_empty(&intel_crtc->flip_work)) {
-               struct intel_flip_work *old_work;
-
-               old_work = list_last_entry(&intel_crtc->flip_work,
-                                          struct intel_flip_work, head);
-
+       if (intel_crtc->flip_work) {
                /* Before declaring the flip queue wedged, check if
                 * the hardware completed the operation behind our backs.
                 */
-               if (pageflip_finished(intel_crtc, old_work)) {
+               if (pageflip_finished(intel_crtc, intel_crtc->flip_work)) {
                        DRM_DEBUG_DRIVER("flip queue: previous flip completed, continuing\n");
-                       page_flip_completed(intel_crtc, old_work);
+                       page_flip_completed(intel_crtc);
                } else {
                        DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
                        spin_unlock_irq(&dev->event_lock);
 
-                       ret = -EBUSY;
-                       goto cleanup_vblank;
+                       drm_crtc_vblank_put(crtc);
+                       kfree(work);
+                       return -EBUSY;
                }
        }
-       list_add_tail(&work->head, &intel_crtc->flip_work);
+       intel_crtc->flip_work = work;
        spin_unlock_irq(&dev->event_lock);
 
        if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
                flush_workqueue(dev_priv->wq);
 
-       /* step 2, similar to intel_prepare_plane_fb */
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
-       if (ret)
-               goto cleanup_work;
+       /* Reference the objects for the scheduled work. */
+       drm_framebuffer_reference(work->old_fb);
+       drm_gem_object_reference(&obj->base);
 
-       ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-       if (ret)
-               goto cleanup_unlock;
+       crtc->primary->fb = fb;
+       update_state_fb(crtc->primary);
+       intel_fbc_pre_update(intel_crtc);
 
-       i915_gem_track_fb(intel_fb_obj(old_fb), obj,
-                         to_intel_plane(primary)->frontbuffer_bit);
+       work->pending_flip_obj = obj;
+
+       ret = i915_mutex_lock_interruptible(dev);
+       if (ret)
+               goto cleanup;
 
-       /* point of no return, swap state */
-       primary->state = new_state;
-       crtc->state = new_crtc_state;
-       intel_crtc->config = to_intel_crtc_state(new_crtc_state);
-       primary->fb = fb;
+       intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+       if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+               ret = -EIO;
+               goto cleanup;
+       }
 
-       /* scheduling flip work */
        atomic_inc(&intel_crtc->unpin_work_count);
 
-       if (obj->last_write_req &&
-           !i915_gem_request_completed(obj->last_write_req, true))
-               i915_gem_request_assign(&work->old_plane_state[0]->wait_req,
+       if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
+               work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1;
+
+       if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
+               engine = &dev_priv->engine[BCS];
+               if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode)
+                       /* vlv: DISPLAY_FLIP fails to change tiling */
+                       engine = NULL;
+       } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
+               engine = &dev_priv->engine[BCS];
+       } else if (INTEL_INFO(dev)->gen >= 7) {
+               engine = i915_gem_request_get_engine(obj->last_write_req);
+               if (engine == NULL || engine->id != RCS)
+                       engine = &dev_priv->engine[BCS];
+       } else {
+               engine = &dev_priv->engine[RCS];
+       }
+
+       mmio_flip = use_mmio_flip(engine, obj);
+
+       /* When using CS flips, we want to emit semaphores between rings.
+        * However, when using mmio flips we will create a task to do the
+        * synchronisation, so all we want here is to pin the framebuffer
+        * into the display plane and skip any waits.
+        */
+       if (!mmio_flip) {
+               ret = i915_gem_object_sync(obj, engine, &request);
+               if (!ret && !request) {
+                       request = i915_gem_request_alloc(engine, NULL);
+                       ret = PTR_ERR_OR_ZERO(request);
+               }
+
+               if (ret)
+                       goto cleanup_pending;
+       }
+
+       ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+       if (ret)
+               goto cleanup_pending;
+
+       work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
+                                                 obj, 0);
+       work->gtt_offset += intel_crtc->dspaddr_offset;
+       work->rotation = crtc->primary->state->rotation;
+
+       if (mmio_flip) {
+               INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
+
+               i915_gem_request_assign(&work->flip_queued_req,
                                        obj->last_write_req);
 
-       if (obj->base.dma_buf)
-               work->old_plane_state[0]->base.fence = intel_get_excl_fence(obj);
+               schedule_work(&work->mmio_work);
+       } else {
+               i915_gem_request_assign(&work->flip_queued_req, request);
+               ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
+                                                  page_flip_flags);
+               if (ret)
+                       goto cleanup_unpin;
 
-       intel_fbc_pre_update(intel_crtc,
-                            to_intel_crtc_state(new_crtc_state),
-                            to_intel_plane_state(new_state));
+               intel_mark_page_flip_active(intel_crtc, work);
 
-       schedule_work(&work->mmio_work);
+               i915_add_request_no_flush(request);
+       }
 
+       i915_gem_track_fb(intel_fb_obj(old_fb), obj,
+                         to_intel_plane(primary)->frontbuffer_bit);
        mutex_unlock(&dev->struct_mutex);
 
+       intel_frontbuffer_flip_prepare(dev,
+                                      to_intel_plane(primary)->frontbuffer_bit);
+
        trace_i915_flip_request(intel_crtc->plane, obj);
 
        return 0;
 
-cleanup_unlock:
+cleanup_unpin:
+       intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
+cleanup_pending:
+       if (!IS_ERR_OR_NULL(request))
+               i915_add_request_no_flush(request);
+       atomic_dec(&intel_crtc->unpin_work_count);
        mutex_unlock(&dev->struct_mutex);
-cleanup_work:
+cleanup:
+       crtc->primary->fb = old_fb;
+       update_state_fb(crtc->primary);
+
+       drm_gem_object_unreference_unlocked(&obj->base);
+       drm_framebuffer_unreference(work->old_fb);
+
        spin_lock_irq(&dev->event_lock);
-       list_del(&work->head);
+       intel_crtc->flip_work = NULL;
        spin_unlock_irq(&dev->event_lock);
 
-cleanup_vblank:
        drm_crtc_vblank_put(crtc);
-cleanup:
-       if (new_state)
-               intel_plane_destroy_state(primary, new_state);
+free_work:
+       kfree(work);
+
+       if (ret == -EIO) {
+               struct drm_atomic_state *state;
+               struct drm_plane_state *plane_state;
+
+out_hang:
+               state = drm_atomic_state_alloc(dev);
+               if (!state)
+                       return -ENOMEM;
+               state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
 
-       if (new_crtc_state)
-               intel_crtc_destroy_state(crtc, new_crtc_state);
+retry:
+               plane_state = drm_atomic_get_plane_state(state, primary);
+               ret = PTR_ERR_OR_ZERO(plane_state);
+               if (!ret) {
+                       drm_atomic_set_fb_for_plane(plane_state, fb);
+
+                       ret = drm_atomic_set_crtc_for_plane(plane_state, crtc);
+                       if (!ret)
+                               ret = drm_atomic_commit(state);
+               }
+
+               if (ret == -EDEADLK) {
+                       drm_modeset_backoff(state->acquire_ctx);
+                       drm_atomic_state_clear(state);
+                       goto retry;
+               }
+
+               if (ret)
+                       drm_atomic_state_free(state);
 
-       intel_free_flip_work(work);
+               if (ret == 0 && event) {
+                       spin_lock_irq(&dev->event_lock);
+                       drm_crtc_send_vblank_event(crtc, event);
+                       spin_unlock_irq(&dev->event_lock);
+               }
+       }
        return ret;
 }
 
@@ -11277,12 +11935,12 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct intel_plane_state *old_plane_state =
                to_intel_plane_state(plane->state);
-       int idx = intel_crtc->base.base.id, ret;
        bool mode_changed = needs_modeset(crtc_state);
        bool was_crtc_enabled = crtc->state->active;
        bool is_crtc_enabled = crtc_state->active;
        bool turn_off, turn_on, visible, was_visible;
        struct drm_framebuffer *fb = plane_state->fb;
+       int ret;
 
        if (crtc_state && INTEL_INFO(dev)->gen >= 9 &&
            plane->type != DRM_PLANE_TYPE_CURSOR) {
@@ -11321,11 +11979,15 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
        turn_off = was_visible && (!visible || mode_changed);
        turn_on = visible && (!was_visible || mode_changed);
 
-       DRM_DEBUG_ATOMIC("[CRTC:%i] has [PLANE:%i] with fb %i\n", idx,
-                        plane->base.id, fb ? fb->base.id : -1);
+       DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n",
+                        intel_crtc->base.base.id,
+                        intel_crtc->base.name,
+                        plane->base.id, plane->name,
+                        fb ? fb->base.id : -1);
 
-       DRM_DEBUG_ATOMIC("[PLANE:%i] visible %i -> %i, off %i, on %i, ms %i\n",
-                        plane->base.id, was_visible, visible,
+       DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n",
+                        plane->base.id, plane->name,
+                        was_visible, visible,
                         turn_off, turn_on, mode_changed);
 
        if (turn_on) {
@@ -11616,7 +12278,8 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
        struct intel_plane_state *state;
        struct drm_framebuffer *fb;
 
-       DRM_DEBUG_KMS("[CRTC:%d]%s config %p for pipe %c\n", crtc->base.base.id,
+       DRM_DEBUG_KMS("[CRTC:%d:%s]%s config %p for pipe %c\n",
+                     crtc->base.base.id, crtc->base.name,
                      context, pipe_config, pipe_name(crtc->pipe));
 
        DRM_DEBUG_KMS("cpu_transcoder: %s\n", transcoder_name(pipe_config->cpu_transcoder));
@@ -11717,29 +12380,24 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
                state = to_intel_plane_state(plane->state);
                fb = state->base.fb;
                if (!fb) {
-                       DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d "
-                               "disabled, scaler_id = %d\n",
-                               plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
-                               plane->base.id, intel_plane->pipe,
-                               (crtc->base.primary == plane) ? 0 : intel_plane->plane + 1,
-                               drm_plane_index(plane), state->scaler_id);
+                       DRM_DEBUG_KMS("[PLANE:%d:%s] disabled, scaler_id = %d\n",
+                                     plane->base.id, plane->name, state->scaler_id);
                        continue;
                }
 
-               DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d enabled",
-                       plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
-                       plane->base.id, intel_plane->pipe,
-                       crtc->base.primary == plane ? 0 : intel_plane->plane + 1,
-                       drm_plane_index(plane));
-               DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = 0x%x",
-                       fb->base.id, fb->width, fb->height, fb->pixel_format);
-               DRM_DEBUG_KMS("\tscaler:%d src (%u, %u) %ux%u dst (%u, %u) %ux%u\n",
-                       state->scaler_id,
-                       state->src.x1 >> 16, state->src.y1 >> 16,
-                       drm_rect_width(&state->src) >> 16,
-                       drm_rect_height(&state->src) >> 16,
-                       state->dst.x1, state->dst.y1,
-                       drm_rect_width(&state->dst), drm_rect_height(&state->dst));
+               DRM_DEBUG_KMS("[PLANE:%d:%s] enabled",
+                             plane->base.id, plane->name);
+               DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = %s",
+                             fb->base.id, fb->width, fb->height,
+                             drm_get_format_name(fb->pixel_format));
+               DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n",
+                             state->scaler_id,
+                             state->src.x1 >> 16, state->src.y1 >> 16,
+                             drm_rect_width(&state->src) >> 16,
+                             drm_rect_height(&state->src) >> 16,
+                             state->dst.x1, state->dst.y1,
+                             drm_rect_width(&state->dst),
+                             drm_rect_height(&state->dst));
        }
 }
 
@@ -12339,8 +12997,7 @@ verify_connector_state(struct drm_device *dev, struct drm_crtc *crtc)
                if (state->crtc != crtc)
                        continue;
 
-               intel_connector_verify_state(to_intel_connector(connector),
-                                            connector->state);
+               intel_connector_verify_state(to_intel_connector(connector));
 
                I915_STATE_WARN(state->best_encoder != encoder,
                     "connector's atomic encoder doesn't match legacy encoder\n");
@@ -12407,7 +13064,7 @@ verify_crtc_state(struct drm_crtc *crtc,
        pipe_config->base.crtc = crtc;
        pipe_config->base.state = old_state;
 
-       DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
+       DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
        active = dev_priv->display.get_pipe_config(intel_crtc, pipe_config);
 
@@ -12542,7 +13199,12 @@ intel_modeset_verify_crtc(struct drm_crtc *crtc,
                         struct drm_crtc_state *old_state,
                         struct drm_crtc_state *new_state)
 {
+       if (!needs_modeset(new_state) &&
+           !to_intel_crtc_state(new_state)->update_pipe)
+               return;
+
        verify_wm_state(crtc, new_state);
+       verify_connector_state(crtc->dev, crtc);
        verify_crtc_state(crtc, old_state, new_state);
        verify_shared_dpll_state(crtc->dev, crtc, old_state, new_state);
 }
@@ -12763,9 +13425,17 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
         * adjusted_mode bits in the crtc directly.
         */
        if (dev_priv->display.modeset_calc_cdclk) {
+               if (!intel_state->cdclk_pll_vco)
+                       intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco;
+               if (!intel_state->cdclk_pll_vco)
+                       intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq;
+
                ret = dev_priv->display.modeset_calc_cdclk(state);
+               if (ret < 0)
+                       return ret;
 
-               if (!ret && intel_state->dev_cdclk != dev_priv->cdclk_freq)
+               if (intel_state->dev_cdclk != dev_priv->cdclk_freq ||
+                   intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)
                        ret = intel_modeset_all_pipes(state);
 
                if (ret < 0)
@@ -12886,33 +13556,11 @@ static int intel_atomic_check(struct drm_device *dev,
        return calc_watermark_data(state);
 }
 
-static bool needs_work(struct drm_crtc_state *crtc_state)
-{
-       /* hw state checker needs to run */
-       if (needs_modeset(crtc_state))
-               return true;
-
-       /* unpin old fb's, possibly vblank update */
-       if (crtc_state->planes_changed)
-               return true;
-
-       /* pipe parameters need to be updated, and hw state checker */
-       if (to_intel_crtc_state(crtc_state)->update_pipe)
-               return true;
-
-       /* vblank event requested? */
-       if (crtc_state->event)
-               return true;
-
-       return false;
-}
-
 static int intel_atomic_prepare_commit(struct drm_device *dev,
                                       struct drm_atomic_state *state,
                                       bool nonblock)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
        struct drm_plane_state *plane_state;
        struct drm_crtc_state *crtc_state;
        struct drm_plane *plane;
@@ -12925,44 +13573,15 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
        }
 
        for_each_crtc_in_state(state, crtc, crtc_state, i) {
-               struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-               struct intel_flip_work *work;
-
-               if (!state->legacy_cursor_update) {
-                       ret = intel_crtc_wait_for_pending_flips(crtc);
-                       if (ret)
-                               return ret;
-
-                       if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
-                               flush_workqueue(dev_priv->wq);
-               }
-
-               /* test if we need to update something */
-               if (!needs_work(crtc_state))
+               if (state->legacy_cursor_update)
                        continue;
 
-               intel_state->work[i] = work =
-                       kzalloc(sizeof(**intel_state->work), GFP_KERNEL);
-
-               if (!work)
-                       return -ENOMEM;
-
-               if (needs_modeset(crtc_state) ||
-                   to_intel_crtc_state(crtc_state)->update_pipe) {
-                       work->num_old_connectors = hweight32(crtc->state->connector_mask);
-
-                       work->old_connector_state = kcalloc(work->num_old_connectors,
-                                                           sizeof(*work->old_connector_state),
-                                                           GFP_KERNEL);
-
-                       work->num_new_connectors = hweight32(crtc_state->connector_mask);
-                       work->new_connector_state = kcalloc(work->num_new_connectors,
-                                                           sizeof(*work->new_connector_state),
-                                                           GFP_KERNEL);
+               ret = intel_crtc_wait_for_pending_flips(crtc);
+               if (ret)
+                       return ret;
 
-                       if (!work->old_connector_state || !work->new_connector_state)
-                               return -ENOMEM;
-               }
+               if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
+                       flush_workqueue(dev_priv->wq);
        }
 
        ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -12977,15 +13596,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
                        struct intel_plane_state *intel_plane_state =
                                to_intel_plane_state(plane_state);
 
-                       if (plane_state->fence) {
-                               long lret = fence_wait(plane_state->fence, true);
-
-                               if (lret < 0) {
-                                       ret = lret;
-                                       break;
-                               }
-                       }
-
                        if (!intel_plane_state->wait_req)
                                continue;
 
@@ -13015,126 +13625,69 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc)
        return dev->driver->get_vblank_counter(dev, crtc->pipe);
 }
 
-static void intel_prepare_work(struct drm_crtc *crtc,
-                              struct intel_flip_work *work,
-                              struct drm_atomic_state *state,
-                              struct drm_crtc_state *old_crtc_state)
+static void intel_atomic_wait_for_vblanks(struct drm_device *dev,
+                                         struct drm_i915_private *dev_priv,
+                                         unsigned crtc_mask)
 {
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct drm_plane_state *old_plane_state;
-       struct drm_plane *plane;
-       int i, j = 0;
+       unsigned last_vblank_count[I915_MAX_PIPES];
+       enum pipe pipe;
+       int ret;
 
-       INIT_WORK(&work->unpin_work, intel_unpin_work_fn);
-       INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-       atomic_inc(&intel_crtc->unpin_work_count);
+       if (!crtc_mask)
+               return;
 
-       for_each_plane_in_state(state, plane, old_plane_state, i) {
-               struct intel_plane_state *old_state = to_intel_plane_state(old_plane_state);
-               struct intel_plane_state *new_state = to_intel_plane_state(plane->state);
+       for_each_pipe(dev_priv, pipe) {
+               struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 
-               if (old_state->base.crtc != crtc &&
-                   new_state->base.crtc != crtc)
+               if (!((1 << pipe) & crtc_mask))
                        continue;
 
-               if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
-                       plane->fb = new_state->base.fb;
-                       crtc->x = new_state->base.src_x >> 16;
-                       crtc->y = new_state->base.src_y >> 16;
+               ret = drm_crtc_vblank_get(crtc);
+               if (WARN_ON(ret != 0)) {
+                       crtc_mask &= ~(1 << pipe);
+                       continue;
                }
 
-               old_state->wait_req = new_state->wait_req;
-               new_state->wait_req = NULL;
-
-               old_state->base.fence = new_state->base.fence;
-               new_state->base.fence = NULL;
-
-               /* remove plane state from the atomic state and move it to work */
-               old_plane_state->state = NULL;
-               state->planes[i] = NULL;
-               state->plane_states[i] = NULL;
-
-               work->old_plane_state[j] = old_state;
-               work->new_plane_state[j++] = new_state;
+               last_vblank_count[pipe] = drm_crtc_vblank_count(crtc);
        }
 
-       old_crtc_state->state = NULL;
-       state->crtcs[drm_crtc_index(crtc)] = NULL;
-       state->crtc_states[drm_crtc_index(crtc)] = NULL;
-
-       work->old_crtc_state = to_intel_crtc_state(old_crtc_state);
-       work->new_crtc_state = to_intel_crtc_state(crtc->state);
-       work->num_planes = j;
-
-       work->event = crtc->state->event;
-       crtc->state->event = NULL;
-
-       if (needs_modeset(crtc->state) || work->new_crtc_state->update_pipe) {
-               struct drm_connector *conn;
-               struct drm_connector_state *old_conn_state;
-               int k = 0;
-
-               j = 0;
-
-               /*
-                * intel_unpin_work_fn cannot depend on the connector list
-                * because it may be freed from underneath it, so add
-                * them all to the work struct while we're holding locks.
-                */
-               for_each_connector_in_state(state, conn, old_conn_state, i) {
-                       if (old_conn_state->crtc == crtc) {
-                               work->old_connector_state[j++] = old_conn_state;
-
-                               state->connectors[i] = NULL;
-                               state->connector_states[i] = NULL;
-                       }
-               }
-
-               /* If another crtc has stolen the connector from state,
-                * then for_each_connector_in_state is no longer reliable,
-                * so use drm_for_each_connector here.
-                */
-               drm_for_each_connector(conn, state->dev)
-                       if (conn->state->crtc == crtc)
-                               work->new_connector_state[k++] = conn->state;
-
-               WARN(j != work->num_old_connectors, "j = %i, expected %i\n", j, work->num_old_connectors);
-               WARN(k != work->num_new_connectors, "k = %i, expected %i\n", k, work->num_new_connectors);
-       } else if (!work->new_crtc_state->update_wm_post)
-               work->can_async_unpin = true;
+       for_each_pipe(dev_priv, pipe) {
+               struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+               long lret;
 
-       work->fb_bits = work->new_crtc_state->fb_bits;
-}
+               if (!((1 << pipe) & crtc_mask))
+                       continue;
 
-static void intel_schedule_unpin(struct drm_crtc *crtc,
-                                struct intel_atomic_state *state,
-                                struct intel_flip_work *work)
-{
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+               lret = wait_event_timeout(dev->vblank[pipe].queue,
+                               last_vblank_count[pipe] !=
+                                       drm_crtc_vblank_count(crtc),
+                               msecs_to_jiffies(50));
 
-       to_intel_crtc(crtc)->config = work->new_crtc_state;
+               WARN(!lret, "pipe %c vblank wait timed out\n", pipe_name(pipe));
 
-       queue_work(dev_priv->wq, &work->unpin_work);
+               drm_crtc_vblank_put(crtc);
+       }
 }
 
-static void intel_schedule_update(struct drm_crtc *crtc,
-                                 struct intel_atomic_state *state,
-                                 struct intel_flip_work *work)
+static bool needs_vblank_wait(struct intel_crtc_state *crtc_state)
 {
-       struct drm_device *dev = crtc->dev;
+       /* fb updated, need to unpin old fb */
+       if (crtc_state->fb_changed)
+               return true;
 
-       if (work->can_async_unpin) {
-               INIT_LIST_HEAD(&work->head);
-               intel_schedule_unpin(crtc, state, work);
-               return;
-       }
+       /* wm changes, need vblank before final wm's */
+       if (crtc_state->update_wm_post)
+               return true;
 
-       spin_lock_irq(&dev->event_lock);
-       list_add_tail(&work->head, &to_intel_crtc(crtc)->flip_work);
-       spin_unlock_irq(&dev->event_lock);
+       /*
+        * cxsr is re-enabled after vblank.
+        * This is already handled by crtc_state->update_wm_post,
+        * but added for clarity.
+        */
+       if (crtc_state->disable_cxsr)
+               return true;
 
-       intel_schedule_unpin(crtc, state, work);
+       return false;
 }
 
 /**
@@ -13161,7 +13714,11 @@ static int intel_atomic_commit(struct drm_device *dev,
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc_state *old_crtc_state;
        struct drm_crtc *crtc;
+       struct intel_crtc_state *intel_cstate;
        int ret = 0, i;
+       bool hw_check = intel_state->modeset;
+       unsigned long put_domains[I915_MAX_PIPES] = {};
+       unsigned crtc_vblank_mask = 0;
 
        ret = intel_atomic_prepare_commit(dev, state, nonblock);
        if (ret) {
@@ -13179,20 +13736,27 @@ static int intel_atomic_commit(struct drm_device *dev,
                       sizeof(intel_state->min_pixclk));
                dev_priv->active_crtcs = intel_state->active_crtcs;
                dev_priv->atomic_cdclk_freq = intel_state->cdclk;
+
+               intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
        }
 
        for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
                struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
+               if (needs_modeset(crtc->state) ||
+                   to_intel_crtc_state(crtc->state)->update_pipe) {
+                       hw_check = true;
+
+                       put_domains[to_intel_crtc(crtc)->pipe] =
+                               modeset_get_crtc_power_domains(crtc,
+                                       to_intel_crtc_state(crtc->state));
+               }
+
                if (!needs_modeset(crtc->state))
                        continue;
 
                intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 
-               intel_state->work[i]->put_power_domains =
-                       modeset_get_crtc_power_domains(crtc,
-                               to_intel_crtc_state(crtc->state));
-
                if (old_crtc_state->active) {
                        intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
                        dev_priv->display.crtc_disable(crtc);
@@ -13220,7 +13784,8 @@ static int intel_atomic_commit(struct drm_device *dev,
                drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
 
                if (dev_priv->display.modeset_commit_cdclk &&
-                   intel_state->dev_cdclk != dev_priv->cdclk_freq)
+                   (intel_state->dev_cdclk != dev_priv->cdclk_freq ||
+                    intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco))
                        dev_priv->display.modeset_commit_cdclk(state);
 
                intel_modeset_verify_disabled(dev);
@@ -13244,42 +13809,51 @@ static int intel_atomic_commit(struct drm_device *dev,
 
                if (crtc->state->active &&
                    drm_atomic_get_existing_plane_state(state, crtc->primary))
-                       intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state));
+                       intel_fbc_enable(intel_crtc);
 
                if (crtc->state->active &&
                    (crtc->state->planes_changed || update_pipe))
                        drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
+
+               if (pipe_config->base.active && needs_vblank_wait(pipe_config))
+                       crtc_vblank_mask |= 1 << i;
        }
 
        /* FIXME: add subpixel order */
 
+       if (!state->legacy_cursor_update)
+               intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_vblank_mask);
+
+       /*
+        * Now that the vblank has passed, we can go ahead and program the
+        * optimal watermarks on platforms that need two-step watermark
+        * programming.
+        *
+        * TODO: Move this (and other cleanup) to an async worker eventually.
+        */
        for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
-               struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-               struct intel_flip_work *work =
-                       intel_state->work[i];
-
-               if (!work) {
-                       if (!list_empty_careful(&intel_crtc->flip_work)) {
-                               spin_lock_irq(&dev->event_lock);
-                               if (!list_empty(&intel_crtc->flip_work))
-                                       work = list_last_entry(&intel_crtc->flip_work,
-                                                              struct intel_flip_work, head);
-
-                               if (work && work->new_crtc_state == to_intel_crtc_state(old_crtc_state)) {
-                                       work->free_new_crtc_state = true;
-                                       state->crtc_states[i] = NULL;
-                                       state->crtcs[i] = NULL;
-                               }
-                               spin_unlock_irq(&dev->event_lock);
-                       }
-                       continue;
-               }
+               intel_cstate = to_intel_crtc_state(crtc->state);
+
+               if (dev_priv->display.optimize_watermarks)
+                       dev_priv->display.optimize_watermarks(intel_cstate);
+       }
+
+       for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+               intel_post_plane_update(to_intel_crtc_state(old_crtc_state));
+
+               if (put_domains[i])
+                       modeset_put_power_domains(dev_priv, put_domains[i]);
 
-               intel_state->work[i] = NULL;
-               intel_prepare_work(crtc, work, state, old_crtc_state);
-               intel_schedule_update(crtc, intel_state, work);
+               intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
        }
 
+       if (intel_state->modeset)
+               intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
+
+       mutex_lock(&dev->struct_mutex);
+       drm_atomic_helper_cleanup_planes(dev, state);
+       mutex_unlock(&dev->struct_mutex);
+
        drm_atomic_state_free(state);
 
        /* As one of the primary mmio accessors, KMS has a high likelihood
@@ -13307,8 +13881,8 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc)
 
        state = drm_atomic_state_alloc(dev);
        if (!state) {
-               DRM_DEBUG_KMS("[CRTC:%d] crtc restore failed, out of memory",
-                             crtc->base.id);
+               DRM_DEBUG_KMS("[CRTC:%d:%s] crtc restore failed, out of memory",
+                             crtc->base.id, crtc->name);
                return;
        }
 
@@ -13371,20 +13945,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
        struct intel_plane *intel_plane = to_intel_plane(plane);
        struct drm_i915_gem_object *obj = intel_fb_obj(fb);
        struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-       struct drm_crtc *crtc = new_state->crtc ?: plane->state->crtc;
        int ret = 0;
 
        if (!obj && !old_obj)
                return 0;
 
-       if (WARN_ON(!new_state->state) || WARN_ON(!crtc) ||
-           WARN_ON(!to_intel_atomic_state(new_state->state)->work[to_intel_crtc(crtc)->pipe])) {
-               if (WARN_ON(old_obj != obj))
-                       return -EINVAL;
-
-               return 0;
-       }
-
        if (old_obj) {
                struct drm_crtc_state *crtc_state =
                        drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc);
@@ -13409,6 +13974,19 @@ intel_prepare_plane_fb(struct drm_plane *plane,
                }
        }
 
+       /* For framebuffer backed by dmabuf, wait for fence */
+       if (obj && obj->base.dma_buf) {
+               long lret;
+
+               lret = reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+                                                          false, true,
+                                                          MAX_SCHEDULE_TIMEOUT);
+               if (lret == -ERESTARTSYS)
+                       return lret;
+
+               WARN(lret < 0, "waiting returns %li\n", lret);
+       }
+
        if (!obj) {
                ret = 0;
        } else if (plane->type == DRM_PLANE_TYPE_CURSOR &&
@@ -13428,8 +14006,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
                        i915_gem_request_assign(&plane_state->wait_req,
                                                obj->last_write_req);
-
-                       plane_state->base.fence = intel_get_excl_fence(obj);
                }
 
                i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
@@ -13472,9 +14048,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
                i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
 
        i915_gem_request_assign(&old_intel_state->wait_req, NULL);
-
-       fence_put(old_intel_state->base.fence);
-       old_intel_state->base.fence = NULL;
 }
 
 int
@@ -13543,8 +14116,6 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc,
                to_intel_crtc_state(old_crtc_state);
        bool modeset = needs_modeset(crtc->state);
 
-       intel_frontbuffer_flip_prepare(dev, to_intel_crtc_state(crtc->state)->fb_bits);
-
        /* Perform vblank evasion around commit operation */
        intel_pipe_update_start(intel_crtc);
 
@@ -13579,9 +14150,11 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc,
  */
 void intel_plane_destroy(struct drm_plane *plane)
 {
-       struct intel_plane *intel_plane = to_intel_plane(plane);
+       if (!plane)
+               return;
+
        drm_plane_cleanup(plane);
-       kfree(intel_plane);
+       kfree(to_intel_plane(plane));
 }
 
 const struct drm_plane_funcs intel_plane_funcs = {
@@ -13653,10 +14226,24 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
                primary->disable_plane = i9xx_disable_primary_plane;
        }
 
-       ret = drm_universal_plane_init(dev, &primary->base, 0,
-                                      &intel_plane_funcs,
-                                      intel_primary_formats, num_formats,
-                                      DRM_PLANE_TYPE_PRIMARY, NULL);
+       if (INTEL_INFO(dev)->gen >= 9)
+               ret = drm_universal_plane_init(dev, &primary->base, 0,
+                                              &intel_plane_funcs,
+                                              intel_primary_formats, num_formats,
+                                              DRM_PLANE_TYPE_PRIMARY,
+                                              "plane 1%c", pipe_name(pipe));
+       else if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
+               ret = drm_universal_plane_init(dev, &primary->base, 0,
+                                              &intel_plane_funcs,
+                                              intel_primary_formats, num_formats,
+                                              DRM_PLANE_TYPE_PRIMARY,
+                                              "primary %c", pipe_name(pipe));
+       else
+               ret = drm_universal_plane_init(dev, &primary->base, 0,
+                                              &intel_plane_funcs,
+                                              intel_primary_formats, num_formats,
+                                              DRM_PLANE_TYPE_PRIMARY,
+                                              "plane %c", plane_name(primary->plane));
        if (ret)
                goto fail;
 
@@ -13814,7 +14401,8 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
                                       &intel_plane_funcs,
                                       intel_cursor_formats,
                                       ARRAY_SIZE(intel_cursor_formats),
-                                      DRM_PLANE_TYPE_CURSOR, NULL);
+                                      DRM_PLANE_TYPE_CURSOR,
+                                      "cursor %c", pipe_name(pipe));
        if (ret)
                goto fail;
 
@@ -13880,8 +14468,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
        intel_crtc->base.state = &crtc_state->base;
        crtc_state->base.crtc = &intel_crtc->base;
 
-       INIT_LIST_HEAD(&intel_crtc->flip_work);
-
        /* initialize shared scalers */
        if (INTEL_INFO(dev)->gen >= 9) {
                if (pipe == PIPE_C)
@@ -13901,7 +14487,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
                goto fail;
 
        ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, primary,
-                                       cursor, &intel_crtc_funcs, NULL);
+                                       cursor, &intel_crtc_funcs,
+                                       "pipe %c", pipe_name(pipe));
        if (ret)
                goto fail;
 
@@ -13935,10 +14522,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
        return;
 
 fail:
-       if (primary)
-               drm_plane_cleanup(primary);
-       if (cursor)
-               drm_plane_cleanup(cursor);
+       intel_plane_destroy(primary);
+       intel_plane_destroy(cursor);
        kfree(crtc_state);
        kfree(intel_crtc);
 }
@@ -14631,6 +15216,39 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
                        broxton_modeset_commit_cdclk;
                dev_priv->display.modeset_calc_cdclk =
                        broxton_modeset_calc_cdclk;
+       } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+               dev_priv->display.modeset_commit_cdclk =
+                       skl_modeset_commit_cdclk;
+               dev_priv->display.modeset_calc_cdclk =
+                       skl_modeset_calc_cdclk;
+       }
+
+       switch (INTEL_INFO(dev_priv)->gen) {
+       case 2:
+               dev_priv->display.queue_flip = intel_gen2_queue_flip;
+               break;
+
+       case 3:
+               dev_priv->display.queue_flip = intel_gen3_queue_flip;
+               break;
+
+       case 4:
+       case 5:
+               dev_priv->display.queue_flip = intel_gen4_queue_flip;
+               break;
+
+       case 6:
+               dev_priv->display.queue_flip = intel_gen6_queue_flip;
+               break;
+       case 7:
+       case 8: /* FIXME(BDW): Check that the gen8 RCS flip works. */
+               dev_priv->display.queue_flip = intel_gen7_queue_flip;
+               break;
+       case 9:
+               /* Drop through - unsupported since execlist only. */
+       default:
+               /* Default just returns -ENODEV to indicate unsupported */
+               dev_priv->display.queue_flip = intel_default_queue_flip;
        }
 }
 
@@ -15001,6 +15619,9 @@ void intel_modeset_init(struct drm_device *dev)
 
        intel_shared_dpll_init(dev);
 
+       if (dev_priv->max_cdclk_freq == 0)
+               intel_update_max_cdclk(dev);
+
        /* Just disable it once at startup */
        i915_disable_vga(dev);
        intel_setup_outputs(dev);
@@ -15141,8 +15762,8 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
        if (INTEL_INFO(dev)->gen < 4 && !intel_check_plane_mapping(crtc)) {
                bool plane;
 
-               DRM_DEBUG_KMS("[CRTC:%d] wrong plane connection detected!\n",
-                             crtc->base.base.id);
+               DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n",
+                             crtc->base.base.id, crtc->base.name);
 
                /* Pipe has the wrong plane attached and the plane is active.
                 * Temporarily change the plane mapping and disable everything
@@ -15310,26 +15931,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
                if (crtc_state->base.active) {
                        dev_priv->active_crtcs |= 1 << crtc->pipe;
 
-                       if (IS_BROADWELL(dev_priv)) {
+                       if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
                                pixclk = ilk_pipe_pixel_rate(crtc_state);
-
-                               /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
-                               if (crtc_state->ips_enabled)
-                                       pixclk = DIV_ROUND_UP(pixclk * 100, 95);
-                       } else if (IS_VALLEYVIEW(dev_priv) ||
-                                  IS_CHERRYVIEW(dev_priv) ||
-                                  IS_BROXTON(dev_priv))
+                       else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
                                pixclk = crtc_state->base.adjusted_mode.crtc_clock;
                        else
                                WARN_ON(dev_priv->display.modeset_calc_cdclk);
+
+                       /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+                       if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
+                               pixclk = DIV_ROUND_UP(pixclk * 100, 95);
                }
 
                dev_priv->min_pixclk[crtc->pipe] = pixclk;
 
                readout_plane_state(crtc);
 
-               DRM_DEBUG_KMS("[CRTC:%d] hw state readout: %s\n",
-                             crtc->base.base.id,
+               DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n",
+                             crtc->base.base.id, crtc->base.name,
                              crtc->active ? "enabled" : "disabled");
        }
 
@@ -15589,9 +16208,9 @@ void intel_modeset_gem_init(struct drm_device *dev)
                        DRM_ERROR("failed to pin boot fb on pipe %d\n",
                                  to_intel_crtc(c)->pipe);
                        drm_framebuffer_unreference(c->primary->fb);
-                       drm_framebuffer_unreference(c->primary->state->fb);
-                       c->primary->fb = c->primary->state->fb = NULL;
+                       c->primary->fb = NULL;
                        c->primary->crtc = c->primary->state->crtc = NULL;
+                       update_state_fb(c->primary);
                        c->state->plane_mask &= ~(1 << drm_plane_index(c->primary));
                }
        }