git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blobdiff - drivers/gpu/drm/radeon/evergreen_cs.c
drm/radeon/kms: skip db/cb/streamout checking when possible on evergreen
[mirror_ubuntu-jammy-kernel.git] / drivers / gpu / drm / radeon / evergreen_cs.c
index 4674a688ad40bef06b1f10d699d35ae522e479a0..7327bc7b7df50a1388cebabf28c7058e93310aca 100644 (file)
@@ -86,6 +86,9 @@ struct evergreen_cs_track {
        struct radeon_bo        *db_s_read_bo;
        struct radeon_bo        *db_s_write_bo;
        bool                    sx_misc_kill_all_prims;
+       bool                    cb_dirty;
+       bool                    db_dirty;
+       bool                    streamout_dirty;
 };
 
 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
@@ -139,6 +142,7 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
        }
        track->cb_target_mask = 0xFFFFFFFF;
        track->cb_shader_mask = 0xFFFFFFFF;
+       track->cb_dirty = true;
 
        track->db_depth_view = 0xFFFFC000;
        track->db_depth_size = 0xFFFFFFFF;
@@ -156,6 +160,7 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
        track->db_s_write_offset = 0xFFFFFFFF;
        track->db_s_read_bo = NULL;
        track->db_s_write_bo = NULL;
+       track->db_dirty = true;
 
        for (i = 0; i < 4; i++) {
                track->vgt_strmout_size[i] = 0;
@@ -163,6 +168,7 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
                track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
                track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
        }
+       track->streamout_dirty = true;
        track->sx_misc_kill_all_prims = false;
 }
 
@@ -797,30 +803,36 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 {
        struct evergreen_cs_track *track = p->track;
-       unsigned tmp, i, j;
+       unsigned tmp, i;
        int r;
+       unsigned buffer_mask = 0;
 
        /* check streamout */
-       for (i = 0; i < 4; i++) {
-               if (track->vgt_strmout_config & (1 << i)) {
-                       for (j = 0; j < 4; j++) {
-                               if ((track->vgt_strmout_buffer_config >> (i * 4)) & (1 << j)) {
-                                       if (track->vgt_strmout_bo[j]) {
-                                               u64 offset = (u64)track->vgt_strmout_bo_offset[j] +
-                                                       (u64)track->vgt_strmout_size[j];
-                                               if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
-                                                       DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
-                                                                 j, offset,
-                                                                 radeon_bo_size(track->vgt_strmout_bo[j]));
-                                                       return -EINVAL;
-                                               }
-                                       } else {
-                                               dev_warn(p->dev, "No buffer for streamout %d\n", j);
+       if (track->streamout_dirty && track->vgt_strmout_config) {
+               for (i = 0; i < 4; i++) {
+                       if (track->vgt_strmout_config & (1 << i)) {
+                               buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
+                       }
+               }
+
+               for (i = 0; i < 4; i++) {
+                       if (buffer_mask & (1 << i)) {
+                               if (track->vgt_strmout_bo[i]) {
+                                       u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
+                                                       (u64)track->vgt_strmout_size[i];
+                                       if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
+                                               DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
+                                                         i, offset,
+                                                         radeon_bo_size(track->vgt_strmout_bo[i]));
                                                return -EINVAL;
                                        }
+                               } else {
+                                       dev_warn(p->dev, "No buffer for streamout %d\n", i);
+                                       return -EINVAL;
                                }
                        }
                }
+               track->streamout_dirty = false;
        }
 
        if (track->sx_misc_kill_all_prims)
@@ -828,34 +840,40 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 
        /* check that we have a cb for each enabled target
         */
-       tmp = track->cb_target_mask;
-       for (i = 0; i < 8; i++) {
-               if ((tmp >> (i * 4)) & 0xF) {
-                       /* at least one component is enabled */
-                       if (track->cb_color_bo[i] == NULL) {
-                               dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
-                                       __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
-                               return -EINVAL;
-                       }
-                       /* check cb */
-                       r = evergreen_cs_track_validate_cb(p, i);
-                       if (r) {
-                               return r;
+       if (track->cb_dirty) {
+               tmp = track->cb_target_mask;
+               for (i = 0; i < 8; i++) {
+                       if ((tmp >> (i * 4)) & 0xF) {
+                               /* at least one component is enabled */
+                               if (track->cb_color_bo[i] == NULL) {
+                                       dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
+                                               __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
+                                       return -EINVAL;
+                               }
+                               /* check cb */
+                               r = evergreen_cs_track_validate_cb(p, i);
+                               if (r) {
+                                       return r;
+                               }
                        }
                }
+               track->cb_dirty = false;
        }
 
-       /* Check stencil buffer */
-       if (G_028800_STENCIL_ENABLE(track->db_depth_control)) {
-               r = evergreen_cs_track_validate_stencil(p);
-               if (r)
-                       return r;
-       }
-       /* Check depth buffer */
-       if (G_028800_Z_WRITE_ENABLE(track->db_depth_control)) {
-               r = evergreen_cs_track_validate_depth(p);
-               if (r)
-                       return r;
+       if (track->db_dirty) {
+               /* Check stencil buffer */
+               if (G_028800_STENCIL_ENABLE(track->db_depth_control)) {
+                       r = evergreen_cs_track_validate_stencil(p);
+                       if (r)
+                               return r;
+               }
+               /* Check depth buffer */
+               if (G_028800_Z_WRITE_ENABLE(track->db_depth_control)) {
+                       r = evergreen_cs_track_validate_depth(p);
+                       if (r)
+                               return r;
+               }
+               track->db_dirty = false;
        }
 
        return 0;
@@ -1189,6 +1207,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                break;
        case DB_DEPTH_CONTROL:
                track->db_depth_control = radeon_get_ib_value(p, idx);
+               track->db_dirty = true;
                break;
        case CAYMAN_DB_EQAA:
                if (p->rdev->family < CHIP_CAYMAN) {
@@ -1230,19 +1249,24 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                                                DB_MACRO_TILE_ASPECT(mtaspect);
                        }
                }
+               track->db_dirty = true;
                break;
        case DB_STENCIL_INFO:
                track->db_s_info = radeon_get_ib_value(p, idx);
+               track->db_dirty = true;
                break;
        case DB_DEPTH_VIEW:
                track->db_depth_view = radeon_get_ib_value(p, idx);
+               track->db_dirty = true;
                break;
        case DB_DEPTH_SIZE:
                track->db_depth_size = radeon_get_ib_value(p, idx);
                track->db_depth_size_idx = idx;
+               track->db_dirty = true;
                break;
        case R_02805C_DB_DEPTH_SLICE:
                track->db_depth_slice = radeon_get_ib_value(p, idx);
+               track->db_dirty = true;
                break;
        case DB_Z_READ_BASE:
                r = evergreen_cs_packet_next_reloc(p, &reloc);
@@ -1254,6 +1278,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                track->db_z_read_offset = radeon_get_ib_value(p, idx);
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->db_z_read_bo = reloc->robj;
+               track->db_dirty = true;
                break;
        case DB_Z_WRITE_BASE:
                r = evergreen_cs_packet_next_reloc(p, &reloc);
@@ -1265,6 +1290,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                track->db_z_write_offset = radeon_get_ib_value(p, idx);
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->db_z_write_bo = reloc->robj;
+               track->db_dirty = true;
                break;
        case DB_STENCIL_READ_BASE:
                r = evergreen_cs_packet_next_reloc(p, &reloc);
@@ -1276,6 +1302,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                track->db_s_read_offset = radeon_get_ib_value(p, idx);
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->db_s_read_bo = reloc->robj;
+               track->db_dirty = true;
                break;
        case DB_STENCIL_WRITE_BASE:
                r = evergreen_cs_packet_next_reloc(p, &reloc);
@@ -1287,12 +1314,15 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                track->db_s_write_offset = radeon_get_ib_value(p, idx);
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->db_s_write_bo = reloc->robj;
+               track->db_dirty = true;
                break;
        case VGT_STRMOUT_CONFIG:
                track->vgt_strmout_config = radeon_get_ib_value(p, idx);
+               track->streamout_dirty = true;
                break;
        case VGT_STRMOUT_BUFFER_CONFIG:
                track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
+               track->streamout_dirty = true;
                break;
        case VGT_STRMOUT_BUFFER_BASE_0:
        case VGT_STRMOUT_BUFFER_BASE_1:
@@ -1309,6 +1339,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->vgt_strmout_bo[tmp] = reloc->robj;
                track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
+               track->streamout_dirty = true;
                break;
        case VGT_STRMOUT_BUFFER_SIZE_0:
        case VGT_STRMOUT_BUFFER_SIZE_1:
@@ -1317,6 +1348,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
                /* size in register is DWs, convert to bytes */
                track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
+               track->streamout_dirty = true;
                break;
        case CP_COHER_BASE:
                r = evergreen_cs_packet_next_reloc(p, &reloc);
@@ -1328,9 +1360,11 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
        case CB_TARGET_MASK:
                track->cb_target_mask = radeon_get_ib_value(p, idx);
+               track->cb_dirty = true;
                break;
        case CB_SHADER_MASK:
                track->cb_shader_mask = radeon_get_ib_value(p, idx);
+               track->cb_dirty = true;
                break;
        case PA_SC_AA_CONFIG:
                if (p->rdev->family >= CHIP_CAYMAN) {
@@ -1360,6 +1394,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
        case CB_COLOR7_VIEW:
                tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_VIEW:
        case CB_COLOR9_VIEW:
@@ -1367,6 +1402,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
        case CB_COLOR11_VIEW:
                tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
                track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
+               track->cb_dirty = true;
                break;
        case CB_COLOR0_INFO:
        case CB_COLOR1_INFO:
@@ -1388,6 +1424,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
                }
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_INFO:
        case CB_COLOR9_INFO:
@@ -1405,6 +1442,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                        ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
                        track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
                }
+               track->cb_dirty = true;
                break;
        case CB_COLOR0_PITCH:
        case CB_COLOR1_PITCH:
@@ -1417,6 +1455,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
                track->cb_color_pitch_idx[tmp] = idx;
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_PITCH:
        case CB_COLOR9_PITCH:
@@ -1425,6 +1464,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
                track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
                track->cb_color_pitch_idx[tmp] = idx;
+               track->cb_dirty = true;
                break;
        case CB_COLOR0_SLICE:
        case CB_COLOR1_SLICE:
@@ -1437,6 +1477,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
                track->cb_color_slice_idx[tmp] = idx;
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_SLICE:
        case CB_COLOR9_SLICE:
@@ -1445,6 +1486,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
                track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
                track->cb_color_slice_idx[tmp] = idx;
+               track->cb_dirty = true;
                break;
        case CB_COLOR0_ATTRIB:
        case CB_COLOR1_ATTRIB:
@@ -1476,6 +1518,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                }
                tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
                track->cb_color_attrib[tmp] = ib[idx];
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_ATTRIB:
        case CB_COLOR9_ATTRIB:
@@ -1503,6 +1546,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                }
                tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
                track->cb_color_attrib[tmp] = ib[idx];
+               track->cb_dirty = true;
                break;
        case CB_COLOR0_DIM:
        case CB_COLOR1_DIM:
@@ -1599,6 +1643,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->cb_color_base_last[tmp] = ib[idx];
                track->cb_color_bo[tmp] = reloc->robj;
+               track->cb_dirty = true;
                break;
        case CB_COLOR8_BASE:
        case CB_COLOR9_BASE:
@@ -1615,6 +1660,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
                ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                track->cb_color_base_last[tmp] = ib[idx];
                track->cb_color_bo[tmp] = reloc->robj;
+               track->cb_dirty = true;
                break;
        case CB_IMMED0_BASE:
        case CB_IMMED1_BASE:
@@ -1811,6 +1857,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
        {
                int pred_op;
                int tmp;
+               uint64_t offset;
+
                if (pkt->count != 1) {
                        DRM_ERROR("bad SET PREDICATION\n");
                        return -EINVAL;
@@ -1834,8 +1882,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        return -EINVAL;
                }
 
-               ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff);
+               offset = reloc->lobj.gpu_offset +
+                        (idx_value & 0xfffffff0) +
+                        ((u64)(tmp & 0xff) << 32);
+
+               ib[idx + 0] = offset;
+               ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
        }
        break;
        case PACKET3_CONTEXT_CONTROL:
@@ -1863,6 +1915,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                }
                break;
        case PACKET3_INDEX_BASE:
+       {
+               uint64_t offset;
+
                if (pkt->count != 1) {
                        DRM_ERROR("bad INDEX_BASE\n");
                        return -EINVAL;
@@ -1872,15 +1927,24 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        DRM_ERROR("bad INDEX_BASE\n");
                        return -EINVAL;
                }
-               ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+               offset = reloc->lobj.gpu_offset +
+                        idx_value +
+                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+               ib[idx+0] = offset;
+               ib[idx+1] = upper_32_bits(offset) & 0xff;
+
                r = evergreen_cs_track_check(p);
                if (r) {
                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
                        return r;
                }
                break;
+       }
        case PACKET3_DRAW_INDEX:
+       {
+               uint64_t offset;
                if (pkt->count != 3) {
                        DRM_ERROR("bad DRAW_INDEX\n");
                        return -EINVAL;
@@ -1890,15 +1954,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        DRM_ERROR("bad DRAW_INDEX\n");
                        return -EINVAL;
                }
-               ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+               offset = reloc->lobj.gpu_offset +
+                        idx_value +
+                        ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+               ib[idx+0] = offset;
+               ib[idx+1] = upper_32_bits(offset) & 0xff;
+
                r = evergreen_cs_track_check(p);
                if (r) {
                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
                        return r;
                }
                break;
+       }
        case PACKET3_DRAW_INDEX_2:
+       {
+               uint64_t offset;
+
                if (pkt->count != 4) {
                        DRM_ERROR("bad DRAW_INDEX_2\n");
                        return -EINVAL;
@@ -1908,14 +1982,21 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        DRM_ERROR("bad DRAW_INDEX_2\n");
                        return -EINVAL;
                }
-               ib[idx+1] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+               offset = reloc->lobj.gpu_offset +
+                        radeon_get_ib_value(p, idx+1) +
+                        ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+               ib[idx+1] = offset;
+               ib[idx+2] = upper_32_bits(offset) & 0xff;
+
                r = evergreen_cs_track_check(p);
                if (r) {
                        dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
                        return r;
                }
                break;
+       }
        case PACKET3_DRAW_INDEX_AUTO:
                if (pkt->count != 1) {
                        DRM_ERROR("bad DRAW_INDEX_AUTO\n");
@@ -2006,13 +2087,20 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                }
                /* bit 4 is reg (0) or mem (1) */
                if (idx_value & 0x10) {
+                       uint64_t offset;
+
                        r = evergreen_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("bad WAIT_REG_MEM\n");
                                return -EINVAL;
                        }
-                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+                       offset = reloc->lobj.gpu_offset +
+                                (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+                                ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+                       ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
+                       ib[idx+2] = upper_32_bits(offset) & 0xff;
                }
                break;
        case PACKET3_SURFACE_SYNC:
@@ -2037,16 +2125,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        return -EINVAL;
                }
                if (pkt->count) {
+                       uint64_t offset;
+
                        r = evergreen_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("bad EVENT_WRITE\n");
                                return -EINVAL;
                        }
-                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                       offset = reloc->lobj.gpu_offset +
+                                (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+                                ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+                       ib[idx+1] = offset & 0xfffffff8;
+                       ib[idx+2] = upper_32_bits(offset) & 0xff;
                }
                break;
        case PACKET3_EVENT_WRITE_EOP:
+       {
+               uint64_t offset;
+
                if (pkt->count != 4) {
                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
                        return -EINVAL;
@@ -2056,10 +2153,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        DRM_ERROR("bad EVENT_WRITE_EOP\n");
                        return -EINVAL;
                }
-               ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+               offset = reloc->lobj.gpu_offset +
+                        (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+                        ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+               ib[idx+1] = offset & 0xfffffffc;
+               ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
                break;
+       }
        case PACKET3_EVENT_WRITE_EOS:
+       {
+               uint64_t offset;
+
                if (pkt->count != 3) {
                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
                        return -EINVAL;
@@ -2069,9 +2175,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                        DRM_ERROR("bad EVENT_WRITE_EOS\n");
                        return -EINVAL;
                }
-               ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-               ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+               offset = reloc->lobj.gpu_offset +
+                        (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+                        ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+               ib[idx+1] = offset & 0xfffffffc;
+               ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
                break;
+       }
        case PACKET3_SET_CONFIG_REG:
                start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
                end_reg = 4 * pkt->count + start_reg - 4;
@@ -2164,6 +2276,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                ib[idx+1+(i*8)+3] += moffset;
                                break;
                        case SQ_TEX_VTX_VALID_BUFFER:
+                       {
+                               uint64_t offset64;
                                /* vtx base */
                                r = evergreen_cs_packet_next_reloc(p, &reloc);
                                if (r) {
@@ -2175,11 +2289,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
                                        /* force size to size of the buffer */
                                        dev_warn(p->dev, "vbo resource seems too big for the bo\n");
-                                       ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj);
+                                       ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
                                }
-                               ib[idx+1+(i*8)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
-                               ib[idx+1+(i*8)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+                               offset64 = reloc->lobj.gpu_offset + offset;
+                               ib[idx+1+(i*8)+0] = offset64;
+                               ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
+                                                   (upper_32_bits(offset64) & 0xff);
                                break;
+                       }
                        case SQ_TEX_VTX_INVALID_TEXTURE:
                        case SQ_TEX_VTX_INVALID_BUFFER:
                        default:
@@ -2255,8 +2373,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                          offset + 4, radeon_bo_size(reloc->robj));
                                return -EINVAL;
                        }
-                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                       offset += reloc->lobj.gpu_offset;
+                       ib[idx+1] = offset;
+                       ib[idx+2] = upper_32_bits(offset) & 0xff;
                }
                /* Reading data from SRC_ADDRESS. */
                if (((idx_value >> 1) & 0x3) == 2) {
@@ -2273,8 +2392,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                          offset + 4, radeon_bo_size(reloc->robj));
                                return -EINVAL;
                        }
-                       ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                       offset += reloc->lobj.gpu_offset;
+                       ib[idx+3] = offset;
+                       ib[idx+4] = upper_32_bits(offset) & 0xff;
                }
                break;
        case PACKET3_COPY_DW:
@@ -2297,8 +2417,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                          offset + 4, radeon_bo_size(reloc->robj));
                                return -EINVAL;
                        }
-                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                       offset += reloc->lobj.gpu_offset;
+                       ib[idx+1] = offset;
+                       ib[idx+2] = upper_32_bits(offset) & 0xff;
                } else {
                        /* SRC is a reg. */
                        reg = radeon_get_ib_value(p, idx+1) << 2;
@@ -2320,8 +2441,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
                                          offset + 4, radeon_bo_size(reloc->robj));
                                return -EINVAL;
                        }
-                       ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-                       ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                       offset += reloc->lobj.gpu_offset;
+                       ib[idx+3] = offset;
+                       ib[idx+4] = upper_32_bits(offset) & 0xff;
                } else {
                        /* DST is a reg. */
                        reg = radeon_get_ib_value(p, idx+3) << 2;