2 * Copyright 2010 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
30 #include "evergreend.h"
31 #include "evergreen_reg_safe.h"
32 #include "cayman_reg_safe.h"
/*
 * Local min/max helpers for surface alignment math below.
 * NOTE: classic function-like macros — each argument may be evaluated
 * twice, so never pass expressions with side effects (e.g. i++).
 */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
37 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser
*p
,
38 struct radeon_cs_reloc
**cs_reloc
);
40 struct evergreen_cs_track
{
46 u32 nsamples
; /* unused */
47 struct radeon_bo
*cb_color_bo
[12];
48 u32 cb_color_bo_offset
[12];
49 struct radeon_bo
*cb_color_fmask_bo
[8]; /* unused */
50 struct radeon_bo
*cb_color_cmask_bo
[8]; /* unused */
51 u32 cb_color_info
[12];
52 u32 cb_color_view
[12];
53 u32 cb_color_pitch
[12];
54 u32 cb_color_slice
[12];
55 u32 cb_color_slice_idx
[12];
56 u32 cb_color_attrib
[12];
57 u32 cb_color_cmask_slice
[8];/* unused */
58 u32 cb_color_fmask_slice
[8];/* unused */
60 u32 cb_shader_mask
; /* unused */
61 u32 vgt_strmout_config
;
62 u32 vgt_strmout_buffer_config
;
63 struct radeon_bo
*vgt_strmout_bo
[4];
64 u32 vgt_strmout_bo_offset
[4];
65 u32 vgt_strmout_size
[4];
72 u32 db_z_write_offset
;
73 struct radeon_bo
*db_z_read_bo
;
74 struct radeon_bo
*db_z_write_bo
;
77 u32 db_s_write_offset
;
78 struct radeon_bo
*db_s_read_bo
;
79 struct radeon_bo
*db_s_write_bo
;
80 bool sx_misc_kill_all_prims
;
86 struct radeon_bo
*htile_bo
;
89 static u32
evergreen_cs_get_aray_mode(u32 tiling_flags
)
91 if (tiling_flags
& RADEON_TILING_MACRO
)
92 return ARRAY_2D_TILED_THIN1
;
93 else if (tiling_flags
& RADEON_TILING_MICRO
)
94 return ARRAY_1D_TILED_THIN1
;
96 return ARRAY_LINEAR_GENERAL
;
99 static u32
evergreen_cs_get_num_banks(u32 nbanks
)
103 return ADDR_SURF_2_BANK
;
105 return ADDR_SURF_4_BANK
;
108 return ADDR_SURF_8_BANK
;
110 return ADDR_SURF_16_BANK
;
114 static void evergreen_cs_track_init(struct evergreen_cs_track
*track
)
118 for (i
= 0; i
< 8; i
++) {
119 track
->cb_color_fmask_bo
[i
] = NULL
;
120 track
->cb_color_cmask_bo
[i
] = NULL
;
121 track
->cb_color_cmask_slice
[i
] = 0;
122 track
->cb_color_fmask_slice
[i
] = 0;
125 for (i
= 0; i
< 12; i
++) {
126 track
->cb_color_bo
[i
] = NULL
;
127 track
->cb_color_bo_offset
[i
] = 0xFFFFFFFF;
128 track
->cb_color_info
[i
] = 0;
129 track
->cb_color_view
[i
] = 0xFFFFFFFF;
130 track
->cb_color_pitch
[i
] = 0;
131 track
->cb_color_slice
[i
] = 0xfffffff;
132 track
->cb_color_slice_idx
[i
] = 0;
134 track
->cb_target_mask
= 0xFFFFFFFF;
135 track
->cb_shader_mask
= 0xFFFFFFFF;
136 track
->cb_dirty
= true;
138 track
->db_depth_slice
= 0xffffffff;
139 track
->db_depth_view
= 0xFFFFC000;
140 track
->db_depth_size
= 0xFFFFFFFF;
141 track
->db_depth_control
= 0xFFFFFFFF;
142 track
->db_z_info
= 0xFFFFFFFF;
143 track
->db_z_read_offset
= 0xFFFFFFFF;
144 track
->db_z_write_offset
= 0xFFFFFFFF;
145 track
->db_z_read_bo
= NULL
;
146 track
->db_z_write_bo
= NULL
;
147 track
->db_s_info
= 0xFFFFFFFF;
148 track
->db_s_read_offset
= 0xFFFFFFFF;
149 track
->db_s_write_offset
= 0xFFFFFFFF;
150 track
->db_s_read_bo
= NULL
;
151 track
->db_s_write_bo
= NULL
;
152 track
->db_dirty
= true;
153 track
->htile_bo
= NULL
;
154 track
->htile_offset
= 0xFFFFFFFF;
155 track
->htile_surface
= 0;
157 for (i
= 0; i
< 4; i
++) {
158 track
->vgt_strmout_size
[i
] = 0;
159 track
->vgt_strmout_bo
[i
] = NULL
;
160 track
->vgt_strmout_bo_offset
[i
] = 0xFFFFFFFF;
162 track
->streamout_dirty
= true;
163 track
->sx_misc_kill_all_prims
= false;
167 /* value gathered from cs */
183 unsigned long base_align
;
186 static int evergreen_surface_check_linear(struct radeon_cs_parser
*p
,
187 struct eg_surface
*surf
,
190 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
* surf
->nsamples
;
191 surf
->base_align
= surf
->bpe
;
197 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser
*p
,
198 struct eg_surface
*surf
,
201 struct evergreen_cs_track
*track
= p
->track
;
204 palign
= MAX(64, track
->group_size
/ surf
->bpe
);
205 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
* surf
->nsamples
;
206 surf
->base_align
= track
->group_size
;
207 surf
->palign
= palign
;
209 if (surf
->nbx
& (palign
- 1)) {
211 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d\n",
212 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
);
219 static int evergreen_surface_check_1d(struct radeon_cs_parser
*p
,
220 struct eg_surface
*surf
,
223 struct evergreen_cs_track
*track
= p
->track
;
226 palign
= track
->group_size
/ (8 * surf
->bpe
* surf
->nsamples
);
227 palign
= MAX(8, palign
);
228 surf
->layer_size
= surf
->nbx
* surf
->nby
* surf
->bpe
;
229 surf
->base_align
= track
->group_size
;
230 surf
->palign
= palign
;
232 if ((surf
->nbx
& (palign
- 1))) {
234 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
235 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
,
236 track
->group_size
, surf
->bpe
, surf
->nsamples
);
240 if ((surf
->nby
& (8 - 1))) {
242 dev_warn(p
->dev
, "%s:%d %s height %d invalid must be aligned with 8\n",
243 __func__
, __LINE__
, prefix
, surf
->nby
);
250 static int evergreen_surface_check_2d(struct radeon_cs_parser
*p
,
251 struct eg_surface
*surf
,
254 struct evergreen_cs_track
*track
= p
->track
;
255 unsigned palign
, halign
, tileb
, slice_pt
;
256 unsigned mtile_pr
, mtile_ps
, mtileb
;
258 tileb
= 64 * surf
->bpe
* surf
->nsamples
;
260 if (tileb
> surf
->tsplit
) {
261 slice_pt
= tileb
/ surf
->tsplit
;
263 tileb
= tileb
/ slice_pt
;
264 /* macro tile width & height */
265 palign
= (8 * surf
->bankw
* track
->npipes
) * surf
->mtilea
;
266 halign
= (8 * surf
->bankh
* surf
->nbanks
) / surf
->mtilea
;
267 mtileb
= (palign
/ 8) * (halign
/ 8) * tileb
;;
268 mtile_pr
= surf
->nbx
/ palign
;
269 mtile_ps
= (mtile_pr
* surf
->nby
) / halign
;
270 surf
->layer_size
= mtile_ps
* mtileb
* slice_pt
;
271 surf
->base_align
= (palign
/ 8) * (halign
/ 8) * tileb
;
272 surf
->palign
= palign
;
273 surf
->halign
= halign
;
275 if ((surf
->nbx
& (palign
- 1))) {
277 dev_warn(p
->dev
, "%s:%d %s pitch %d invalid must be aligned with %d\n",
278 __func__
, __LINE__
, prefix
, surf
->nbx
, palign
);
282 if ((surf
->nby
& (halign
- 1))) {
284 dev_warn(p
->dev
, "%s:%d %s height %d invalid must be aligned with %d\n",
285 __func__
, __LINE__
, prefix
, surf
->nby
, halign
);
293 static int evergreen_surface_check(struct radeon_cs_parser
*p
,
294 struct eg_surface
*surf
,
297 /* some common value computed here */
298 surf
->bpe
= r600_fmt_get_blocksize(surf
->format
);
300 switch (surf
->mode
) {
301 case ARRAY_LINEAR_GENERAL
:
302 return evergreen_surface_check_linear(p
, surf
, prefix
);
303 case ARRAY_LINEAR_ALIGNED
:
304 return evergreen_surface_check_linear_aligned(p
, surf
, prefix
);
305 case ARRAY_1D_TILED_THIN1
:
306 return evergreen_surface_check_1d(p
, surf
, prefix
);
307 case ARRAY_2D_TILED_THIN1
:
308 return evergreen_surface_check_2d(p
, surf
, prefix
);
310 dev_warn(p
->dev
, "%s:%d %s invalid array mode %d\n",
311 __func__
, __LINE__
, prefix
, surf
->mode
);
317 static int evergreen_surface_value_conv_check(struct radeon_cs_parser
*p
,
318 struct eg_surface
*surf
,
321 switch (surf
->mode
) {
322 case ARRAY_2D_TILED_THIN1
:
324 case ARRAY_LINEAR_GENERAL
:
325 case ARRAY_LINEAR_ALIGNED
:
326 case ARRAY_1D_TILED_THIN1
:
329 dev_warn(p
->dev
, "%s:%d %s invalid array mode %d\n",
330 __func__
, __LINE__
, prefix
, surf
->mode
);
334 switch (surf
->nbanks
) {
335 case 0: surf
->nbanks
= 2; break;
336 case 1: surf
->nbanks
= 4; break;
337 case 2: surf
->nbanks
= 8; break;
338 case 3: surf
->nbanks
= 16; break;
340 dev_warn(p
->dev
, "%s:%d %s invalid number of banks %d\n",
341 __func__
, __LINE__
, prefix
, surf
->nbanks
);
344 switch (surf
->bankw
) {
345 case 0: surf
->bankw
= 1; break;
346 case 1: surf
->bankw
= 2; break;
347 case 2: surf
->bankw
= 4; break;
348 case 3: surf
->bankw
= 8; break;
350 dev_warn(p
->dev
, "%s:%d %s invalid bankw %d\n",
351 __func__
, __LINE__
, prefix
, surf
->bankw
);
354 switch (surf
->bankh
) {
355 case 0: surf
->bankh
= 1; break;
356 case 1: surf
->bankh
= 2; break;
357 case 2: surf
->bankh
= 4; break;
358 case 3: surf
->bankh
= 8; break;
360 dev_warn(p
->dev
, "%s:%d %s invalid bankh %d\n",
361 __func__
, __LINE__
, prefix
, surf
->bankh
);
364 switch (surf
->mtilea
) {
365 case 0: surf
->mtilea
= 1; break;
366 case 1: surf
->mtilea
= 2; break;
367 case 2: surf
->mtilea
= 4; break;
368 case 3: surf
->mtilea
= 8; break;
370 dev_warn(p
->dev
, "%s:%d %s invalid macro tile aspect %d\n",
371 __func__
, __LINE__
, prefix
, surf
->mtilea
);
374 switch (surf
->tsplit
) {
375 case 0: surf
->tsplit
= 64; break;
376 case 1: surf
->tsplit
= 128; break;
377 case 2: surf
->tsplit
= 256; break;
378 case 3: surf
->tsplit
= 512; break;
379 case 4: surf
->tsplit
= 1024; break;
380 case 5: surf
->tsplit
= 2048; break;
381 case 6: surf
->tsplit
= 4096; break;
383 dev_warn(p
->dev
, "%s:%d %s invalid tile split %d\n",
384 __func__
, __LINE__
, prefix
, surf
->tsplit
);
390 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser
*p
, unsigned id
)
392 struct evergreen_cs_track
*track
= p
->track
;
393 struct eg_surface surf
;
394 unsigned pitch
, slice
, mslice
;
395 unsigned long offset
;
398 mslice
= G_028C6C_SLICE_MAX(track
->cb_color_view
[id
]) + 1;
399 pitch
= track
->cb_color_pitch
[id
];
400 slice
= track
->cb_color_slice
[id
];
401 surf
.nbx
= (pitch
+ 1) * 8;
402 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
403 surf
.mode
= G_028C70_ARRAY_MODE(track
->cb_color_info
[id
]);
404 surf
.format
= G_028C70_FORMAT(track
->cb_color_info
[id
]);
405 surf
.tsplit
= G_028C74_TILE_SPLIT(track
->cb_color_attrib
[id
]);
406 surf
.nbanks
= G_028C74_NUM_BANKS(track
->cb_color_attrib
[id
]);
407 surf
.bankw
= G_028C74_BANK_WIDTH(track
->cb_color_attrib
[id
]);
408 surf
.bankh
= G_028C74_BANK_HEIGHT(track
->cb_color_attrib
[id
]);
409 surf
.mtilea
= G_028C74_MACRO_TILE_ASPECT(track
->cb_color_attrib
[id
]);
412 if (!r600_fmt_is_valid_color(surf
.format
)) {
413 dev_warn(p
->dev
, "%s:%d cb invalid format %d for %d (0x%08x)\n",
414 __func__
, __LINE__
, surf
.format
,
415 id
, track
->cb_color_info
[id
]);
419 r
= evergreen_surface_value_conv_check(p
, &surf
, "cb");
424 r
= evergreen_surface_check(p
, &surf
, "cb");
426 dev_warn(p
->dev
, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
427 __func__
, __LINE__
, id
, track
->cb_color_pitch
[id
],
428 track
->cb_color_slice
[id
], track
->cb_color_attrib
[id
],
429 track
->cb_color_info
[id
]);
433 offset
= track
->cb_color_bo_offset
[id
] << 8;
434 if (offset
& (surf
.base_align
- 1)) {
435 dev_warn(p
->dev
, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
436 __func__
, __LINE__
, id
, offset
, surf
.base_align
);
440 offset
+= surf
.layer_size
* mslice
;
441 if (offset
> radeon_bo_size(track
->cb_color_bo
[id
])) {
442 /* old ddx are broken they allocate bo with w*h*bpp but
443 * program slice with ALIGN(h, 8), catch this and patch
447 volatile u32
*ib
= p
->ib
.ptr
;
448 unsigned long tmp
, nby
, bsize
, size
, min
= 0;
450 /* find the height the ddx wants */
454 bsize
= radeon_bo_size(track
->cb_color_bo
[id
]);
455 tmp
= track
->cb_color_bo_offset
[id
] << 8;
456 for (nby
= surf
.nby
; nby
> min
; nby
--) {
457 size
= nby
* surf
.nbx
* surf
.bpe
* surf
.nsamples
;
458 if ((tmp
+ size
* mslice
) <= bsize
) {
464 slice
= ((nby
* surf
.nbx
) / 64) - 1;
465 if (!evergreen_surface_check(p
, &surf
, "cb")) {
466 /* check if this one works */
467 tmp
+= surf
.layer_size
* mslice
;
469 ib
[track
->cb_color_slice_idx
[id
]] = slice
;
475 dev_warn(p
->dev
, "%s:%d cb[%d] bo too small (layer size %d, "
476 "offset %d, max layer %d, bo size %ld, slice %d)\n",
477 __func__
, __LINE__
, id
, surf
.layer_size
,
478 track
->cb_color_bo_offset
[id
] << 8, mslice
,
479 radeon_bo_size(track
->cb_color_bo
[id
]), slice
);
480 dev_warn(p
->dev
, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
481 __func__
, __LINE__
, surf
.nbx
, surf
.nby
,
482 surf
.mode
, surf
.bpe
, surf
.nsamples
,
483 surf
.bankw
, surf
.bankh
,
484 surf
.tsplit
, surf
.mtilea
);
492 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser
*p
,
493 unsigned nbx
, unsigned nby
)
495 struct evergreen_cs_track
*track
= p
->track
;
498 if (track
->htile_bo
== NULL
) {
499 dev_warn(p
->dev
, "%s:%d htile enabled without htile surface 0x%08x\n",
500 __func__
, __LINE__
, track
->db_z_info
);
504 if (G_028ABC_LINEAR(track
->htile_surface
)) {
505 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
506 nbx
= round_up(nbx
, 16 * 8);
507 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
508 nby
= round_up(nby
, track
->npipes
* 8);
510 switch (track
->npipes
) {
512 nbx
= round_up(nbx
, 64 * 8);
513 nby
= round_up(nby
, 64 * 8);
516 nbx
= round_up(nbx
, 64 * 8);
517 nby
= round_up(nby
, 32 * 8);
520 nbx
= round_up(nbx
, 32 * 8);
521 nby
= round_up(nby
, 32 * 8);
524 nbx
= round_up(nbx
, 32 * 8);
525 nby
= round_up(nby
, 16 * 8);
528 dev_warn(p
->dev
, "%s:%d invalid num pipes %d\n",
529 __func__
, __LINE__
, track
->npipes
);
533 /* compute number of htile */
536 size
= nbx
* nby
* 4;
537 size
+= track
->htile_offset
;
539 if (size
> radeon_bo_size(track
->htile_bo
)) {
540 dev_warn(p
->dev
, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
541 __func__
, __LINE__
, radeon_bo_size(track
->htile_bo
),
548 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser
*p
)
550 struct evergreen_cs_track
*track
= p
->track
;
551 struct eg_surface surf
;
552 unsigned pitch
, slice
, mslice
;
553 unsigned long offset
;
556 mslice
= G_028008_SLICE_MAX(track
->db_depth_view
) + 1;
557 pitch
= G_028058_PITCH_TILE_MAX(track
->db_depth_size
);
558 slice
= track
->db_depth_slice
;
559 surf
.nbx
= (pitch
+ 1) * 8;
560 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
561 surf
.mode
= G_028040_ARRAY_MODE(track
->db_z_info
);
562 surf
.format
= G_028044_FORMAT(track
->db_s_info
);
563 surf
.tsplit
= G_028044_TILE_SPLIT(track
->db_s_info
);
564 surf
.nbanks
= G_028040_NUM_BANKS(track
->db_z_info
);
565 surf
.bankw
= G_028040_BANK_WIDTH(track
->db_z_info
);
566 surf
.bankh
= G_028040_BANK_HEIGHT(track
->db_z_info
);
567 surf
.mtilea
= G_028040_MACRO_TILE_ASPECT(track
->db_z_info
);
570 if (surf
.format
!= 1) {
571 dev_warn(p
->dev
, "%s:%d stencil invalid format %d\n",
572 __func__
, __LINE__
, surf
.format
);
575 /* replace by color format so we can use same code */
576 surf
.format
= V_028C70_COLOR_8
;
578 r
= evergreen_surface_value_conv_check(p
, &surf
, "stencil");
583 r
= evergreen_surface_check(p
, &surf
, NULL
);
585 /* old userspace doesn't compute proper depth/stencil alignment
586 * check that alignment against a bigger byte per elements and
587 * only report if that alignment is wrong too.
589 surf
.format
= V_028C70_COLOR_8_8_8_8
;
590 r
= evergreen_surface_check(p
, &surf
, "stencil");
592 dev_warn(p
->dev
, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
593 __func__
, __LINE__
, track
->db_depth_size
,
594 track
->db_depth_slice
, track
->db_s_info
, track
->db_z_info
);
599 offset
= track
->db_s_read_offset
<< 8;
600 if (offset
& (surf
.base_align
- 1)) {
601 dev_warn(p
->dev
, "%s:%d stencil read bo base %ld not aligned with %ld\n",
602 __func__
, __LINE__
, offset
, surf
.base_align
);
605 offset
+= surf
.layer_size
* mslice
;
606 if (offset
> radeon_bo_size(track
->db_s_read_bo
)) {
607 dev_warn(p
->dev
, "%s:%d stencil read bo too small (layer size %d, "
608 "offset %ld, max layer %d, bo size %ld)\n",
609 __func__
, __LINE__
, surf
.layer_size
,
610 (unsigned long)track
->db_s_read_offset
<< 8, mslice
,
611 radeon_bo_size(track
->db_s_read_bo
));
612 dev_warn(p
->dev
, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
613 __func__
, __LINE__
, track
->db_depth_size
,
614 track
->db_depth_slice
, track
->db_s_info
, track
->db_z_info
);
618 offset
= track
->db_s_write_offset
<< 8;
619 if (offset
& (surf
.base_align
- 1)) {
620 dev_warn(p
->dev
, "%s:%d stencil write bo base %ld not aligned with %ld\n",
621 __func__
, __LINE__
, offset
, surf
.base_align
);
624 offset
+= surf
.layer_size
* mslice
;
625 if (offset
> radeon_bo_size(track
->db_s_write_bo
)) {
626 dev_warn(p
->dev
, "%s:%d stencil write bo too small (layer size %d, "
627 "offset %ld, max layer %d, bo size %ld)\n",
628 __func__
, __LINE__
, surf
.layer_size
,
629 (unsigned long)track
->db_s_write_offset
<< 8, mslice
,
630 radeon_bo_size(track
->db_s_write_bo
));
635 if (G_028040_TILE_SURFACE_ENABLE(track
->db_z_info
)) {
636 r
= evergreen_cs_track_validate_htile(p
, surf
.nbx
, surf
.nby
);
645 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser
*p
)
647 struct evergreen_cs_track
*track
= p
->track
;
648 struct eg_surface surf
;
649 unsigned pitch
, slice
, mslice
;
650 unsigned long offset
;
653 mslice
= G_028008_SLICE_MAX(track
->db_depth_view
) + 1;
654 pitch
= G_028058_PITCH_TILE_MAX(track
->db_depth_size
);
655 slice
= track
->db_depth_slice
;
656 surf
.nbx
= (pitch
+ 1) * 8;
657 surf
.nby
= ((slice
+ 1) * 64) / surf
.nbx
;
658 surf
.mode
= G_028040_ARRAY_MODE(track
->db_z_info
);
659 surf
.format
= G_028040_FORMAT(track
->db_z_info
);
660 surf
.tsplit
= G_028040_TILE_SPLIT(track
->db_z_info
);
661 surf
.nbanks
= G_028040_NUM_BANKS(track
->db_z_info
);
662 surf
.bankw
= G_028040_BANK_WIDTH(track
->db_z_info
);
663 surf
.bankh
= G_028040_BANK_HEIGHT(track
->db_z_info
);
664 surf
.mtilea
= G_028040_MACRO_TILE_ASPECT(track
->db_z_info
);
667 switch (surf
.format
) {
669 surf
.format
= V_028C70_COLOR_16
;
672 case V_028040_Z_32_FLOAT
:
673 surf
.format
= V_028C70_COLOR_8_8_8_8
;
676 dev_warn(p
->dev
, "%s:%d depth invalid format %d\n",
677 __func__
, __LINE__
, surf
.format
);
681 r
= evergreen_surface_value_conv_check(p
, &surf
, "depth");
683 dev_warn(p
->dev
, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
684 __func__
, __LINE__
, track
->db_depth_size
,
685 track
->db_depth_slice
, track
->db_z_info
);
689 r
= evergreen_surface_check(p
, &surf
, "depth");
691 dev_warn(p
->dev
, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
692 __func__
, __LINE__
, track
->db_depth_size
,
693 track
->db_depth_slice
, track
->db_z_info
);
697 offset
= track
->db_z_read_offset
<< 8;
698 if (offset
& (surf
.base_align
- 1)) {
699 dev_warn(p
->dev
, "%s:%d stencil read bo base %ld not aligned with %ld\n",
700 __func__
, __LINE__
, offset
, surf
.base_align
);
703 offset
+= surf
.layer_size
* mslice
;
704 if (offset
> radeon_bo_size(track
->db_z_read_bo
)) {
705 dev_warn(p
->dev
, "%s:%d depth read bo too small (layer size %d, "
706 "offset %ld, max layer %d, bo size %ld)\n",
707 __func__
, __LINE__
, surf
.layer_size
,
708 (unsigned long)track
->db_z_read_offset
<< 8, mslice
,
709 radeon_bo_size(track
->db_z_read_bo
));
713 offset
= track
->db_z_write_offset
<< 8;
714 if (offset
& (surf
.base_align
- 1)) {
715 dev_warn(p
->dev
, "%s:%d stencil write bo base %ld not aligned with %ld\n",
716 __func__
, __LINE__
, offset
, surf
.base_align
);
719 offset
+= surf
.layer_size
* mslice
;
720 if (offset
> radeon_bo_size(track
->db_z_write_bo
)) {
721 dev_warn(p
->dev
, "%s:%d depth write bo too small (layer size %d, "
722 "offset %ld, max layer %d, bo size %ld)\n",
723 __func__
, __LINE__
, surf
.layer_size
,
724 (unsigned long)track
->db_z_write_offset
<< 8, mslice
,
725 radeon_bo_size(track
->db_z_write_bo
));
730 if (G_028040_TILE_SURFACE_ENABLE(track
->db_z_info
)) {
731 r
= evergreen_cs_track_validate_htile(p
, surf
.nbx
, surf
.nby
);
740 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser
*p
,
741 struct radeon_bo
*texture
,
742 struct radeon_bo
*mipmap
,
745 struct eg_surface surf
;
746 unsigned long toffset
, moffset
;
747 unsigned dim
, llevel
, mslice
, width
, height
, depth
, i
;
751 texdw
[0] = radeon_get_ib_value(p
, idx
+ 0);
752 texdw
[1] = radeon_get_ib_value(p
, idx
+ 1);
753 texdw
[2] = radeon_get_ib_value(p
, idx
+ 2);
754 texdw
[3] = radeon_get_ib_value(p
, idx
+ 3);
755 texdw
[4] = radeon_get_ib_value(p
, idx
+ 4);
756 texdw
[5] = radeon_get_ib_value(p
, idx
+ 5);
757 texdw
[6] = radeon_get_ib_value(p
, idx
+ 6);
758 texdw
[7] = radeon_get_ib_value(p
, idx
+ 7);
759 dim
= G_030000_DIM(texdw
[0]);
760 llevel
= G_030014_LAST_LEVEL(texdw
[5]);
761 mslice
= G_030014_LAST_ARRAY(texdw
[5]) + 1;
762 width
= G_030000_TEX_WIDTH(texdw
[0]) + 1;
763 height
= G_030004_TEX_HEIGHT(texdw
[1]) + 1;
764 depth
= G_030004_TEX_DEPTH(texdw
[1]) + 1;
765 surf
.format
= G_03001C_DATA_FORMAT(texdw
[7]);
766 surf
.nbx
= (G_030000_PITCH(texdw
[0]) + 1) * 8;
767 surf
.nbx
= r600_fmt_get_nblocksx(surf
.format
, surf
.nbx
);
768 surf
.nby
= r600_fmt_get_nblocksy(surf
.format
, height
);
769 surf
.mode
= G_030004_ARRAY_MODE(texdw
[1]);
770 surf
.tsplit
= G_030018_TILE_SPLIT(texdw
[6]);
771 surf
.nbanks
= G_03001C_NUM_BANKS(texdw
[7]);
772 surf
.bankw
= G_03001C_BANK_WIDTH(texdw
[7]);
773 surf
.bankh
= G_03001C_BANK_HEIGHT(texdw
[7]);
774 surf
.mtilea
= G_03001C_MACRO_TILE_ASPECT(texdw
[7]);
776 toffset
= texdw
[2] << 8;
777 moffset
= texdw
[3] << 8;
779 if (!r600_fmt_is_valid_texture(surf
.format
, p
->family
)) {
780 dev_warn(p
->dev
, "%s:%d texture invalid format %d\n",
781 __func__
, __LINE__
, surf
.format
);
785 case V_030000_SQ_TEX_DIM_1D
:
786 case V_030000_SQ_TEX_DIM_2D
:
787 case V_030000_SQ_TEX_DIM_CUBEMAP
:
788 case V_030000_SQ_TEX_DIM_1D_ARRAY
:
789 case V_030000_SQ_TEX_DIM_2D_ARRAY
:
792 case V_030000_SQ_TEX_DIM_2D_MSAA
:
793 case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA
:
794 surf
.nsamples
= 1 << llevel
;
798 case V_030000_SQ_TEX_DIM_3D
:
801 dev_warn(p
->dev
, "%s:%d texture invalid dimension %d\n",
802 __func__
, __LINE__
, dim
);
806 r
= evergreen_surface_value_conv_check(p
, &surf
, "texture");
812 evergreen_surface_check(p
, &surf
, NULL
);
813 surf
.nby
= ALIGN(surf
.nby
, surf
.halign
);
815 r
= evergreen_surface_check(p
, &surf
, "texture");
817 dev_warn(p
->dev
, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
818 __func__
, __LINE__
, texdw
[0], texdw
[1], texdw
[4],
819 texdw
[5], texdw
[6], texdw
[7]);
823 /* check texture size */
824 if (toffset
& (surf
.base_align
- 1)) {
825 dev_warn(p
->dev
, "%s:%d texture bo base %ld not aligned with %ld\n",
826 __func__
, __LINE__
, toffset
, surf
.base_align
);
829 if (moffset
& (surf
.base_align
- 1)) {
830 dev_warn(p
->dev
, "%s:%d mipmap bo base %ld not aligned with %ld\n",
831 __func__
, __LINE__
, moffset
, surf
.base_align
);
834 if (dim
== SQ_TEX_DIM_3D
) {
835 toffset
+= surf
.layer_size
* depth
;
837 toffset
+= surf
.layer_size
* mslice
;
839 if (toffset
> radeon_bo_size(texture
)) {
840 dev_warn(p
->dev
, "%s:%d texture bo too small (layer size %d, "
841 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
842 __func__
, __LINE__
, surf
.layer_size
,
843 (unsigned long)texdw
[2] << 8, mslice
,
844 depth
, radeon_bo_size(texture
),
849 /* check mipmap size */
850 for (i
= 1; i
<= llevel
; i
++) {
853 w
= r600_mip_minify(width
, i
);
854 h
= r600_mip_minify(height
, i
);
855 d
= r600_mip_minify(depth
, i
);
856 surf
.nbx
= r600_fmt_get_nblocksx(surf
.format
, w
);
857 surf
.nby
= r600_fmt_get_nblocksy(surf
.format
, h
);
860 case ARRAY_2D_TILED_THIN1
:
861 if (surf
.nbx
< surf
.palign
|| surf
.nby
< surf
.halign
) {
862 surf
.mode
= ARRAY_1D_TILED_THIN1
;
864 /* recompute alignment */
865 evergreen_surface_check(p
, &surf
, NULL
);
867 case ARRAY_LINEAR_GENERAL
:
868 case ARRAY_LINEAR_ALIGNED
:
869 case ARRAY_1D_TILED_THIN1
:
872 dev_warn(p
->dev
, "%s:%d invalid array mode %d\n",
873 __func__
, __LINE__
, surf
.mode
);
876 surf
.nbx
= ALIGN(surf
.nbx
, surf
.palign
);
877 surf
.nby
= ALIGN(surf
.nby
, surf
.halign
);
879 r
= evergreen_surface_check(p
, &surf
, "mipmap");
884 if (dim
== SQ_TEX_DIM_3D
) {
885 moffset
+= surf
.layer_size
* d
;
887 moffset
+= surf
.layer_size
* mslice
;
889 if (moffset
> radeon_bo_size(mipmap
)) {
890 dev_warn(p
->dev
, "%s:%d mipmap [%d] bo too small (layer size %d, "
891 "offset %ld, coffset %ld, max layer %d, depth %d, "
892 "bo size %ld) level0 (%d %d %d)\n",
893 __func__
, __LINE__
, i
, surf
.layer_size
,
894 (unsigned long)texdw
[3] << 8, moffset
, mslice
,
895 d
, radeon_bo_size(mipmap
),
896 width
, height
, depth
);
897 dev_warn(p
->dev
, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
898 __func__
, __LINE__
, surf
.nbx
, surf
.nby
,
899 surf
.mode
, surf
.bpe
, surf
.nsamples
,
900 surf
.bankw
, surf
.bankh
,
901 surf
.tsplit
, surf
.mtilea
);
909 static int evergreen_cs_track_check(struct radeon_cs_parser
*p
)
911 struct evergreen_cs_track
*track
= p
->track
;
914 unsigned buffer_mask
= 0;
916 /* check streamout */
917 if (track
->streamout_dirty
&& track
->vgt_strmout_config
) {
918 for (i
= 0; i
< 4; i
++) {
919 if (track
->vgt_strmout_config
& (1 << i
)) {
920 buffer_mask
|= (track
->vgt_strmout_buffer_config
>> (i
* 4)) & 0xf;
924 for (i
= 0; i
< 4; i
++) {
925 if (buffer_mask
& (1 << i
)) {
926 if (track
->vgt_strmout_bo
[i
]) {
927 u64 offset
= (u64
)track
->vgt_strmout_bo_offset
[i
] +
928 (u64
)track
->vgt_strmout_size
[i
];
929 if (offset
> radeon_bo_size(track
->vgt_strmout_bo
[i
])) {
930 DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
932 radeon_bo_size(track
->vgt_strmout_bo
[i
]));
936 dev_warn(p
->dev
, "No buffer for streamout %d\n", i
);
941 track
->streamout_dirty
= false;
944 if (track
->sx_misc_kill_all_prims
)
947 /* check that we have a cb for each enabled target
949 if (track
->cb_dirty
) {
950 tmp
= track
->cb_target_mask
;
951 for (i
= 0; i
< 8; i
++) {
952 if ((tmp
>> (i
* 4)) & 0xF) {
953 /* at least one component is enabled */
954 if (track
->cb_color_bo
[i
] == NULL
) {
955 dev_warn(p
->dev
, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
956 __func__
, __LINE__
, track
->cb_target_mask
, track
->cb_shader_mask
, i
);
960 r
= evergreen_cs_track_validate_cb(p
, i
);
966 track
->cb_dirty
= false;
969 if (track
->db_dirty
) {
970 /* Check stencil buffer */
971 if (G_028044_FORMAT(track
->db_s_info
) != V_028044_STENCIL_INVALID
&&
972 G_028800_STENCIL_ENABLE(track
->db_depth_control
)) {
973 r
= evergreen_cs_track_validate_stencil(p
);
977 /* Check depth buffer */
978 if (G_028040_FORMAT(track
->db_z_info
) != V_028040_Z_INVALID
&&
979 G_028800_Z_ENABLE(track
->db_depth_control
)) {
980 r
= evergreen_cs_track_validate_depth(p
);
984 track
->db_dirty
= false;
991 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
992 * @parser: parser structure holding parsing context.
993 * @pkt: where to store packet informations
995 * Assume that chunk_ib_index is properly set. Will return -EINVAL
996 * if packet is bigger than remaining ib size. or if packets is unknown.
998 int evergreen_cs_packet_parse(struct radeon_cs_parser
*p
,
999 struct radeon_cs_packet
*pkt
,
1002 struct radeon_cs_chunk
*ib_chunk
= &p
->chunks
[p
->chunk_ib_idx
];
1005 if (idx
>= ib_chunk
->length_dw
) {
1006 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1007 idx
, ib_chunk
->length_dw
);
1010 header
= radeon_get_ib_value(p
, idx
);
1012 pkt
->type
= CP_PACKET_GET_TYPE(header
);
1013 pkt
->count
= CP_PACKET_GET_COUNT(header
);
1014 pkt
->one_reg_wr
= 0;
1015 switch (pkt
->type
) {
1017 pkt
->reg
= CP_PACKET0_GET_REG(header
);
1020 pkt
->opcode
= CP_PACKET3_GET_OPCODE(header
);
1026 DRM_ERROR("Unknown packet type %d at %d !\n", pkt
->type
, idx
);
1029 if ((pkt
->count
+ 1 + pkt
->idx
) >= ib_chunk
->length_dw
) {
1030 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1031 pkt
->idx
, pkt
->type
, pkt
->count
, ib_chunk
->length_dw
);
1038 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
1039 * @parser: parser structure holding parsing context.
1040 * @data: pointer to relocation data
1041 * @offset_start: starting offset
1042 * @offset_mask: offset mask (to align start offset on)
1043 * @reloc: reloc informations
1045 * Check next packet is relocation packet3, do bo validation and compute
1046 * GPU offset using the provided start.
1048 static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser
*p
,
1049 struct radeon_cs_reloc
**cs_reloc
)
1051 struct radeon_cs_chunk
*relocs_chunk
;
1052 struct radeon_cs_packet p3reloc
;
1056 if (p
->chunk_relocs_idx
== -1) {
1057 DRM_ERROR("No relocation chunk !\n");
1061 relocs_chunk
= &p
->chunks
[p
->chunk_relocs_idx
];
1062 r
= evergreen_cs_packet_parse(p
, &p3reloc
, p
->idx
);
1066 p
->idx
+= p3reloc
.count
+ 2;
1067 if (p3reloc
.type
!= PACKET_TYPE3
|| p3reloc
.opcode
!= PACKET3_NOP
) {
1068 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1072 idx
= radeon_get_ib_value(p
, p3reloc
.idx
+ 1);
1073 if (idx
>= relocs_chunk
->length_dw
) {
1074 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1075 idx
, relocs_chunk
->length_dw
);
1078 /* FIXME: we assume reloc size is 4 dwords */
1079 *cs_reloc
= p
->relocs_ptr
[(idx
/ 4)];
1084 * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
1085 * @parser: parser structure holding parsing context.
1087 * Userspace sends a special sequence for VLINE waits.
1088 * PACKET0 - VLINE_START_END + value
1089 * PACKET3 - WAIT_REG_MEM poll vline status reg
1090 * RELOC (P3) - crtc_id in reloc.
1092 * This function parses this and relocates the VLINE START END
1093 * and WAIT_REG_MEM packets to the correct crtc.
1094 * It also detects a switched off crtc and nulls out the
1095 * wait in that case.
1097 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser
*p
)
1099 struct drm_mode_object
*obj
;
1100 struct drm_crtc
*crtc
;
1101 struct radeon_crtc
*radeon_crtc
;
1102 struct radeon_cs_packet p3reloc
, wait_reg_mem
;
1105 uint32_t header
, h_idx
, reg
, wait_reg_mem_info
;
1106 volatile uint32_t *ib
;
1110 /* parse the WAIT_REG_MEM */
1111 r
= evergreen_cs_packet_parse(p
, &wait_reg_mem
, p
->idx
);
1115 /* check its a WAIT_REG_MEM */
1116 if (wait_reg_mem
.type
!= PACKET_TYPE3
||
1117 wait_reg_mem
.opcode
!= PACKET3_WAIT_REG_MEM
) {
1118 DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
1122 wait_reg_mem_info
= radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 1);
1123 /* bit 4 is reg (0) or mem (1) */
1124 if (wait_reg_mem_info
& 0x10) {
1125 DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
1128 /* waiting for value to be equal */
1129 if ((wait_reg_mem_info
& 0x7) != 0x3) {
1130 DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
1133 if ((radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 2) << 2) != EVERGREEN_VLINE_STATUS
) {
1134 DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
1138 if (radeon_get_ib_value(p
, wait_reg_mem
.idx
+ 5) != EVERGREEN_VLINE_STAT
) {
1139 DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
1143 /* jump over the NOP */
1144 r
= evergreen_cs_packet_parse(p
, &p3reloc
, p
->idx
+ wait_reg_mem
.count
+ 2);
1149 p
->idx
+= wait_reg_mem
.count
+ 2;
1150 p
->idx
+= p3reloc
.count
+ 2;
1152 header
= radeon_get_ib_value(p
, h_idx
);
1153 crtc_id
= radeon_get_ib_value(p
, h_idx
+ 2 + 7 + 1);
1154 reg
= CP_PACKET0_GET_REG(header
);
1155 obj
= drm_mode_object_find(p
->rdev
->ddev
, crtc_id
, DRM_MODE_OBJECT_CRTC
);
1157 DRM_ERROR("cannot find crtc %d\n", crtc_id
);
1160 crtc
= obj_to_crtc(obj
);
1161 radeon_crtc
= to_radeon_crtc(crtc
);
1162 crtc_id
= radeon_crtc
->crtc_id
;
1164 if (!crtc
->enabled
) {
1165 /* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
1166 ib
[h_idx
+ 2] = PACKET2(0);
1167 ib
[h_idx
+ 3] = PACKET2(0);
1168 ib
[h_idx
+ 4] = PACKET2(0);
1169 ib
[h_idx
+ 5] = PACKET2(0);
1170 ib
[h_idx
+ 6] = PACKET2(0);
1171 ib
[h_idx
+ 7] = PACKET2(0);
1172 ib
[h_idx
+ 8] = PACKET2(0);
1175 case EVERGREEN_VLINE_START_END
:
1176 header
&= ~R600_CP_PACKET0_REG_MASK
;
1177 header
|= (EVERGREEN_VLINE_START_END
+ radeon_crtc
->crtc_offset
) >> 2;
1179 ib
[h_idx
+ 4] = (EVERGREEN_VLINE_STATUS
+ radeon_crtc
->crtc_offset
) >> 2;
1182 DRM_ERROR("unknown crtc reloc\n");
1189 static int evergreen_packet0_check(struct radeon_cs_parser
*p
,
1190 struct radeon_cs_packet
*pkt
,
1191 unsigned idx
, unsigned reg
)
1196 case EVERGREEN_VLINE_START_END
:
1197 r
= evergreen_cs_packet_parse_vline(p
);
1199 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1205 printk(KERN_ERR
"Forbidden register 0x%04X in cs at %d\n",
1212 static int evergreen_cs_parse_packet0(struct radeon_cs_parser
*p
,
1213 struct radeon_cs_packet
*pkt
)
1221 for (i
= 0; i
<= pkt
->count
; i
++, idx
++, reg
+= 4) {
1222 r
= evergreen_packet0_check(p
, pkt
, idx
, reg
);
1231 * evergreen_cs_check_reg() - check if register is authorized or not
1232 * @parser: parser structure holding parsing context
1233 * @reg: register we are testing
1234 * @idx: index into the cs buffer
1236 * This function will test against evergreen_reg_safe_bm and return 0
1237 * if register is safe. If register is not flag as safe this function
1238 * will test it against a list of register needind special handling.
1240 static int evergreen_cs_check_reg(struct radeon_cs_parser
*p
, u32 reg
, u32 idx
)
1242 struct evergreen_cs_track
*track
= (struct evergreen_cs_track
*)p
->track
;
1243 struct radeon_cs_reloc
*reloc
;
1248 if (p
->rdev
->family
>= CHIP_CAYMAN
)
1249 last_reg
= ARRAY_SIZE(cayman_reg_safe_bm
);
1251 last_reg
= ARRAY_SIZE(evergreen_reg_safe_bm
);
1254 if (i
>= last_reg
) {
1255 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1258 m
= 1 << ((reg
>> 2) & 31);
1259 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1260 if (!(cayman_reg_safe_bm
[i
] & m
))
1263 if (!(evergreen_reg_safe_bm
[i
] & m
))
1268 /* force following reg to 0 in an attempt to disable out buffer
1269 * which will need us to better understand how it works to perform
1270 * security check on it (Jerome)
1272 case SQ_ESGS_RING_SIZE
:
1273 case SQ_GSVS_RING_SIZE
:
1274 case SQ_ESTMP_RING_SIZE
:
1275 case SQ_GSTMP_RING_SIZE
:
1276 case SQ_HSTMP_RING_SIZE
:
1277 case SQ_LSTMP_RING_SIZE
:
1278 case SQ_PSTMP_RING_SIZE
:
1279 case SQ_VSTMP_RING_SIZE
:
1280 case SQ_ESGS_RING_ITEMSIZE
:
1281 case SQ_ESTMP_RING_ITEMSIZE
:
1282 case SQ_GSTMP_RING_ITEMSIZE
:
1283 case SQ_GSVS_RING_ITEMSIZE
:
1284 case SQ_GS_VERT_ITEMSIZE
:
1285 case SQ_GS_VERT_ITEMSIZE_1
:
1286 case SQ_GS_VERT_ITEMSIZE_2
:
1287 case SQ_GS_VERT_ITEMSIZE_3
:
1288 case SQ_GSVS_RING_OFFSET_1
:
1289 case SQ_GSVS_RING_OFFSET_2
:
1290 case SQ_GSVS_RING_OFFSET_3
:
1291 case SQ_HSTMP_RING_ITEMSIZE
:
1292 case SQ_LSTMP_RING_ITEMSIZE
:
1293 case SQ_PSTMP_RING_ITEMSIZE
:
1294 case SQ_VSTMP_RING_ITEMSIZE
:
1295 case VGT_TF_RING_SIZE
:
1296 /* get value to populate the IB don't remove */
1297 /*tmp =radeon_get_ib_value(p, idx);
1300 case SQ_ESGS_RING_BASE
:
1301 case SQ_GSVS_RING_BASE
:
1302 case SQ_ESTMP_RING_BASE
:
1303 case SQ_GSTMP_RING_BASE
:
1304 case SQ_HSTMP_RING_BASE
:
1305 case SQ_LSTMP_RING_BASE
:
1306 case SQ_PSTMP_RING_BASE
:
1307 case SQ_VSTMP_RING_BASE
:
1308 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1310 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1314 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1316 case DB_DEPTH_CONTROL
:
1317 track
->db_depth_control
= radeon_get_ib_value(p
, idx
);
1318 track
->db_dirty
= true;
1320 case CAYMAN_DB_EQAA
:
1321 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1322 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1327 case CAYMAN_DB_DEPTH_INFO
:
1328 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1329 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1335 track
->db_z_info
= radeon_get_ib_value(p
, idx
);
1336 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1337 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1339 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1343 ib
[idx
] &= ~Z_ARRAY_MODE(0xf);
1344 track
->db_z_info
&= ~Z_ARRAY_MODE(0xf);
1345 ib
[idx
] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1346 track
->db_z_info
|= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1347 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1348 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1350 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1351 &bankw
, &bankh
, &mtaspect
,
1353 ib
[idx
] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1354 ib
[idx
] |= DB_TILE_SPLIT(tile_split
) |
1355 DB_BANK_WIDTH(bankw
) |
1356 DB_BANK_HEIGHT(bankh
) |
1357 DB_MACRO_TILE_ASPECT(mtaspect
);
1360 track
->db_dirty
= true;
1362 case DB_STENCIL_INFO
:
1363 track
->db_s_info
= radeon_get_ib_value(p
, idx
);
1364 track
->db_dirty
= true;
1367 track
->db_depth_view
= radeon_get_ib_value(p
, idx
);
1368 track
->db_dirty
= true;
1371 track
->db_depth_size
= radeon_get_ib_value(p
, idx
);
1372 track
->db_dirty
= true;
1374 case R_02805C_DB_DEPTH_SLICE
:
1375 track
->db_depth_slice
= radeon_get_ib_value(p
, idx
);
1376 track
->db_dirty
= true;
1378 case DB_Z_READ_BASE
:
1379 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1381 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1385 track
->db_z_read_offset
= radeon_get_ib_value(p
, idx
);
1386 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1387 track
->db_z_read_bo
= reloc
->robj
;
1388 track
->db_dirty
= true;
1390 case DB_Z_WRITE_BASE
:
1391 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1393 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1397 track
->db_z_write_offset
= radeon_get_ib_value(p
, idx
);
1398 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1399 track
->db_z_write_bo
= reloc
->robj
;
1400 track
->db_dirty
= true;
1402 case DB_STENCIL_READ_BASE
:
1403 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1405 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1409 track
->db_s_read_offset
= radeon_get_ib_value(p
, idx
);
1410 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1411 track
->db_s_read_bo
= reloc
->robj
;
1412 track
->db_dirty
= true;
1414 case DB_STENCIL_WRITE_BASE
:
1415 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1417 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1421 track
->db_s_write_offset
= radeon_get_ib_value(p
, idx
);
1422 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1423 track
->db_s_write_bo
= reloc
->robj
;
1424 track
->db_dirty
= true;
1426 case VGT_STRMOUT_CONFIG
:
1427 track
->vgt_strmout_config
= radeon_get_ib_value(p
, idx
);
1428 track
->streamout_dirty
= true;
1430 case VGT_STRMOUT_BUFFER_CONFIG
:
1431 track
->vgt_strmout_buffer_config
= radeon_get_ib_value(p
, idx
);
1432 track
->streamout_dirty
= true;
1434 case VGT_STRMOUT_BUFFER_BASE_0
:
1435 case VGT_STRMOUT_BUFFER_BASE_1
:
1436 case VGT_STRMOUT_BUFFER_BASE_2
:
1437 case VGT_STRMOUT_BUFFER_BASE_3
:
1438 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1440 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1444 tmp
= (reg
- VGT_STRMOUT_BUFFER_BASE_0
) / 16;
1445 track
->vgt_strmout_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
) << 8;
1446 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1447 track
->vgt_strmout_bo
[tmp
] = reloc
->robj
;
1448 track
->streamout_dirty
= true;
1450 case VGT_STRMOUT_BUFFER_SIZE_0
:
1451 case VGT_STRMOUT_BUFFER_SIZE_1
:
1452 case VGT_STRMOUT_BUFFER_SIZE_2
:
1453 case VGT_STRMOUT_BUFFER_SIZE_3
:
1454 tmp
= (reg
- VGT_STRMOUT_BUFFER_SIZE_0
) / 16;
1455 /* size in register is DWs, convert to bytes */
1456 track
->vgt_strmout_size
[tmp
] = radeon_get_ib_value(p
, idx
) * 4;
1457 track
->streamout_dirty
= true;
1460 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1462 dev_warn(p
->dev
, "missing reloc for CP_COHER_BASE "
1466 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1467 case CB_TARGET_MASK
:
1468 track
->cb_target_mask
= radeon_get_ib_value(p
, idx
);
1469 track
->cb_dirty
= true;
1471 case CB_SHADER_MASK
:
1472 track
->cb_shader_mask
= radeon_get_ib_value(p
, idx
);
1473 track
->cb_dirty
= true;
1475 case PA_SC_AA_CONFIG
:
1476 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1477 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1481 tmp
= radeon_get_ib_value(p
, idx
) & MSAA_NUM_SAMPLES_MASK
;
1482 track
->nsamples
= 1 << tmp
;
1484 case CAYMAN_PA_SC_AA_CONFIG
:
1485 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1486 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1490 tmp
= radeon_get_ib_value(p
, idx
) & CAYMAN_MSAA_NUM_SAMPLES_MASK
;
1491 track
->nsamples
= 1 << tmp
;
1493 case CB_COLOR0_VIEW
:
1494 case CB_COLOR1_VIEW
:
1495 case CB_COLOR2_VIEW
:
1496 case CB_COLOR3_VIEW
:
1497 case CB_COLOR4_VIEW
:
1498 case CB_COLOR5_VIEW
:
1499 case CB_COLOR6_VIEW
:
1500 case CB_COLOR7_VIEW
:
1501 tmp
= (reg
- CB_COLOR0_VIEW
) / 0x3c;
1502 track
->cb_color_view
[tmp
] = radeon_get_ib_value(p
, idx
);
1503 track
->cb_dirty
= true;
1505 case CB_COLOR8_VIEW
:
1506 case CB_COLOR9_VIEW
:
1507 case CB_COLOR10_VIEW
:
1508 case CB_COLOR11_VIEW
:
1509 tmp
= ((reg
- CB_COLOR8_VIEW
) / 0x1c) + 8;
1510 track
->cb_color_view
[tmp
] = radeon_get_ib_value(p
, idx
);
1511 track
->cb_dirty
= true;
1513 case CB_COLOR0_INFO
:
1514 case CB_COLOR1_INFO
:
1515 case CB_COLOR2_INFO
:
1516 case CB_COLOR3_INFO
:
1517 case CB_COLOR4_INFO
:
1518 case CB_COLOR5_INFO
:
1519 case CB_COLOR6_INFO
:
1520 case CB_COLOR7_INFO
:
1521 tmp
= (reg
- CB_COLOR0_INFO
) / 0x3c;
1522 track
->cb_color_info
[tmp
] = radeon_get_ib_value(p
, idx
);
1523 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1524 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1526 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1530 ib
[idx
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1531 track
->cb_color_info
[tmp
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1533 track
->cb_dirty
= true;
1535 case CB_COLOR8_INFO
:
1536 case CB_COLOR9_INFO
:
1537 case CB_COLOR10_INFO
:
1538 case CB_COLOR11_INFO
:
1539 tmp
= ((reg
- CB_COLOR8_INFO
) / 0x1c) + 8;
1540 track
->cb_color_info
[tmp
] = radeon_get_ib_value(p
, idx
);
1541 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1542 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1544 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1548 ib
[idx
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1549 track
->cb_color_info
[tmp
] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
1551 track
->cb_dirty
= true;
1553 case CB_COLOR0_PITCH
:
1554 case CB_COLOR1_PITCH
:
1555 case CB_COLOR2_PITCH
:
1556 case CB_COLOR3_PITCH
:
1557 case CB_COLOR4_PITCH
:
1558 case CB_COLOR5_PITCH
:
1559 case CB_COLOR6_PITCH
:
1560 case CB_COLOR7_PITCH
:
1561 tmp
= (reg
- CB_COLOR0_PITCH
) / 0x3c;
1562 track
->cb_color_pitch
[tmp
] = radeon_get_ib_value(p
, idx
);
1563 track
->cb_dirty
= true;
1565 case CB_COLOR8_PITCH
:
1566 case CB_COLOR9_PITCH
:
1567 case CB_COLOR10_PITCH
:
1568 case CB_COLOR11_PITCH
:
1569 tmp
= ((reg
- CB_COLOR8_PITCH
) / 0x1c) + 8;
1570 track
->cb_color_pitch
[tmp
] = radeon_get_ib_value(p
, idx
);
1571 track
->cb_dirty
= true;
1573 case CB_COLOR0_SLICE
:
1574 case CB_COLOR1_SLICE
:
1575 case CB_COLOR2_SLICE
:
1576 case CB_COLOR3_SLICE
:
1577 case CB_COLOR4_SLICE
:
1578 case CB_COLOR5_SLICE
:
1579 case CB_COLOR6_SLICE
:
1580 case CB_COLOR7_SLICE
:
1581 tmp
= (reg
- CB_COLOR0_SLICE
) / 0x3c;
1582 track
->cb_color_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1583 track
->cb_color_slice_idx
[tmp
] = idx
;
1584 track
->cb_dirty
= true;
1586 case CB_COLOR8_SLICE
:
1587 case CB_COLOR9_SLICE
:
1588 case CB_COLOR10_SLICE
:
1589 case CB_COLOR11_SLICE
:
1590 tmp
= ((reg
- CB_COLOR8_SLICE
) / 0x1c) + 8;
1591 track
->cb_color_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1592 track
->cb_color_slice_idx
[tmp
] = idx
;
1593 track
->cb_dirty
= true;
1595 case CB_COLOR0_ATTRIB
:
1596 case CB_COLOR1_ATTRIB
:
1597 case CB_COLOR2_ATTRIB
:
1598 case CB_COLOR3_ATTRIB
:
1599 case CB_COLOR4_ATTRIB
:
1600 case CB_COLOR5_ATTRIB
:
1601 case CB_COLOR6_ATTRIB
:
1602 case CB_COLOR7_ATTRIB
:
1603 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1605 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1609 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1610 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1611 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1613 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1614 &bankw
, &bankh
, &mtaspect
,
1616 ib
[idx
] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1617 ib
[idx
] |= CB_TILE_SPLIT(tile_split
) |
1618 CB_BANK_WIDTH(bankw
) |
1619 CB_BANK_HEIGHT(bankh
) |
1620 CB_MACRO_TILE_ASPECT(mtaspect
);
1623 tmp
= ((reg
- CB_COLOR0_ATTRIB
) / 0x3c);
1624 track
->cb_color_attrib
[tmp
] = ib
[idx
];
1625 track
->cb_dirty
= true;
1627 case CB_COLOR8_ATTRIB
:
1628 case CB_COLOR9_ATTRIB
:
1629 case CB_COLOR10_ATTRIB
:
1630 case CB_COLOR11_ATTRIB
:
1631 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1633 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1637 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
1638 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
1639 unsigned bankw
, bankh
, mtaspect
, tile_split
;
1641 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
1642 &bankw
, &bankh
, &mtaspect
,
1644 ib
[idx
] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
1645 ib
[idx
] |= CB_TILE_SPLIT(tile_split
) |
1646 CB_BANK_WIDTH(bankw
) |
1647 CB_BANK_HEIGHT(bankh
) |
1648 CB_MACRO_TILE_ASPECT(mtaspect
);
1651 tmp
= ((reg
- CB_COLOR8_ATTRIB
) / 0x1c) + 8;
1652 track
->cb_color_attrib
[tmp
] = ib
[idx
];
1653 track
->cb_dirty
= true;
1655 case CB_COLOR0_FMASK
:
1656 case CB_COLOR1_FMASK
:
1657 case CB_COLOR2_FMASK
:
1658 case CB_COLOR3_FMASK
:
1659 case CB_COLOR4_FMASK
:
1660 case CB_COLOR5_FMASK
:
1661 case CB_COLOR6_FMASK
:
1662 case CB_COLOR7_FMASK
:
1663 tmp
= (reg
- CB_COLOR0_FMASK
) / 0x3c;
1664 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1666 dev_err(p
->dev
, "bad SET_CONTEXT_REG 0x%04X\n", reg
);
1669 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1670 track
->cb_color_fmask_bo
[tmp
] = reloc
->robj
;
1672 case CB_COLOR0_CMASK
:
1673 case CB_COLOR1_CMASK
:
1674 case CB_COLOR2_CMASK
:
1675 case CB_COLOR3_CMASK
:
1676 case CB_COLOR4_CMASK
:
1677 case CB_COLOR5_CMASK
:
1678 case CB_COLOR6_CMASK
:
1679 case CB_COLOR7_CMASK
:
1680 tmp
= (reg
- CB_COLOR0_CMASK
) / 0x3c;
1681 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1683 dev_err(p
->dev
, "bad SET_CONTEXT_REG 0x%04X\n", reg
);
1686 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1687 track
->cb_color_cmask_bo
[tmp
] = reloc
->robj
;
1689 case CB_COLOR0_FMASK_SLICE
:
1690 case CB_COLOR1_FMASK_SLICE
:
1691 case CB_COLOR2_FMASK_SLICE
:
1692 case CB_COLOR3_FMASK_SLICE
:
1693 case CB_COLOR4_FMASK_SLICE
:
1694 case CB_COLOR5_FMASK_SLICE
:
1695 case CB_COLOR6_FMASK_SLICE
:
1696 case CB_COLOR7_FMASK_SLICE
:
1697 tmp
= (reg
- CB_COLOR0_FMASK_SLICE
) / 0x3c;
1698 track
->cb_color_fmask_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1700 case CB_COLOR0_CMASK_SLICE
:
1701 case CB_COLOR1_CMASK_SLICE
:
1702 case CB_COLOR2_CMASK_SLICE
:
1703 case CB_COLOR3_CMASK_SLICE
:
1704 case CB_COLOR4_CMASK_SLICE
:
1705 case CB_COLOR5_CMASK_SLICE
:
1706 case CB_COLOR6_CMASK_SLICE
:
1707 case CB_COLOR7_CMASK_SLICE
:
1708 tmp
= (reg
- CB_COLOR0_CMASK_SLICE
) / 0x3c;
1709 track
->cb_color_cmask_slice
[tmp
] = radeon_get_ib_value(p
, idx
);
1711 case CB_COLOR0_BASE
:
1712 case CB_COLOR1_BASE
:
1713 case CB_COLOR2_BASE
:
1714 case CB_COLOR3_BASE
:
1715 case CB_COLOR4_BASE
:
1716 case CB_COLOR5_BASE
:
1717 case CB_COLOR6_BASE
:
1718 case CB_COLOR7_BASE
:
1719 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1721 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1725 tmp
= (reg
- CB_COLOR0_BASE
) / 0x3c;
1726 track
->cb_color_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
);
1727 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1728 track
->cb_color_bo
[tmp
] = reloc
->robj
;
1729 track
->cb_dirty
= true;
1731 case CB_COLOR8_BASE
:
1732 case CB_COLOR9_BASE
:
1733 case CB_COLOR10_BASE
:
1734 case CB_COLOR11_BASE
:
1735 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1737 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1741 tmp
= ((reg
- CB_COLOR8_BASE
) / 0x1c) + 8;
1742 track
->cb_color_bo_offset
[tmp
] = radeon_get_ib_value(p
, idx
);
1743 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1744 track
->cb_color_bo
[tmp
] = reloc
->robj
;
1745 track
->cb_dirty
= true;
1747 case DB_HTILE_DATA_BASE
:
1748 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1750 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1754 track
->htile_offset
= radeon_get_ib_value(p
, idx
);
1755 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1756 track
->htile_bo
= reloc
->robj
;
1757 track
->db_dirty
= true;
1759 case DB_HTILE_SURFACE
:
1761 track
->htile_surface
= radeon_get_ib_value(p
, idx
);
1762 track
->db_dirty
= true;
1764 case CB_IMMED0_BASE
:
1765 case CB_IMMED1_BASE
:
1766 case CB_IMMED2_BASE
:
1767 case CB_IMMED3_BASE
:
1768 case CB_IMMED4_BASE
:
1769 case CB_IMMED5_BASE
:
1770 case CB_IMMED6_BASE
:
1771 case CB_IMMED7_BASE
:
1772 case CB_IMMED8_BASE
:
1773 case CB_IMMED9_BASE
:
1774 case CB_IMMED10_BASE
:
1775 case CB_IMMED11_BASE
:
1776 case SQ_PGM_START_FS
:
1777 case SQ_PGM_START_ES
:
1778 case SQ_PGM_START_VS
:
1779 case SQ_PGM_START_GS
:
1780 case SQ_PGM_START_PS
:
1781 case SQ_PGM_START_HS
:
1782 case SQ_PGM_START_LS
:
1783 case SQ_CONST_MEM_BASE
:
1784 case SQ_ALU_CONST_CACHE_GS_0
:
1785 case SQ_ALU_CONST_CACHE_GS_1
:
1786 case SQ_ALU_CONST_CACHE_GS_2
:
1787 case SQ_ALU_CONST_CACHE_GS_3
:
1788 case SQ_ALU_CONST_CACHE_GS_4
:
1789 case SQ_ALU_CONST_CACHE_GS_5
:
1790 case SQ_ALU_CONST_CACHE_GS_6
:
1791 case SQ_ALU_CONST_CACHE_GS_7
:
1792 case SQ_ALU_CONST_CACHE_GS_8
:
1793 case SQ_ALU_CONST_CACHE_GS_9
:
1794 case SQ_ALU_CONST_CACHE_GS_10
:
1795 case SQ_ALU_CONST_CACHE_GS_11
:
1796 case SQ_ALU_CONST_CACHE_GS_12
:
1797 case SQ_ALU_CONST_CACHE_GS_13
:
1798 case SQ_ALU_CONST_CACHE_GS_14
:
1799 case SQ_ALU_CONST_CACHE_GS_15
:
1800 case SQ_ALU_CONST_CACHE_PS_0
:
1801 case SQ_ALU_CONST_CACHE_PS_1
:
1802 case SQ_ALU_CONST_CACHE_PS_2
:
1803 case SQ_ALU_CONST_CACHE_PS_3
:
1804 case SQ_ALU_CONST_CACHE_PS_4
:
1805 case SQ_ALU_CONST_CACHE_PS_5
:
1806 case SQ_ALU_CONST_CACHE_PS_6
:
1807 case SQ_ALU_CONST_CACHE_PS_7
:
1808 case SQ_ALU_CONST_CACHE_PS_8
:
1809 case SQ_ALU_CONST_CACHE_PS_9
:
1810 case SQ_ALU_CONST_CACHE_PS_10
:
1811 case SQ_ALU_CONST_CACHE_PS_11
:
1812 case SQ_ALU_CONST_CACHE_PS_12
:
1813 case SQ_ALU_CONST_CACHE_PS_13
:
1814 case SQ_ALU_CONST_CACHE_PS_14
:
1815 case SQ_ALU_CONST_CACHE_PS_15
:
1816 case SQ_ALU_CONST_CACHE_VS_0
:
1817 case SQ_ALU_CONST_CACHE_VS_1
:
1818 case SQ_ALU_CONST_CACHE_VS_2
:
1819 case SQ_ALU_CONST_CACHE_VS_3
:
1820 case SQ_ALU_CONST_CACHE_VS_4
:
1821 case SQ_ALU_CONST_CACHE_VS_5
:
1822 case SQ_ALU_CONST_CACHE_VS_6
:
1823 case SQ_ALU_CONST_CACHE_VS_7
:
1824 case SQ_ALU_CONST_CACHE_VS_8
:
1825 case SQ_ALU_CONST_CACHE_VS_9
:
1826 case SQ_ALU_CONST_CACHE_VS_10
:
1827 case SQ_ALU_CONST_CACHE_VS_11
:
1828 case SQ_ALU_CONST_CACHE_VS_12
:
1829 case SQ_ALU_CONST_CACHE_VS_13
:
1830 case SQ_ALU_CONST_CACHE_VS_14
:
1831 case SQ_ALU_CONST_CACHE_VS_15
:
1832 case SQ_ALU_CONST_CACHE_HS_0
:
1833 case SQ_ALU_CONST_CACHE_HS_1
:
1834 case SQ_ALU_CONST_CACHE_HS_2
:
1835 case SQ_ALU_CONST_CACHE_HS_3
:
1836 case SQ_ALU_CONST_CACHE_HS_4
:
1837 case SQ_ALU_CONST_CACHE_HS_5
:
1838 case SQ_ALU_CONST_CACHE_HS_6
:
1839 case SQ_ALU_CONST_CACHE_HS_7
:
1840 case SQ_ALU_CONST_CACHE_HS_8
:
1841 case SQ_ALU_CONST_CACHE_HS_9
:
1842 case SQ_ALU_CONST_CACHE_HS_10
:
1843 case SQ_ALU_CONST_CACHE_HS_11
:
1844 case SQ_ALU_CONST_CACHE_HS_12
:
1845 case SQ_ALU_CONST_CACHE_HS_13
:
1846 case SQ_ALU_CONST_CACHE_HS_14
:
1847 case SQ_ALU_CONST_CACHE_HS_15
:
1848 case SQ_ALU_CONST_CACHE_LS_0
:
1849 case SQ_ALU_CONST_CACHE_LS_1
:
1850 case SQ_ALU_CONST_CACHE_LS_2
:
1851 case SQ_ALU_CONST_CACHE_LS_3
:
1852 case SQ_ALU_CONST_CACHE_LS_4
:
1853 case SQ_ALU_CONST_CACHE_LS_5
:
1854 case SQ_ALU_CONST_CACHE_LS_6
:
1855 case SQ_ALU_CONST_CACHE_LS_7
:
1856 case SQ_ALU_CONST_CACHE_LS_8
:
1857 case SQ_ALU_CONST_CACHE_LS_9
:
1858 case SQ_ALU_CONST_CACHE_LS_10
:
1859 case SQ_ALU_CONST_CACHE_LS_11
:
1860 case SQ_ALU_CONST_CACHE_LS_12
:
1861 case SQ_ALU_CONST_CACHE_LS_13
:
1862 case SQ_ALU_CONST_CACHE_LS_14
:
1863 case SQ_ALU_CONST_CACHE_LS_15
:
1864 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1866 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1870 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1872 case SX_MEMORY_EXPORT_BASE
:
1873 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1874 dev_warn(p
->dev
, "bad SET_CONFIG_REG "
1878 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1880 dev_warn(p
->dev
, "bad SET_CONFIG_REG "
1884 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1886 case CAYMAN_SX_SCATTER_EXPORT_BASE
:
1887 if (p
->rdev
->family
< CHIP_CAYMAN
) {
1888 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1892 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1894 dev_warn(p
->dev
, "bad SET_CONTEXT_REG "
1898 ib
[idx
] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
1901 track
->sx_misc_kill_all_prims
= (radeon_get_ib_value(p
, idx
) & 0x1) != 0;
1904 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1910 static bool evergreen_is_safe_reg(struct radeon_cs_parser
*p
, u32 reg
, u32 idx
)
1914 if (p
->rdev
->family
>= CHIP_CAYMAN
)
1915 last_reg
= ARRAY_SIZE(cayman_reg_safe_bm
);
1917 last_reg
= ARRAY_SIZE(evergreen_reg_safe_bm
);
1920 if (i
>= last_reg
) {
1921 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1924 m
= 1 << ((reg
>> 2) & 31);
1925 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
1926 if (!(cayman_reg_safe_bm
[i
] & m
))
1929 if (!(evergreen_reg_safe_bm
[i
] & m
))
1932 dev_warn(p
->dev
, "forbidden register 0x%08x at %d\n", reg
, idx
);
1936 static int evergreen_packet3_check(struct radeon_cs_parser
*p
,
1937 struct radeon_cs_packet
*pkt
)
1939 struct radeon_cs_reloc
*reloc
;
1940 struct evergreen_cs_track
*track
;
1944 unsigned start_reg
, end_reg
, reg
;
1948 track
= (struct evergreen_cs_track
*)p
->track
;
1951 idx_value
= radeon_get_ib_value(p
, idx
);
1953 switch (pkt
->opcode
) {
1954 case PACKET3_SET_PREDICATION
:
1960 if (pkt
->count
!= 1) {
1961 DRM_ERROR("bad SET PREDICATION\n");
1965 tmp
= radeon_get_ib_value(p
, idx
+ 1);
1966 pred_op
= (tmp
>> 16) & 0x7;
1968 /* for the clear predicate operation */
1973 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op
);
1977 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
1979 DRM_ERROR("bad SET PREDICATION\n");
1983 offset
= reloc
->lobj
.gpu_offset
+
1984 (idx_value
& 0xfffffff0) +
1985 ((u64
)(tmp
& 0xff) << 32);
1987 ib
[idx
+ 0] = offset
;
1988 ib
[idx
+ 1] = (tmp
& 0xffffff00) | (upper_32_bits(offset
) & 0xff);
1991 case PACKET3_CONTEXT_CONTROL
:
1992 if (pkt
->count
!= 1) {
1993 DRM_ERROR("bad CONTEXT_CONTROL\n");
1997 case PACKET3_INDEX_TYPE
:
1998 case PACKET3_NUM_INSTANCES
:
1999 case PACKET3_CLEAR_STATE
:
2001 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2005 case CAYMAN_PACKET3_DEALLOC_STATE
:
2006 if (p
->rdev
->family
< CHIP_CAYMAN
) {
2007 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
2011 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
2015 case PACKET3_INDEX_BASE
:
2019 if (pkt
->count
!= 1) {
2020 DRM_ERROR("bad INDEX_BASE\n");
2023 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2025 DRM_ERROR("bad INDEX_BASE\n");
2029 offset
= reloc
->lobj
.gpu_offset
+
2031 ((u64
)(radeon_get_ib_value(p
, idx
+1) & 0xff) << 32);
2034 ib
[idx
+1] = upper_32_bits(offset
) & 0xff;
2036 r
= evergreen_cs_track_check(p
);
2038 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2043 case PACKET3_DRAW_INDEX
:
2046 if (pkt
->count
!= 3) {
2047 DRM_ERROR("bad DRAW_INDEX\n");
2050 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2052 DRM_ERROR("bad DRAW_INDEX\n");
2056 offset
= reloc
->lobj
.gpu_offset
+
2058 ((u64
)(radeon_get_ib_value(p
, idx
+1) & 0xff) << 32);
2061 ib
[idx
+1] = upper_32_bits(offset
) & 0xff;
2063 r
= evergreen_cs_track_check(p
);
2065 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2070 case PACKET3_DRAW_INDEX_2
:
2074 if (pkt
->count
!= 4) {
2075 DRM_ERROR("bad DRAW_INDEX_2\n");
2078 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2080 DRM_ERROR("bad DRAW_INDEX_2\n");
2084 offset
= reloc
->lobj
.gpu_offset
+
2085 radeon_get_ib_value(p
, idx
+1) +
2086 ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff) << 32);
2089 ib
[idx
+2] = upper_32_bits(offset
) & 0xff;
2091 r
= evergreen_cs_track_check(p
);
2093 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2098 case PACKET3_DRAW_INDEX_AUTO
:
2099 if (pkt
->count
!= 1) {
2100 DRM_ERROR("bad DRAW_INDEX_AUTO\n");
2103 r
= evergreen_cs_track_check(p
);
2105 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
2109 case PACKET3_DRAW_INDEX_MULTI_AUTO
:
2110 if (pkt
->count
!= 2) {
2111 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
2114 r
= evergreen_cs_track_check(p
);
2116 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
2120 case PACKET3_DRAW_INDEX_IMMD
:
2121 if (pkt
->count
< 2) {
2122 DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2125 r
= evergreen_cs_track_check(p
);
2127 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2131 case PACKET3_DRAW_INDEX_OFFSET
:
2132 if (pkt
->count
!= 2) {
2133 DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2136 r
= evergreen_cs_track_check(p
);
2138 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2142 case PACKET3_DRAW_INDEX_OFFSET_2
:
2143 if (pkt
->count
!= 3) {
2144 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2147 r
= evergreen_cs_track_check(p
);
2149 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2153 case PACKET3_DISPATCH_DIRECT
:
2154 if (pkt
->count
!= 3) {
2155 DRM_ERROR("bad DISPATCH_DIRECT\n");
2158 r
= evergreen_cs_track_check(p
);
2160 dev_warn(p
->dev
, "%s:%d invalid cmd stream %d\n", __func__
, __LINE__
, idx
);
2164 case PACKET3_DISPATCH_INDIRECT
:
2165 if (pkt
->count
!= 1) {
2166 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2169 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2171 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2174 ib
[idx
+0] = idx_value
+ (u32
)(reloc
->lobj
.gpu_offset
& 0xffffffff);
2175 r
= evergreen_cs_track_check(p
);
2177 dev_warn(p
->dev
, "%s:%d invalid cmd stream\n", __func__
, __LINE__
);
2181 case PACKET3_WAIT_REG_MEM
:
2182 if (pkt
->count
!= 5) {
2183 DRM_ERROR("bad WAIT_REG_MEM\n");
2186 /* bit 4 is reg (0) or mem (1) */
2187 if (idx_value
& 0x10) {
2190 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2192 DRM_ERROR("bad WAIT_REG_MEM\n");
2196 offset
= reloc
->lobj
.gpu_offset
+
2197 (radeon_get_ib_value(p
, idx
+1) & 0xfffffffc) +
2198 ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff) << 32);
2200 ib
[idx
+1] = (ib
[idx
+1] & 0x3) | (offset
& 0xfffffffc);
2201 ib
[idx
+2] = upper_32_bits(offset
) & 0xff;
2204 case PACKET3_SURFACE_SYNC
:
2205 if (pkt
->count
!= 3) {
2206 DRM_ERROR("bad SURFACE_SYNC\n");
2209 /* 0xffffffff/0x0 is flush all cache flag */
2210 if (radeon_get_ib_value(p
, idx
+ 1) != 0xffffffff ||
2211 radeon_get_ib_value(p
, idx
+ 2) != 0) {
2212 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2214 DRM_ERROR("bad SURFACE_SYNC\n");
2217 ib
[idx
+2] += (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2220 case PACKET3_EVENT_WRITE
:
2221 if (pkt
->count
!= 2 && pkt
->count
!= 0) {
2222 DRM_ERROR("bad EVENT_WRITE\n");
2228 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2230 DRM_ERROR("bad EVENT_WRITE\n");
2233 offset
= reloc
->lobj
.gpu_offset
+
2234 (radeon_get_ib_value(p
, idx
+1) & 0xfffffff8) +
2235 ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff) << 32);
2237 ib
[idx
+1] = offset
& 0xfffffff8;
2238 ib
[idx
+2] = upper_32_bits(offset
) & 0xff;
2241 case PACKET3_EVENT_WRITE_EOP
:
2245 if (pkt
->count
!= 4) {
2246 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2249 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2251 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2255 offset
= reloc
->lobj
.gpu_offset
+
2256 (radeon_get_ib_value(p
, idx
+1) & 0xfffffffc) +
2257 ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff) << 32);
2259 ib
[idx
+1] = offset
& 0xfffffffc;
2260 ib
[idx
+2] = (ib
[idx
+2] & 0xffffff00) | (upper_32_bits(offset
) & 0xff);
2263 case PACKET3_EVENT_WRITE_EOS
:
2267 if (pkt
->count
!= 3) {
2268 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2271 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2273 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2277 offset
= reloc
->lobj
.gpu_offset
+
2278 (radeon_get_ib_value(p
, idx
+1) & 0xfffffffc) +
2279 ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff) << 32);
2281 ib
[idx
+1] = offset
& 0xfffffffc;
2282 ib
[idx
+2] = (ib
[idx
+2] & 0xffffff00) | (upper_32_bits(offset
) & 0xff);
2285 case PACKET3_SET_CONFIG_REG
:
2286 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONFIG_REG_START
;
2287 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2288 if ((start_reg
< PACKET3_SET_CONFIG_REG_START
) ||
2289 (start_reg
>= PACKET3_SET_CONFIG_REG_END
) ||
2290 (end_reg
>= PACKET3_SET_CONFIG_REG_END
)) {
2291 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2294 for (i
= 0; i
< pkt
->count
; i
++) {
2295 reg
= start_reg
+ (4 * i
);
2296 r
= evergreen_cs_check_reg(p
, reg
, idx
+1+i
);
2301 case PACKET3_SET_CONTEXT_REG
:
2302 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONTEXT_REG_START
;
2303 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2304 if ((start_reg
< PACKET3_SET_CONTEXT_REG_START
) ||
2305 (start_reg
>= PACKET3_SET_CONTEXT_REG_END
) ||
2306 (end_reg
>= PACKET3_SET_CONTEXT_REG_END
)) {
2307 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2310 for (i
= 0; i
< pkt
->count
; i
++) {
2311 reg
= start_reg
+ (4 * i
);
2312 r
= evergreen_cs_check_reg(p
, reg
, idx
+1+i
);
2317 case PACKET3_SET_RESOURCE
:
2318 if (pkt
->count
% 8) {
2319 DRM_ERROR("bad SET_RESOURCE\n");
2322 start_reg
= (idx_value
<< 2) + PACKET3_SET_RESOURCE_START
;
2323 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2324 if ((start_reg
< PACKET3_SET_RESOURCE_START
) ||
2325 (start_reg
>= PACKET3_SET_RESOURCE_END
) ||
2326 (end_reg
>= PACKET3_SET_RESOURCE_END
)) {
2327 DRM_ERROR("bad SET_RESOURCE\n");
2330 for (i
= 0; i
< (pkt
->count
/ 8); i
++) {
2331 struct radeon_bo
*texture
, *mipmap
;
2332 u32 toffset
, moffset
;
2335 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p
, idx
+1+(i
*8)+7))) {
2336 case SQ_TEX_VTX_VALID_TEXTURE
:
2338 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2340 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2343 if (!(p
->cs_flags
& RADEON_CS_KEEP_TILING_FLAGS
)) {
2344 ib
[idx
+1+(i
*8)+1] |=
2345 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc
->lobj
.tiling_flags
));
2346 if (reloc
->lobj
.tiling_flags
& RADEON_TILING_MACRO
) {
2347 unsigned bankw
, bankh
, mtaspect
, tile_split
;
2349 evergreen_tiling_fields(reloc
->lobj
.tiling_flags
,
2350 &bankw
, &bankh
, &mtaspect
,
2352 ib
[idx
+1+(i
*8)+6] |= TEX_TILE_SPLIT(tile_split
);
2353 ib
[idx
+1+(i
*8)+7] |=
2354 TEX_BANK_WIDTH(bankw
) |
2355 TEX_BANK_HEIGHT(bankh
) |
2356 MACRO_TILE_ASPECT(mtaspect
) |
2357 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track
->nbanks
));
2360 texture
= reloc
->robj
;
2361 toffset
= (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2363 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2365 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2368 moffset
= (u32
)((reloc
->lobj
.gpu_offset
>> 8) & 0xffffffff);
2369 mipmap
= reloc
->robj
;
2370 r
= evergreen_cs_track_validate_texture(p
, texture
, mipmap
, idx
+1+(i
*8));
2373 ib
[idx
+1+(i
*8)+2] += toffset
;
2374 ib
[idx
+1+(i
*8)+3] += moffset
;
2376 case SQ_TEX_VTX_VALID_BUFFER
:
2380 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2382 DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2385 offset
= radeon_get_ib_value(p
, idx
+1+(i
*8)+0);
2386 size
= radeon_get_ib_value(p
, idx
+1+(i
*8)+1);
2387 if (p
->rdev
&& (size
+ offset
) > radeon_bo_size(reloc
->robj
)) {
2388 /* force size to size of the buffer */
2389 dev_warn(p
->dev
, "vbo resource seems too big for the bo\n");
2390 ib
[idx
+1+(i
*8)+1] = radeon_bo_size(reloc
->robj
) - offset
;
2393 offset64
= reloc
->lobj
.gpu_offset
+ offset
;
2394 ib
[idx
+1+(i
*8)+0] = offset64
;
2395 ib
[idx
+1+(i
*8)+2] = (ib
[idx
+1+(i
*8)+2] & 0xffffff00) |
2396 (upper_32_bits(offset64
) & 0xff);
2399 case SQ_TEX_VTX_INVALID_TEXTURE
:
2400 case SQ_TEX_VTX_INVALID_BUFFER
:
2402 DRM_ERROR("bad SET_RESOURCE\n");
2407 case PACKET3_SET_ALU_CONST
:
2408 /* XXX fix me ALU const buffers only */
2410 case PACKET3_SET_BOOL_CONST
:
2411 start_reg
= (idx_value
<< 2) + PACKET3_SET_BOOL_CONST_START
;
2412 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2413 if ((start_reg
< PACKET3_SET_BOOL_CONST_START
) ||
2414 (start_reg
>= PACKET3_SET_BOOL_CONST_END
) ||
2415 (end_reg
>= PACKET3_SET_BOOL_CONST_END
)) {
2416 DRM_ERROR("bad SET_BOOL_CONST\n");
2420 case PACKET3_SET_LOOP_CONST
:
2421 start_reg
= (idx_value
<< 2) + PACKET3_SET_LOOP_CONST_START
;
2422 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2423 if ((start_reg
< PACKET3_SET_LOOP_CONST_START
) ||
2424 (start_reg
>= PACKET3_SET_LOOP_CONST_END
) ||
2425 (end_reg
>= PACKET3_SET_LOOP_CONST_END
)) {
2426 DRM_ERROR("bad SET_LOOP_CONST\n");
2430 case PACKET3_SET_CTL_CONST
:
2431 start_reg
= (idx_value
<< 2) + PACKET3_SET_CTL_CONST_START
;
2432 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2433 if ((start_reg
< PACKET3_SET_CTL_CONST_START
) ||
2434 (start_reg
>= PACKET3_SET_CTL_CONST_END
) ||
2435 (end_reg
>= PACKET3_SET_CTL_CONST_END
)) {
2436 DRM_ERROR("bad SET_CTL_CONST\n");
2440 case PACKET3_SET_SAMPLER
:
2441 if (pkt
->count
% 3) {
2442 DRM_ERROR("bad SET_SAMPLER\n");
2445 start_reg
= (idx_value
<< 2) + PACKET3_SET_SAMPLER_START
;
2446 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2447 if ((start_reg
< PACKET3_SET_SAMPLER_START
) ||
2448 (start_reg
>= PACKET3_SET_SAMPLER_END
) ||
2449 (end_reg
>= PACKET3_SET_SAMPLER_END
)) {
2450 DRM_ERROR("bad SET_SAMPLER\n");
2454 case PACKET3_STRMOUT_BUFFER_UPDATE
:
2455 if (pkt
->count
!= 4) {
2456 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2459 /* Updating memory at DST_ADDRESS. */
2460 if (idx_value
& 0x1) {
2462 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2464 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2467 offset
= radeon_get_ib_value(p
, idx
+1);
2468 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff)) << 32;
2469 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2470 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2471 offset
+ 4, radeon_bo_size(reloc
->robj
));
2474 offset
+= reloc
->lobj
.gpu_offset
;
2476 ib
[idx
+2] = upper_32_bits(offset
) & 0xff;
2478 /* Reading data from SRC_ADDRESS. */
2479 if (((idx_value
>> 1) & 0x3) == 2) {
2481 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2483 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2486 offset
= radeon_get_ib_value(p
, idx
+3);
2487 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+4) & 0xff)) << 32;
2488 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2489 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2490 offset
+ 4, radeon_bo_size(reloc
->robj
));
2493 offset
+= reloc
->lobj
.gpu_offset
;
2495 ib
[idx
+4] = upper_32_bits(offset
) & 0xff;
2498 case PACKET3_COPY_DW
:
2499 if (pkt
->count
!= 4) {
2500 DRM_ERROR("bad COPY_DW (invalid count)\n");
2503 if (idx_value
& 0x1) {
2505 /* SRC is memory. */
2506 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2508 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2511 offset
= radeon_get_ib_value(p
, idx
+1);
2512 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+2) & 0xff)) << 32;
2513 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2514 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2515 offset
+ 4, radeon_bo_size(reloc
->robj
));
2518 offset
+= reloc
->lobj
.gpu_offset
;
2520 ib
[idx
+2] = upper_32_bits(offset
) & 0xff;
2523 reg
= radeon_get_ib_value(p
, idx
+1) << 2;
2524 if (!evergreen_is_safe_reg(p
, reg
, idx
+1))
2527 if (idx_value
& 0x2) {
2529 /* DST is memory. */
2530 r
= evergreen_cs_packet_next_reloc(p
, &reloc
);
2532 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2535 offset
= radeon_get_ib_value(p
, idx
+3);
2536 offset
+= ((u64
)(radeon_get_ib_value(p
, idx
+4) & 0xff)) << 32;
2537 if ((offset
+ 4) > radeon_bo_size(reloc
->robj
)) {
2538 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2539 offset
+ 4, radeon_bo_size(reloc
->robj
));
2542 offset
+= reloc
->lobj
.gpu_offset
;
2544 ib
[idx
+4] = upper_32_bits(offset
) & 0xff;
2547 reg
= radeon_get_ib_value(p
, idx
+3) << 2;
2548 if (!evergreen_is_safe_reg(p
, reg
, idx
+3))
2555 DRM_ERROR("Packet3 opcode %x not supported\n", pkt
->opcode
);
2561 int evergreen_cs_parse(struct radeon_cs_parser
*p
)
2563 struct radeon_cs_packet pkt
;
2564 struct evergreen_cs_track
*track
;
2568 if (p
->track
== NULL
) {
2569 /* initialize tracker, we are in kms */
2570 track
= kzalloc(sizeof(*track
), GFP_KERNEL
);
2573 evergreen_cs_track_init(track
);
2574 if (p
->rdev
->family
>= CHIP_CAYMAN
)
2575 tmp
= p
->rdev
->config
.cayman
.tile_config
;
2577 tmp
= p
->rdev
->config
.evergreen
.tile_config
;
2579 switch (tmp
& 0xf) {
2595 switch ((tmp
& 0xf0) >> 4) {
2608 switch ((tmp
& 0xf00) >> 8) {
2610 track
->group_size
= 256;
2614 track
->group_size
= 512;
2618 switch ((tmp
& 0xf000) >> 12) {
2620 track
->row_size
= 1;
2624 track
->row_size
= 2;
2627 track
->row_size
= 4;
2634 r
= evergreen_cs_packet_parse(p
, &pkt
, p
->idx
);
2640 p
->idx
+= pkt
.count
+ 2;
2643 r
= evergreen_cs_parse_packet0(p
, &pkt
);
2648 r
= evergreen_packet3_check(p
, &pkt
);
2651 DRM_ERROR("Unknown packet type %d !\n", pkt
.type
);
2661 } while (p
->idx
< p
->chunks
[p
->chunk_ib_idx
].length_dw
);
2663 for (r
= 0; r
< p
->ib
.length_dw
; r
++) {
2664 printk(KERN_INFO
"%05d 0x%08X\n", r
, p
->ib
.ptr
[r
]);
/*
 * evergreen_vm_reg_valid() - allow-list check for a register offset
 * written from a VM (virtual-memory) IB.
 *
 * Per the surviving comments: context registers are accepted outright,
 * and anything else must match one of the config registers enumerated
 * in the switch below.  NOTE(review): the actual comparison against the
 * context-register range and the switch's return statements did not
 * survive extraction -- restore from the pristine source.
 */
2674 static bool evergreen_vm_reg_valid(u32 reg
)
2676 /* context regs are fine */
2680 /* check config regs */
/* GRBM / VGT front-end and compute-dispatch registers */
2682 case GRBM_GFX_INDEX
:
2683 case VGT_VTX_VECT_EJECT_REG
:
2684 case VGT_CACHE_INVALIDATION
:
2685 case VGT_GS_VERTEX_REUSE
:
2686 case VGT_PRIMITIVE_TYPE
:
2687 case VGT_INDEX_TYPE
:
2688 case VGT_NUM_INDICES
:
2689 case VGT_NUM_INSTANCES
:
2690 case VGT_COMPUTE_DIM_X
:
2691 case VGT_COMPUTE_DIM_Y
:
2692 case VGT_COMPUTE_DIM_Z
:
2693 case VGT_COMPUTE_START_X
:
2694 case VGT_COMPUTE_START_Y
:
2695 case VGT_COMPUTE_START_Z
:
2696 case VGT_COMPUTE_INDEX
:
2697 case VGT_COMPUTE_THREAD_GROUP_SIZE
:
2698 case VGT_HS_OFFCHIP_PARAM
:
/* PA (primitive assembly) line-stipple registers */
2700 case PA_SU_LINE_STIPPLE_VALUE
:
2701 case PA_SC_LINE_STIPPLE_STATE
:
/* SQ / SPI shader-resource management registers */
2703 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
:
2704 case SQ_DYN_GPR_SIMD_LOCK_EN
:
2706 case SQ_GPR_RESOURCE_MGMT_1
:
2707 case SQ_GLOBAL_GPR_RESOURCE_MGMT_1
:
2708 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2
:
2709 case SQ_CONST_MEM_BASE
:
2710 case SQ_STATIC_THREAD_MGMT_1
:
2711 case SQ_STATIC_THREAD_MGMT_2
:
2712 case SQ_STATIC_THREAD_MGMT_3
:
2713 case SPI_CONFIG_CNTL
:
2714 case SPI_CONFIG_CNTL_1
:
/* TD border-color registers for every shader stage (PS/VS/GS/HS/LS/CS) */
2721 case TD_PS_BORDER_COLOR_INDEX
:
2722 case TD_PS_BORDER_COLOR_RED
:
2723 case TD_PS_BORDER_COLOR_GREEN
:
2724 case TD_PS_BORDER_COLOR_BLUE
:
2725 case TD_PS_BORDER_COLOR_ALPHA
:
2726 case TD_VS_BORDER_COLOR_INDEX
:
2727 case TD_VS_BORDER_COLOR_RED
:
2728 case TD_VS_BORDER_COLOR_GREEN
:
2729 case TD_VS_BORDER_COLOR_BLUE
:
2730 case TD_VS_BORDER_COLOR_ALPHA
:
2731 case TD_GS_BORDER_COLOR_INDEX
:
2732 case TD_GS_BORDER_COLOR_RED
:
2733 case TD_GS_BORDER_COLOR_GREEN
:
2734 case TD_GS_BORDER_COLOR_BLUE
:
2735 case TD_GS_BORDER_COLOR_ALPHA
:
2736 case TD_HS_BORDER_COLOR_INDEX
:
2737 case TD_HS_BORDER_COLOR_RED
:
2738 case TD_HS_BORDER_COLOR_GREEN
:
2739 case TD_HS_BORDER_COLOR_BLUE
:
2740 case TD_HS_BORDER_COLOR_ALPHA
:
2741 case TD_LS_BORDER_COLOR_INDEX
:
2742 case TD_LS_BORDER_COLOR_RED
:
2743 case TD_LS_BORDER_COLOR_GREEN
:
2744 case TD_LS_BORDER_COLOR_BLUE
:
2745 case TD_LS_BORDER_COLOR_ALPHA
:
2746 case TD_CS_BORDER_COLOR_INDEX
:
2747 case TD_CS_BORDER_COLOR_RED
:
2748 case TD_CS_BORDER_COLOR_GREEN
:
2749 case TD_CS_BORDER_COLOR_BLUE
:
2750 case TD_CS_BORDER_COLOR_ALPHA
:
/* SQ ring size / itemsize / base registers (ES/GS/HS/LS/PS/VS stages) */
2751 case SQ_ESGS_RING_SIZE
:
2752 case SQ_GSVS_RING_SIZE
:
2753 case SQ_ESTMP_RING_SIZE
:
2754 case SQ_GSTMP_RING_SIZE
:
2755 case SQ_HSTMP_RING_SIZE
:
2756 case SQ_LSTMP_RING_SIZE
:
2757 case SQ_PSTMP_RING_SIZE
:
2758 case SQ_VSTMP_RING_SIZE
:
2759 case SQ_ESGS_RING_ITEMSIZE
:
2760 case SQ_ESTMP_RING_ITEMSIZE
:
2761 case SQ_GSTMP_RING_ITEMSIZE
:
2762 case SQ_GSVS_RING_ITEMSIZE
:
2763 case SQ_GS_VERT_ITEMSIZE
:
2764 case SQ_GS_VERT_ITEMSIZE_1
:
2765 case SQ_GS_VERT_ITEMSIZE_2
:
2766 case SQ_GS_VERT_ITEMSIZE_3
:
2767 case SQ_GSVS_RING_OFFSET_1
:
2768 case SQ_GSVS_RING_OFFSET_2
:
2769 case SQ_GSVS_RING_OFFSET_3
:
2770 case SQ_HSTMP_RING_ITEMSIZE
:
2771 case SQ_LSTMP_RING_ITEMSIZE
:
2772 case SQ_PSTMP_RING_ITEMSIZE
:
2773 case SQ_VSTMP_RING_ITEMSIZE
:
2774 case VGT_TF_RING_SIZE
:
2775 case SQ_ESGS_RING_BASE
:
2776 case SQ_GSVS_RING_BASE
:
2777 case SQ_ESTMP_RING_BASE
:
2778 case SQ_GSTMP_RING_BASE
:
2779 case SQ_HSTMP_RING_BASE
:
2780 case SQ_LSTMP_RING_BASE
:
2781 case SQ_PSTMP_RING_BASE
:
2782 case SQ_VSTMP_RING_BASE
:
/* Cayman-specific registers */
2783 case CAYMAN_VGT_OFFCHIP_LDS_BASE
:
2784 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS
:
/*
 * evergreen_vm_packet3_check() - validate one PACKET3 from a VM IB.
 *
 * Reads the dword after the packet header (idx_value) and dispatches on
 * pkt->opcode: a long list of opcodes is accepted as-is; COND_WRITE and
 * COPY_DW validate the register they may write via evergreen_vm_reg_valid();
 * SET_CONFIG_REG range-checks the register window and validates each
 * register in it.  NOTE(review): the break/return statements and the
 * default case did not survive extraction -- restore from the pristine
 * source before building.
 */
2791 static int evergreen_vm_packet3_check(struct radeon_device
*rdev
,
2792 u32
*ib
, struct radeon_cs_packet
*pkt
)
/* idx points at the first dword of the packet body (header + 1). */
2794 u32 idx
= pkt
->idx
+ 1;
2795 u32 idx_value
= ib
[idx
];
2796 u32 start_reg
, end_reg
, reg
, i
;
2798 switch (pkt
->opcode
) {
/* These opcodes are considered safe in a VM IB without further checks. */
2800 case PACKET3_SET_BASE
:
2801 case PACKET3_CLEAR_STATE
:
2802 case PACKET3_INDEX_BUFFER_SIZE
:
2803 case PACKET3_DISPATCH_DIRECT
:
2804 case PACKET3_DISPATCH_INDIRECT
:
2805 case PACKET3_MODE_CONTROL
:
2806 case PACKET3_SET_PREDICATION
:
2807 case PACKET3_COND_EXEC
:
2808 case PACKET3_PRED_EXEC
:
2809 case PACKET3_DRAW_INDIRECT
:
2810 case PACKET3_DRAW_INDEX_INDIRECT
:
2811 case PACKET3_INDEX_BASE
:
2812 case PACKET3_DRAW_INDEX_2
:
2813 case PACKET3_CONTEXT_CONTROL
:
2814 case PACKET3_DRAW_INDEX_OFFSET
:
2815 case PACKET3_INDEX_TYPE
:
2816 case PACKET3_DRAW_INDEX
:
2817 case PACKET3_DRAW_INDEX_AUTO
:
2818 case PACKET3_DRAW_INDEX_IMMD
:
2819 case PACKET3_NUM_INSTANCES
:
2820 case PACKET3_DRAW_INDEX_MULTI_AUTO
:
2821 case PACKET3_STRMOUT_BUFFER_UPDATE
:
2822 case PACKET3_DRAW_INDEX_OFFSET_2
:
2823 case PACKET3_DRAW_INDEX_MULTI_ELEMENT
:
2824 case PACKET3_MPEG_INDEX
:
2825 case PACKET3_WAIT_REG_MEM
:
2826 case PACKET3_MEM_WRITE
:
2827 case PACKET3_SURFACE_SYNC
:
2828 case PACKET3_EVENT_WRITE
:
2829 case PACKET3_EVENT_WRITE_EOP
:
2830 case PACKET3_EVENT_WRITE_EOS
:
2831 case PACKET3_SET_CONTEXT_REG
:
2832 case PACKET3_SET_BOOL_CONST
:
2833 case PACKET3_SET_LOOP_CONST
:
2834 case PACKET3_SET_RESOURCE
:
2835 case PACKET3_SET_SAMPLER
:
2836 case PACKET3_SET_CTL_CONST
:
2837 case PACKET3_SET_RESOURCE_OFFSET
:
2838 case PACKET3_SET_CONTEXT_REG_INDIRECT
:
2839 case PACKET3_SET_RESOURCE_INDIRECT
:
2840 case CAYMAN_PACKET3_DEALLOC_STATE
:
/*
 * COND_WRITE: when bit 8 of the first body dword is set, the packet
 * writes a register; that register index (dword idx+5, in dword units,
 * hence * 4) must pass the allow-list.
 */
2842 case PACKET3_COND_WRITE
:
2843 if (idx_value
& 0x100) {
2844 reg
= ib
[idx
+ 5] * 4;
2845 if (!evergreen_vm_reg_valid(reg
))
/*
 * COPY_DW: when bit 1 is set, the destination is a register
 * (dword idx+3, converted from dword index to byte offset).
 */
2849 case PACKET3_COPY_DW
:
2850 if (idx_value
& 0x2) {
2851 reg
= ib
[idx
+ 3] * 4;
2852 if (!evergreen_vm_reg_valid(reg
))
/*
 * SET_CONFIG_REG: compute the [start_reg, end_reg] byte range covered
 * by the packet, reject anything outside the config-register window,
 * then validate every register written.
 */
2856 case PACKET3_SET_CONFIG_REG
:
2857 start_reg
= (idx_value
<< 2) + PACKET3_SET_CONFIG_REG_START
;
2858 end_reg
= 4 * pkt
->count
+ start_reg
- 4;
2859 if ((start_reg
< PACKET3_SET_CONFIG_REG_START
) ||
2860 (start_reg
>= PACKET3_SET_CONFIG_REG_END
) ||
2861 (end_reg
>= PACKET3_SET_CONFIG_REG_END
)) {
2862 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2865 for (i
= 0; i
< pkt
->count
; i
++) {
2866 reg
= start_reg
+ (4 * i
);
2867 if (!evergreen_vm_reg_valid(reg
))
2877 int evergreen_ib_parse(struct radeon_device
*rdev
, struct radeon_ib
*ib
)
2881 struct radeon_cs_packet pkt
;
2885 pkt
.type
= CP_PACKET_GET_TYPE(ib
->ptr
[idx
]);
2886 pkt
.count
= CP_PACKET_GET_COUNT(ib
->ptr
[idx
]);
2890 dev_err(rdev
->dev
, "Packet0 not allowed!\n");
2897 pkt
.opcode
= CP_PACKET3_GET_OPCODE(ib
->ptr
[idx
]);
2898 ret
= evergreen_vm_packet3_check(rdev
, ib
->ptr
, &pkt
);
2899 idx
+= pkt
.count
+ 2;
2902 dev_err(rdev
->dev
, "Unknown packet type %d !\n", pkt
.type
);
2908 } while (idx
< ib
->length_dw
);