/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 *offset)
{
	u64 off = *offset;
	u32 fb_start = dev_priv->fb_location;
	u32 fb_end = fb_start + dev_priv->fb_size - 1;
	u32 gart_start = dev_priv->gart_vm_start;
	u32 gart_end = gart_start + dev_priv->gart_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */
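	/* Example (hypothetical values): with the framebuffer mapped at
	 * 0xc0000000 and a legacy client passing the 0-based offset
	 * 0x100000, adding radeon_fb_delta shifts it to 0xc0100000,
	 * which then passes the range check below unchanged.
	 */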
	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + gart_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	}
	return DRM_ERR(EINVAL);
}

static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

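	/* A CP type-3 header stores (total dwords - 2) in the count field
	 * masked out below, so the full packet size is that count plus the
	 * two header dwords.
	 */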
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}

/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

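	/* drm_clip_rect_t uses an exclusive bottom-right corner; the -1
	 * below apparently converts it into the last covered pixel, which
	 * is what the hardware register expects.
	 */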
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid color buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};

/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}

static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));

}

/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
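		/* depth_pitch is in bytes; dividing by the bytes per depth
		 * pixel (2 for 16-bit Z, 4 otherwise) yields pixels per
		 * scanline.
		 */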
		int depthpixperline =
		    dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
		    (dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though we get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200, which needs a different clear mask
			   and a different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(9);

		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
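	/* The (color_fmt - 2) factor below seems to rely on the color-format
	 * enum values (presumably RGB565 = 4 and ARGB8888 = 6), which makes
	 * it equal to the bytes per pixel for the two supported formats.
	 */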
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

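/* Returns nonzero when 'nr' is not a valid vertex count for the given
 * primitive type (e.g. a triangle list must be a positive multiple of 3).
 */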
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}

static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

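	/* Build the RNDR_GEN_INDX_PRIM packet in place in the element
	 * buffer; the type-3 header count of (dwords - 2) matches the
	 * "2 + count" size rule used by the packet3 parser above.
	 */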
	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}

#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
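/* Texture uploads larger than one DMA buffer are split into several blits:
 * the do/while loop below updates image->y/height/data after each pass, and
 * on EAGAIN the updated image is copied back so userspace can resubmit the
 * remainder.
 */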

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
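	/* The blitter's source pitch field is in units of 64 bytes (it is
	 * emitted as (spitch << 22) below), hence the shift by 6 here.
	 */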
1618 spitch = blit_width >> 6;
1619 if (spitch == 0 && image->height > 1)
1620 return DRM_ERR(EINVAL);
1621
1622 texpitch = tex->pitch;
1623 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1624 microtile = 1;
1625 if (tex_width < 64) {
1626 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1627 /* we got tiled coordinates, untile them */
1628 image->x *= 2;
1629 }
1630 } else
1631 microtile = 0;
1632
1633 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1634
1635 do {
1636 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1637 tex->offset >> 10, tex->pitch, tex->format,
1638 image->x, image->y, image->width, image->height);
1639
1640 /* Make a copy of some parameters in case we have to
1641 * update them for a multi-pass texture blit.
1642 */
1643 height = image->height;
1644 data = (const u8 __user *)image->data;
1645
1646 size = height * blit_width;
1647
1648 if (size > RADEON_MAX_TEXTURE_SIZE) {
1649 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1650 size = height * blit_width;
1651 } else if (size < 4 && size > 0) {
1652 size = 4;
1653 } else if (size == 0) {
1654 return 0;
1655 }
1656
1657 buf = radeon_freelist_get(dev);
1658 if (0 && !buf) {
1659 radeon_do_cp_idle(dev_priv);
1660 buf = radeon_freelist_get(dev);
1661 }
1662 if (!buf) {
1663 DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1664 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1665 return DRM_ERR(EFAULT);
1666 return DRM_ERR(EAGAIN);
1667 }
1668
1669 /* Dispatch the indirect buffer.
1670 */
1671 buffer =
1672 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1673 dwords = size / 4;
1674
1675 #define RADEON_COPY_MT(_buf, _data, _width) \
1676 do { \
1677 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1678 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1679 return DRM_ERR(EFAULT); \
1680 } \
1681 } while(0)
1682
1683 if (microtile) {
1684 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1685 however, we cannot use blitter directly for texture width < 64 bytes,
1686 since minimum tex pitch is 64 bytes and we need this to match
1687 the texture width, otherwise the blitter will tile it wrong.
1688 Thus, tiling manually in this case. Additionally, need to special
1689 case tex height = 1, since our actual image will have height 2
1690 and we need to ensure we don't read beyond the texture size
1691 from user space. */
1692 if (tex->height == 1) {
1693 if (tex_width >= 64 || tex_width <= 16) {
1694 RADEON_COPY_MT(buffer, data,
1695 (int)(tex_width * sizeof(u32)));
1696 } else if (tex_width == 32) {
1697 RADEON_COPY_MT(buffer, data, 16);
1698 RADEON_COPY_MT(buffer + 8,
1699 data + 16, 16);
1700 }
1701 } else if (tex_width >= 64 || tex_width == 16) {
1702 RADEON_COPY_MT(buffer, data,
1703 (int)(dwords * sizeof(u32)));
1704 } else if (tex_width < 16) {
1705 for (i = 0; i < tex->height; i++) {
1706 RADEON_COPY_MT(buffer, data, tex_width);
1707 buffer += 4;
1708 data += tex_width;
1709 }
1710 } else if (tex_width == 32) {
1711 /* TODO: make sure this works when not fitting in one buffer
1712 (i.e. 32bytes x 2048...) */
1713 for (i = 0; i < tex->height; i += 2) {
1714 RADEON_COPY_MT(buffer, data, 16);
1715 data += 16;
1716 RADEON_COPY_MT(buffer + 8, data, 16);
1717 data += 16;
1718 RADEON_COPY_MT(buffer + 4, data, 16);
1719 data += 16;
1720 RADEON_COPY_MT(buffer + 12, data, 16);
1721 data += 16;
1722 buffer += 16;
1723 }
1724 }
1725 } else {
1726 if (tex_width >= 32) {
1727 /* Texture image width is larger than the minimum, so we
1728 * can upload it directly.
1729 */
1730 RADEON_COPY_MT(buffer, data,
1731 (int)(dwords * sizeof(u32)));
1732 } else {
1733 /* Texture image width is less than the minimum, so we
1734 * need to pad out each image scanline to the minimum
1735 * width.
1736 */
1737 for (i = 0; i < tex->height; i++) {
1738 RADEON_COPY_MT(buffer, data, tex_width);
1739 buffer += 8;
1740 data += tex_width;
1741 }
1742 }
1743 }
1744
1745 #undef RADEON_COPY_MT
1746 buf->filp = filp;
1747 buf->used = size;
1748 offset = dev_priv->gart_buffers_offset + buf->offset;
1749 BEGIN_RING(9);
1750 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1751 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1752 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1753 RADEON_GMC_BRUSH_NONE |
1754 (format << 8) |
1755 RADEON_GMC_SRC_DATATYPE_COLOR |
1756 RADEON_ROP3_S |
1757 RADEON_DP_SRC_SOURCE_MEMORY |
1758 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1759 OUT_RING((spitch << 22) | (offset >> 10));
1760 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1761 OUT_RING(0);
1762 OUT_RING((image->x << 16) | image->y);
1763 OUT_RING((image->width << 16) | height);
1764 RADEON_WAIT_UNTIL_2D_IDLE();
1765 ADVANCE_RING();
1766
1767 radeon_cp_discard_buffer(dev, buf);
1768
1769 /* Update the input parameters for next time */
1770 image->y += height;
1771 image->height -= height;
1772 image->data = (const u8 __user *)image->data + size;
1773 } while (image->height > 0);
1774
1775 /* Flush the pixel cache after the blit completes. This ensures
1776 * the texture data is written out to memory before rendering
1777 * continues.
1778 */
1779 BEGIN_RING(4);
1780 RADEON_FLUSH_CACHE();
1781 RADEON_WAIT_UNTIL_2D_IDLE();
1782 ADVANCE_RING();
1783 return 0;
1784 }
1785
1786 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1787 {
1788 drm_radeon_private_t *dev_priv = dev->dev_private;
1789 int i;
1790 RING_LOCALS;
1791 DRM_DEBUG("\n");
1792
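/* 35 dwords: 2 for the RE_STIPPLE_ADDR write, 1 for the
 * CP_PACKET0_TABLE header and 32 for the stipple pattern itself.
 */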
1793 BEGIN_RING(35);
1794
1795 OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1796 OUT_RING(0x00000000);
1797
1798 OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1799 for (i = 0; i < 32; i++) {
1800 OUT_RING(stipple[i]);
1801 }
1802
1803 ADVANCE_RING();
1804 }
1805
1806 static void radeon_apply_surface_regs(int surf_index,
1807 drm_radeon_private_t *dev_priv)
1808 {
1809 if (!dev_priv->mmio)
1810 return;
1811
1812 radeon_do_cp_idle(dev_priv);
1813
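/* The per-surface registers are laid out at a fixed 16-byte stride,
 * hence the 16 * surf_index offset from the SURFACE0 registers.
 */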
1814 RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1815 dev_priv->surfaces[surf_index].flags);
1816 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1817 dev_priv->surfaces[surf_index].lower);
1818 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1819 dev_priv->surfaces[surf_index].upper);
1820 }
1821
1822 /* Allocates a virtual surface.
1823 * Doesn't always allocate a real surface: it will stretch an existing
1824 * surface when possible.
1825 *
1826 * Note that refcount is deliberately capped at 2: if it could go
1827 * higher, a free might force us to allocate a new real surface, which
1828 * might not always be available.
1829 * For example: we allocate three contiguous surfaces ABC. If B is
1830 * freed, we suddenly need two surfaces to store A and C, which might
1831 * not always be available.
1832 */
1833 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1834 drm_radeon_private_t *dev_priv, DRMFILE filp)
1835 {
1836 struct radeon_virt_surface *s;
1837 int i;
1838 int virt_surface_index;
1839 uint32_t new_upper, new_lower;
1840
1841 new_lower = new->address;
1842 new_upper = new_lower + new->size - 1;
1843
1844 /* sanity check */
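/* i.e. the range must be non-empty with some flags set, new_lower
 * must start on a surface-alignment boundary (low mask bits clear)
 * and new_upper must end one byte before one (low mask bits all set).
 */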
1845 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1846 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1847 RADEON_SURF_ADDRESS_FIXED_MASK)
1848 || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1849 return -1;
1850
1851 /* make sure there is no overlap with existing surfaces */
1852 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1853 if ((dev_priv->surfaces[i].refcount != 0) &&
1854 (((new_lower >= dev_priv->surfaces[i].lower) &&
1855 (new_lower < dev_priv->surfaces[i].upper)) ||
1856 ((new_lower < dev_priv->surfaces[i].lower) &&
1857 (new_upper > dev_priv->surfaces[i].lower)))) {
1858 return -1;
1859 }
1860 }
1861
1862 /* find a virtual surface */
1863 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1864 if (dev_priv->virt_surfaces[i].filp == NULL)
1865 break;
1866 if (i == 2 * RADEON_MAX_SURFACES) {
1867 return -1;
1868 }
1869 virt_surface_index = i;
1870
1871 /* try to reuse an existing surface */
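/* E.g. (a sketch, with made-up addresses): if surfaces[i] covers
 * [0x2000, 0x2fff] with the same flags and refcount 1, a request for
 * [0x1000, 0x1fff] is satisfied by extending that surface downwards
 * rather than burning a new slot.
 */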
1872 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1873 /* extend before */
1874 if ((dev_priv->surfaces[i].refcount == 1) &&
1875 (new->flags == dev_priv->surfaces[i].flags) &&
1876 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1877 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1878 s->surface_index = i;
1879 s->lower = new_lower;
1880 s->upper = new_upper;
1881 s->flags = new->flags;
1882 s->filp = filp;
1883 dev_priv->surfaces[i].refcount++;
1884 dev_priv->surfaces[i].lower = s->lower;
1885 radeon_apply_surface_regs(s->surface_index, dev_priv);
1886 return virt_surface_index;
1887 }
1888
1889 /* extend after */
1890 if ((dev_priv->surfaces[i].refcount == 1) &&
1891 (new->flags == dev_priv->surfaces[i].flags) &&
1892 (new_lower == dev_priv->surfaces[i].upper + 1)) {
1893 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1894 s->surface_index = i;
1895 s->lower = new_lower;
1896 s->upper = new_upper;
1897 s->flags = new->flags;
1898 s->filp = filp;
1899 dev_priv->surfaces[i].refcount++;
1900 dev_priv->surfaces[i].upper = s->upper;
1901 radeon_apply_surface_regs(s->surface_index, dev_priv);
1902 return virt_surface_index;
1903 }
1904 }
1905
1906 /* okay, we need a new one */
1907 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1908 if (dev_priv->surfaces[i].refcount == 0) {
1909 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1910 s->surface_index = i;
1911 s->lower = new_lower;
1912 s->upper = new_upper;
1913 s->flags = new->flags;
1914 s->filp = filp;
1915 dev_priv->surfaces[i].refcount = 1;
1916 dev_priv->surfaces[i].lower = s->lower;
1917 dev_priv->surfaces[i].upper = s->upper;
1918 dev_priv->surfaces[i].flags = s->flags;
1919 radeon_apply_surface_regs(s->surface_index, dev_priv);
1920 return virt_surface_index;
1921 }
1922 }
1923
1924 /* we didn't find anything */
1925 return -1;
1926 }
1927
1928 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1929 int lower)
1930 {
1931 struct radeon_virt_surface *s;
1932 int i;
1933 /* find the virtual surface */
1934 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1935 s = &(dev_priv->virt_surfaces[i]);
1936 if (s->filp) {
1937 if ((lower == s->lower) && (filp == s->filp)) {
1938 struct radeon_surface *surf =
1939 &dev_priv->surfaces[s->surface_index];
1940
1941 /* Shrink the real surface away from
1942 * whichever end this virtual surface
1943 * occupied.
1944 */
1945 if (surf->lower == s->lower)
1946 surf->lower = s->upper;
1947 if (surf->upper == s->upper)
1948 surf->upper = s->lower;
1949
1950 surf->refcount--;
1951 if (surf->refcount == 0)
1952 surf->flags = 0;
1953
1954 s->filp = NULL;
1955 radeon_apply_surface_regs(s->surface_index,
1956 dev_priv);
1956 return 0;
1957 }
1958 }
1959 }
1960 return 1;
1961 }
1962
1963 static void radeon_surfaces_release(DRMFILE filp,
1964 drm_radeon_private_t * dev_priv)
1965 {
1966 int i;
1967 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1968 if (dev_priv->virt_surfaces[i].filp == filp)
1969 free_surface(filp, dev_priv,
1970 dev_priv->virt_surfaces[i].lower);
1971 }
1972 }
1973
1974 /* ================================================================
1975 * IOCTL functions
1976 */
1977 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1978 {
1979 DRM_DEVICE;
1980 drm_radeon_private_t *dev_priv = dev->dev_private;
1981 drm_radeon_surface_alloc_t alloc;
1982
1983 DRM_COPY_FROM_USER_IOCTL(alloc,
1984 (drm_radeon_surface_alloc_t __user *) data,
1985 sizeof(alloc));
1986
1987 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1988 return DRM_ERR(EINVAL);
1989 else
1990 return 0;
1991 }
1992
1993 static int radeon_surface_free(DRM_IOCTL_ARGS)
1994 {
1995 DRM_DEVICE;
1996 drm_radeon_private_t *dev_priv = dev->dev_private;
1997 drm_radeon_surface_free_t memfree;
1998
1999 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
2000 sizeof(memfree));
2001
2002 if (free_surface(filp, dev_priv, memfree.address))
2003 return DRM_ERR(EINVAL);
2004 else
2005 return 0;
2006 }
2007
2008 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2009 {
2010 DRM_DEVICE;
2011 drm_radeon_private_t *dev_priv = dev->dev_private;
2012 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2013 drm_radeon_clear_t clear;
2014 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2015 DRM_DEBUG("\n");
2016
2017 LOCK_TEST_WITH_RETURN(dev, filp);
2018
2019 DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2020 sizeof(clear));
2021
2022 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2023
2024 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2025 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2026
2027 if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2028 sarea_priv->nbox * sizeof(depth_boxes[0])))
2029 return DRM_ERR(EFAULT);
2030
2031 radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2032
2033 COMMIT_RING();
2034 return 0;
2035 }
2036
2037 /* Not sure why page flipping isn't enabled all the time:
2038 */
2039 static int radeon_do_init_pageflip(drm_device_t * dev)
2040 {
2041 drm_radeon_private_t *dev_priv = dev->dev_private;
2042 RING_LOCALS;
2043
2044 DRM_DEBUG("\n");
2045
2046 BEGIN_RING(6);
2047 RADEON_WAIT_UNTIL_3D_IDLE();
2048 OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2049 OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2050 RADEON_CRTC_OFFSET_FLIP_CNTL);
2051 OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2052 OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2053 RADEON_CRTC_OFFSET_FLIP_CNTL);
2054 ADVANCE_RING();
2055
2056 dev_priv->page_flipping = 1;
2057 dev_priv->current_page = 0;
2058 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2059
2060 return 0;
2061 }
2062
2063 /* Called whenever a client dies, from drm_release.
2064 * NOTE: Lock isn't necessarily held when this is called!
2065 */
2066 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2067 {
2068 drm_radeon_private_t *dev_priv = dev->dev_private;
2069 DRM_DEBUG("\n");
2070
2071 if (dev_priv->current_page != 0)
2072 radeon_cp_dispatch_flip(dev);
2073
2074 dev_priv->page_flipping = 0;
2075 return 0;
2076 }
2077
2078 /* Swapping and flipping are different operations, so they need
2079 * different ioctls. They can and should be intermixed to support
2080 * multiple 3D windows. */
2081 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2082 {
2083 DRM_DEVICE;
2084 drm_radeon_private_t *dev_priv = dev->dev_private;
2085 DRM_DEBUG("\n");
2086
2087 LOCK_TEST_WITH_RETURN(dev, filp);
2088
2089 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2090
2091 if (!dev_priv->page_flipping)
2092 radeon_do_init_pageflip(dev);
2093
2094 radeon_cp_dispatch_flip(dev);
2095
2096 COMMIT_RING();
2097 return 0;
2098 }
2099
2100 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2101 {
2102 DRM_DEVICE;
2103 drm_radeon_private_t *dev_priv = dev->dev_private;
2104 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2105 DRM_DEBUG("\n");
2106
2107 LOCK_TEST_WITH_RETURN(dev, filp);
2108
2109 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2110
2111 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2112 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2113
2114 radeon_cp_dispatch_swap(dev);
2115 dev_priv->sarea_priv->ctx_owner = 0;
2116
2117 COMMIT_RING();
2118 return 0;
2119 }
2120
2121 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2122 {
2123 DRM_DEVICE;
2124 drm_radeon_private_t *dev_priv = dev->dev_private;
2125 drm_file_t *filp_priv;
2126 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2127 drm_device_dma_t *dma = dev->dma;
2128 drm_buf_t *buf;
2129 drm_radeon_vertex_t vertex;
2130 drm_radeon_tcl_prim_t prim;
2131
2132 LOCK_TEST_WITH_RETURN(dev, filp);
2133
2134 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2135
2136 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2137 sizeof(vertex));
2138
2139 DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2140 DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2141
2142 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2143 DRM_ERROR("buffer index %d (of %d max)\n",
2144 vertex.idx, dma->buf_count - 1);
2145 return DRM_ERR(EINVAL);
2146 }
2147 if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2148 DRM_ERROR("buffer prim %d\n", vertex.prim);
2149 return DRM_ERR(EINVAL);
2150 }
2151
2152 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2153 VB_AGE_TEST_WITH_RETURN(dev_priv);
2154
2155 buf = dma->buflist[vertex.idx];
2156
2157 if (buf->filp != filp) {
2158 DRM_ERROR("process %d using buffer owned by %p\n",
2159 DRM_CURRENTPID, buf->filp);
2160 return DRM_ERR(EINVAL);
2161 }
2162 if (buf->pending) {
2163 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2164 return DRM_ERR(EINVAL);
2165 }
2166
2167 /* Build up a prim_t record:
2168 */
2169 if (vertex.count) {
2170 buf->used = vertex.count; /* not used? */
2171
2172 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2173 if (radeon_emit_state(dev_priv, filp_priv,
2174 &sarea_priv->context_state,
2175 sarea_priv->tex_state,
2176 sarea_priv->dirty)) {
2177 DRM_ERROR("radeon_emit_state failed\n");
2178 return DRM_ERR(EINVAL);
2179 }
2180
2181 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2182 RADEON_UPLOAD_TEX1IMAGES |
2183 RADEON_UPLOAD_TEX2IMAGES |
2184 RADEON_REQUIRE_QUIESCENCE);
2185 }
2186
2187 prim.start = 0;
2188 prim.finish = vertex.count; /* unused */
2189 prim.prim = vertex.prim;
2190 prim.numverts = vertex.count;
2191 prim.vc_format = dev_priv->sarea_priv->vc_format;
2192
2193 radeon_cp_dispatch_vertex(dev, buf, &prim);
2194 }
2195
2196 if (vertex.discard) {
2197 radeon_cp_discard_buffer(dev, buf);
2198 }
2199
2200 COMMIT_RING();
2201 return 0;
2202 }
2203
2204 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2205 {
2206 DRM_DEVICE;
2207 drm_radeon_private_t *dev_priv = dev->dev_private;
2208 drm_file_t *filp_priv;
2209 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2210 drm_device_dma_t *dma = dev->dma;
2211 drm_buf_t *buf;
2212 drm_radeon_indices_t elts;
2213 drm_radeon_tcl_prim_t prim;
2214 int count;
2215
2216 LOCK_TEST_WITH_RETURN(dev, filp);
2217
2218 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2219
2220 DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2221 sizeof(elts));
2222
2223 DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2224 DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2225
2226 if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2227 DRM_ERROR("buffer index %d (of %d max)\n",
2228 elts.idx, dma->buf_count - 1);
2229 return DRM_ERR(EINVAL);
2230 }
2231 if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2232 DRM_ERROR("buffer prim %d\n", elts.prim);
2233 return DRM_ERR(EINVAL);
2234 }
2235
2236 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2237 VB_AGE_TEST_WITH_RETURN(dev_priv);
2238
2239 buf = dma->buflist[elts.idx];
2240
2241 if (buf->filp != filp) {
2242 DRM_ERROR("process %d using buffer owned by %p\n",
2243 DRM_CURRENTPID, buf->filp);
2244 return DRM_ERR(EINVAL);
2245 }
2246 if (buf->pending) {
2247 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2248 return DRM_ERR(EINVAL);
2249 }
2250
2251 count = (elts.end - elts.start) / sizeof(u16);
2252 elts.start -= RADEON_INDEX_PRIM_OFFSET;
2253
2254 if (elts.start & 0x7) {
2255 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2256 return DRM_ERR(EINVAL);
2257 }
2258 if (elts.start < buf->used) {
2259 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2260 return DRM_ERR(EINVAL);
2261 }
2262
2263 buf->used = elts.end;
2264
2265 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2266 if (radeon_emit_state(dev_priv, filp_priv,
2267 &sarea_priv->context_state,
2268 sarea_priv->tex_state,
2269 sarea_priv->dirty)) {
2270 DRM_ERROR("radeon_emit_state failed\n");
2271 return DRM_ERR(EINVAL);
2272 }
2273
2274 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2275 RADEON_UPLOAD_TEX1IMAGES |
2276 RADEON_UPLOAD_TEX2IMAGES |
2277 RADEON_REQUIRE_QUIESCENCE);
2278 }
2279
2280 /* Build up a prim_t record:
2281 */
2282 prim.start = elts.start;
2283 prim.finish = elts.end;
2284 prim.prim = elts.prim;
2285 prim.offset = 0; /* offset from start of dma buffers */
2286 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2287 prim.vc_format = dev_priv->sarea_priv->vc_format;
2288
2289 radeon_cp_dispatch_indices(dev, buf, &prim);
2290 if (elts.discard) {
2291 radeon_cp_discard_buffer(dev, buf);
2292 }
2293
2294 COMMIT_RING();
2295 return 0;
2296 }
2297
2298 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2299 {
2300 DRM_DEVICE;
2301 drm_radeon_private_t *dev_priv = dev->dev_private;
2302 drm_radeon_texture_t tex;
2303 drm_radeon_tex_image_t image;
2304 int ret;
2305
2306 LOCK_TEST_WITH_RETURN(dev, filp);
2307
2308 DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2309 sizeof(tex));
2310
2311 if (tex.image == NULL) {
2312 DRM_ERROR("null texture image!\n");
2313 return DRM_ERR(EINVAL);
2314 }
2315
2316 if (DRM_COPY_FROM_USER(&image,
2317 (drm_radeon_tex_image_t __user *) tex.image,
2318 sizeof(image)))
2319 return DRM_ERR(EFAULT);
2320
2321 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2322 VB_AGE_TEST_WITH_RETURN(dev_priv);
2323
2324 ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2325
2326 COMMIT_RING();
2327 return ret;
2328 }
2329
2330 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2331 {
2332 DRM_DEVICE;
2333 drm_radeon_private_t *dev_priv = dev->dev_private;
2334 drm_radeon_stipple_t stipple;
2335 u32 mask[32];
2336
2337 LOCK_TEST_WITH_RETURN(dev, filp);
2338
2339 DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2340 sizeof(stipple));
2341
2342 if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2343 return DRM_ERR(EFAULT);
2344
2345 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2346
2347 radeon_cp_dispatch_stipple(dev, mask);
2348
2349 COMMIT_RING();
2350 return 0;
2351 }
2352
2353 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2354 {
2355 DRM_DEVICE;
2356 drm_radeon_private_t *dev_priv = dev->dev_private;
2357 drm_device_dma_t *dma = dev->dma;
2358 drm_buf_t *buf;
2359 drm_radeon_indirect_t indirect;
2360 RING_LOCALS;
2361
2362 LOCK_TEST_WITH_RETURN(dev, filp);
2363
2364 DRM_COPY_FROM_USER_IOCTL(indirect,
2365 (drm_radeon_indirect_t __user *) data,
2366 sizeof(indirect));
2367
2368 DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2369 indirect.idx, indirect.start, indirect.end, indirect.discard);
2370
2371 if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2372 DRM_ERROR("buffer index %d (of %d max)\n",
2373 indirect.idx, dma->buf_count - 1);
2374 return DRM_ERR(EINVAL);
2375 }
2376
2377 buf = dma->buflist[indirect.idx];
2378
2379 if (buf->filp != filp) {
2380 DRM_ERROR("process %d using buffer owned by %p\n",
2381 DRM_CURRENTPID, buf->filp);
2382 return DRM_ERR(EINVAL);
2383 }
2384 if (buf->pending) {
2385 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2386 return DRM_ERR(EINVAL);
2387 }
2388
2389 if (indirect.start < buf->used) {
2390 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2391 indirect.start, buf->used);
2392 return DRM_ERR(EINVAL);
2393 }
2394
2395 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2396 VB_AGE_TEST_WITH_RETURN(dev_priv);
2397
2398 buf->used = indirect.end;
2399
2400 /* Wait for the 3D stream to idle before the indirect buffer
2401 * containing 2D acceleration commands is processed.
2402 */
2403 BEGIN_RING(2);
2404
2405 RADEON_WAIT_UNTIL_3D_IDLE();
2406
2407 ADVANCE_RING();
2408
2409 /* Dispatch the indirect buffer full of commands from the
2410 * X server. This is insecure and is thus only available to
2411 * privileged clients.
2412 */
2413 radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2414 if (indirect.discard) {
2415 radeon_cp_discard_buffer(dev, buf);
2416 }
2417
2418 COMMIT_RING();
2419 return 0;
2420 }
2421
2422 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2423 {
2424 DRM_DEVICE;
2425 drm_radeon_private_t *dev_priv = dev->dev_private;
2426 drm_file_t *filp_priv;
2427 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2428 drm_device_dma_t *dma = dev->dma;
2429 drm_buf_t *buf;
2430 drm_radeon_vertex2_t vertex;
2431 int i;
2432 unsigned char laststate;
2433
2434 LOCK_TEST_WITH_RETURN(dev, filp);
2435
2436 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2437
2438 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2439 sizeof(vertex));
2440
2441 DRM_DEBUG("pid=%d index=%d discard=%d\n",
2442 DRM_CURRENTPID, vertex.idx, vertex.discard);
2443
2444 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2445 DRM_ERROR("buffer index %d (of %d max)\n",
2446 vertex.idx, dma->buf_count - 1);
2447 return DRM_ERR(EINVAL);
2448 }
2449
2450 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2451 VB_AGE_TEST_WITH_RETURN(dev_priv);
2452
2453 buf = dma->buflist[vertex.idx];
2454
2455 if (buf->filp != filp) {
2456 DRM_ERROR("process %d using buffer owned by %p\n",
2457 DRM_CURRENTPID, buf->filp);
2458 return DRM_ERR(EINVAL);
2459 }
2460
2461 if (buf->pending) {
2462 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2463 return DRM_ERR(EINVAL);
2464 }
2465
2466 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2467 return DRM_ERR(EINVAL);
2468
2469 for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2470 drm_radeon_prim_t prim;
2471 drm_radeon_tcl_prim_t tclprim;
2472
2473 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2474 return DRM_ERR(EFAULT);
2475
2476 if (prim.stateidx != laststate) {
2477 drm_radeon_state_t state;
2478
2479 if (DRM_COPY_FROM_USER(&state,
2480 &vertex.state[prim.stateidx],
2481 sizeof(state)))
2482 return DRM_ERR(EFAULT);
2483
2484 if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2485 DRM_ERROR("radeon_emit_state2 failed\n");
2486 return DRM_ERR(EINVAL);
2487 }
2488
2489 laststate = prim.stateidx;
2490 }
2491
2492 tclprim.start = prim.start;
2493 tclprim.finish = prim.finish;
2494 tclprim.prim = prim.prim;
2495 tclprim.vc_format = prim.vc_format;
2496
2497 if (prim.prim & RADEON_PRIM_WALK_IND) {
2498 tclprim.offset = prim.numverts * 64;
2499 tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2500
2501 radeon_cp_dispatch_indices(dev, buf, &tclprim);
2502 } else {
2503 tclprim.numverts = prim.numverts;
2504 tclprim.offset = 0; /* not used */
2505
2506 radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2507 }
2508
2509 if (sarea_priv->nbox == 1)
2510 sarea_priv->nbox = 0;
2511 }
2512
2513 if (vertex.discard) {
2514 radeon_cp_discard_buffer(dev, buf);
2515 }
2516
2517 COMMIT_RING();
2518 return 0;
2519 }
2520
2521 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2522 drm_file_t * filp_priv,
2523 drm_radeon_cmd_header_t header,
2524 drm_radeon_kcmd_buffer_t *cmdbuf)
2525 {
2526 int id = (int)header.packet.packet_id;
2527 int sz, reg;
2528 int *data = (int *)cmdbuf->buf;
2529 RING_LOCALS;
2530
2531 if (id >= RADEON_MAX_STATE_PACKETS)
2532 return DRM_ERR(EINVAL);
2533
2534 sz = packet[id].len;
2535 reg = packet[id].start;
2536
2537 if (sz * sizeof(int) > cmdbuf->bufsz) {
2538 DRM_ERROR("Packet size provided larger than data provided\n");
2539 return DRM_ERR(EINVAL);
2540 }
2541
2542 if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2543 DRM_ERROR("Packet verification failed\n");
2544 return DRM_ERR(EINVAL);
2545 }
2546
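/* CP_PACKET0's count field is (number of dwords - 1), so this emits
 * sz consecutive register writes starting at reg: 1 header dword plus
 * sz data dwords copied straight from the kernel-resident cmdbuf.
 */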
2547 BEGIN_RING(sz + 1);
2548 OUT_RING(CP_PACKET0(reg, (sz - 1)));
2549 OUT_RING_TABLE(data, sz);
2550 ADVANCE_RING();
2551
2552 cmdbuf->buf += sz * sizeof(int);
2553 cmdbuf->bufsz -= sz * sizeof(int);
2554 return 0;
2555 }
2556
2557 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2558 drm_radeon_cmd_header_t header,
2559 drm_radeon_kcmd_buffer_t *cmdbuf)
2560 {
2561 int sz = header.scalars.count;
2562 int start = header.scalars.offset;
2563 int stride = header.scalars.stride;
2564 RING_LOCALS;
2565
2566 BEGIN_RING(3 + sz);
2567 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2568 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2569 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2570 OUT_RING_TABLE(cmdbuf->buf, sz);
2571 ADVANCE_RING();
2572 cmdbuf->buf += sz * sizeof(int);
2573 cmdbuf->bufsz -= sz * sizeof(int);
2574 return 0;
2575 }
2576
2577 /* God this is ugly. Identical to radeon_emit_scalars except that the
2578 * start index is biased by 0x100: header.scalars.offset is only 8 bits
2579 * wide, so scalar offsets above 0xff need this second command type. */
2579 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2580 drm_radeon_cmd_header_t header,
2581 drm_radeon_kcmd_buffer_t *cmdbuf)
2582 {
2583 int sz = header.scalars.count;
2584 int start = ((unsigned int)header.scalars.offset) + 0x100;
2585 int stride = header.scalars.stride;
2586 RING_LOCALS;
2587
2588 BEGIN_RING(3 + sz);
2589 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2590 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2591 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2592 OUT_RING_TABLE(cmdbuf->buf, sz);
2593 ADVANCE_RING();
2594 cmdbuf->buf += sz * sizeof(int);
2595 cmdbuf->bufsz -= sz * sizeof(int);
2596 return 0;
2597 }
2598
2599 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2600 drm_radeon_cmd_header_t header,
2601 drm_radeon_kcmd_buffer_t *cmdbuf)
2602 {
2603 int sz = header.vectors.count;
2604 int start = header.vectors.offset;
2605 int stride = header.vectors.stride;
2606 RING_LOCALS;
2607
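/* 5 + sz dwords: OUT_RING_REG is a 2-dword register write for the
 * state flush, the VECTOR_INDX write is another 2, and the table
 * transfer is 1 header dword plus sz data dwords.
 */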
2608 BEGIN_RING(5 + sz);
2609 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2610 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2611 OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2612 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2613 OUT_RING_TABLE(cmdbuf->buf, sz);
2614 ADVANCE_RING();
2615
2616 cmdbuf->buf += sz * sizeof(int);
2617 cmdbuf->bufsz -= sz * sizeof(int);
2618 return 0;
2619 }
2620
2621 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2622 drm_radeon_cmd_header_t header,
2623 drm_radeon_kcmd_buffer_t *cmdbuf)
2624 {
2625 int sz = header.veclinear.count * 4;
2626 int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2627 RING_LOCALS;
2628
2629 if (!sz)
2630 return 0;
2631 if (sz * 4 > cmdbuf->bufsz)
2632 return DRM_ERR(EINVAL);
2633
2634 BEGIN_RING(5 + sz);
2635 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2636 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2637 OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2638 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2639 OUT_RING_TABLE(cmdbuf->buf, sz);
2640 ADVANCE_RING();
2641
2642 cmdbuf->buf += sz * sizeof(int);
2643 cmdbuf->bufsz -= sz * sizeof(int);
2644 return 0;
2645 }
2646
2647 static int radeon_emit_packet3(drm_device_t * dev,
2648 drm_file_t * filp_priv,
2649 drm_radeon_kcmd_buffer_t *cmdbuf)
2650 {
2651 drm_radeon_private_t *dev_priv = dev->dev_private;
2652 unsigned int cmdsz;
2653 int ret;
2654 RING_LOCALS;
2655
2656 DRM_DEBUG("\n");
2657
2658 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2659 cmdbuf, &cmdsz))) {
2660 DRM_ERROR("Packet verification failed\n");
2661 return ret;
2662 }
2663
2664 BEGIN_RING(cmdsz);
2665 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2666 ADVANCE_RING();
2667
2668 cmdbuf->buf += cmdsz * 4;
2669 cmdbuf->bufsz -= cmdsz * 4;
2670 return 0;
2671 }
2672
2673 static int radeon_emit_packet3_cliprect(drm_device_t *dev,
2674 drm_file_t *filp_priv,
2675 drm_radeon_kcmd_buffer_t *cmdbuf,
2676 int orig_nbox)
2677 {
2678 drm_radeon_private_t *dev_priv = dev->dev_private;
2679 drm_clip_rect_t box;
2680 unsigned int cmdsz;
2681 int ret;
2682 drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2683 int i = 0;
2684 RING_LOCALS;
2685
2686 DRM_DEBUG("\n");
2687
2688 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2689 cmdbuf, &cmdsz))) {
2690 DRM_ERROR("Packet verification failed\n");
2691 return ret;
2692 }
2693
2694 if (!orig_nbox)
2695 goto out;
2696
2697 do {
2698 if (i < cmdbuf->nbox) {
2699 if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2700 return DRM_ERR(EFAULT);
2701 /* FIXME The second and subsequent times round
2702 * this loop, send a WAIT_UNTIL_3D_IDLE before
2703 * calling emit_clip_rect(). This fixes a
2704 * lockup on fast machines when sending
2705 * several cliprects with a cmdbuf, as when
2706 * waving a 2D window over a 3D
2707 * window. Something in the commands from user
2708 * space seems to hang the card when they're
2709 * sent several times in a row. That would be
2710 * the correct place to fix it but this works
2711 * around it until I can figure that out - Tim
2712 * Smith */
2713 if (i) {
2714 BEGIN_RING(2);
2715 RADEON_WAIT_UNTIL_3D_IDLE();
2716 ADVANCE_RING();
2717 }
2718 radeon_emit_clip_rect(dev_priv, &box);
2719 }
2720
2721 BEGIN_RING(cmdsz);
2722 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2723 ADVANCE_RING();
2724
2725 } while (++i < cmdbuf->nbox);
2726 if (cmdbuf->nbox == 1)
2727 cmdbuf->nbox = 0;
2728
2729 out:
2730 cmdbuf->buf += cmdsz * 4;
2731 cmdbuf->bufsz -= cmdsz * 4;
2732 return 0;
2733 }
2734
2735 static int radeon_emit_wait(drm_device_t * dev, int flags)
2736 {
2737 drm_radeon_private_t *dev_priv = dev->dev_private;
2738 RING_LOCALS;
2739
2740 DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2741 switch (flags) {
2742 case RADEON_WAIT_2D:
2743 BEGIN_RING(2);
2744 RADEON_WAIT_UNTIL_2D_IDLE();
2745 ADVANCE_RING();
2746 break;
2747 case RADEON_WAIT_3D:
2748 BEGIN_RING(2);
2749 RADEON_WAIT_UNTIL_3D_IDLE();
2750 ADVANCE_RING();
2751 break;
2752 case RADEON_WAIT_2D | RADEON_WAIT_3D:
2753 BEGIN_RING(2);
2754 RADEON_WAIT_UNTIL_IDLE();
2755 ADVANCE_RING();
2756 break;
2757 default:
2758 return DRM_ERR(EINVAL);
2759 }
2760
2761 return 0;
2762 }
2763
2764 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2765 {
2766 DRM_DEVICE;
2767 drm_radeon_private_t *dev_priv = dev->dev_private;
2768 drm_file_t *filp_priv;
2769 drm_device_dma_t *dma = dev->dma;
2770 drm_buf_t *buf = NULL;
2771 int idx;
2772 drm_radeon_kcmd_buffer_t cmdbuf;
2773 drm_radeon_cmd_header_t header;
2774 int orig_nbox, orig_bufsz;
2775 char *kbuf = NULL;
2776
2777 LOCK_TEST_WITH_RETURN(dev, filp);
2778
2779 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2780
2781 DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2782 (drm_radeon_cmd_buffer_t __user *) data,
2783 sizeof(cmdbuf));
2784
2785 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2786 VB_AGE_TEST_WITH_RETURN(dev_priv);
2787
2788 if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2789 return DRM_ERR(EINVAL);
2790 }
2791
2792 /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
2793 * races between checking values and using those values in other code,
2794 * and simply to avoid a lot of function calls to copy in data.
2795 */
2796 orig_bufsz = cmdbuf.bufsz;
2797 if (orig_bufsz != 0) {
2798 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2799 if (kbuf == NULL)
2800 return DRM_ERR(ENOMEM);
2801 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2802 cmdbuf.bufsz)) {
2803 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2804 return DRM_ERR(EFAULT);
2805 }
2806 cmdbuf.buf = kbuf;
2807 }
2808
2809 orig_nbox = cmdbuf.nbox;
2810
2811 if (dev_priv->microcode_version == UCODE_R300) {
2812 int temp;
2813 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2814
2815 if (orig_bufsz != 0)
2816 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2817
2818 return temp;
2819 }
2820
2821 /* microcode_version != r300 */
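/* The command buffer is a packed stream of 32-bit headers, each
 * followed by its inline payload, e.g.:
 *
 *   [header: RADEON_CMD_PACKET ][ sz register dwords  ]
 *   [header: RADEON_CMD_SCALARS][ count scalar dwords ]
 *   [header: RADEON_CMD_WAIT   ]          (no payload)
 *
 * Each handler below consumes its payload by advancing cmdbuf.buf and
 * shrinking cmdbuf.bufsz.
 */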
2822 while (cmdbuf.bufsz >= sizeof(header)) {
2823
2824 header.i = *(int *)cmdbuf.buf;
2825 cmdbuf.buf += sizeof(header);
2826 cmdbuf.bufsz -= sizeof(header);
2827
2828 switch (header.header.cmd_type) {
2829 case RADEON_CMD_PACKET:
2830 DRM_DEBUG("RADEON_CMD_PACKET\n");
2831 if (radeon_emit_packets
2832 (dev_priv, filp_priv, header, &cmdbuf)) {
2833 DRM_ERROR("radeon_emit_packets failed\n");
2834 goto err;
2835 }
2836 break;
2837
2838 case RADEON_CMD_SCALARS:
2839 DRM_DEBUG("RADEON_CMD_SCALARS\n");
2840 if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2841 DRM_ERROR("radeon_emit_scalars failed\n");
2842 goto err;
2843 }
2844 break;
2845
2846 case RADEON_CMD_VECTORS:
2847 DRM_DEBUG("RADEON_CMD_VECTORS\n");
2848 if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2849 DRM_ERROR("radeon_emit_vectors failed\n");
2850 goto err;
2851 }
2852 break;
2853
2854 case RADEON_CMD_DMA_DISCARD:
2855 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2856 idx = header.dma.buf_idx;
2857 if (idx < 0 || idx >= dma->buf_count) {
2858 DRM_ERROR("buffer index %d (of %d max)\n",
2859 idx, dma->buf_count - 1);
2860 goto err;
2861 }
2862
2863 buf = dma->buflist[idx];
2864 if (buf->filp != filp || buf->pending) {
2865 DRM_ERROR("bad buffer %p %p %d\n",
2866 buf->filp, filp, buf->pending);
2867 goto err;
2868 }
2869
2870 radeon_cp_discard_buffer(dev, buf);
2871 break;
2872
2873 case RADEON_CMD_PACKET3:
2874 DRM_DEBUG("RADEON_CMD_PACKET3\n");
2875 if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2876 DRM_ERROR("radeon_emit_packet3 failed\n");
2877 goto err;
2878 }
2879 break;
2880
2881 case RADEON_CMD_PACKET3_CLIP:
2882 DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2883 if (radeon_emit_packet3_cliprect
2884 (dev, filp_priv, &cmdbuf, orig_nbox)) {
2885 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2886 goto err;
2887 }
2888 break;
2889
2890 case RADEON_CMD_SCALARS2:
2891 DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2892 if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2893 DRM_ERROR("radeon_emit_scalars2 failed\n");
2894 goto err;
2895 }
2896 break;
2897
2898 case RADEON_CMD_WAIT:
2899 DRM_DEBUG("RADEON_CMD_WAIT\n");
2900 if (radeon_emit_wait(dev, header.wait.flags)) {
2901 DRM_ERROR("radeon_emit_wait failed\n");
2902 goto err;
2903 }
2904 break;
2905 case RADEON_CMD_VECLINEAR:
2906 DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2907 if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
2908 DRM_ERROR("radeon_emit_veclinear failed\n");
2909 goto err;
2910 }
2911 break;
2912
2913 default:
2914 DRM_ERROR("bad cmd_type %d at %p\n",
2915 header.header.cmd_type,
2916 cmdbuf.buf - sizeof(header));
2917 goto err;
2918 }
2919 }
2920
2921 if (orig_bufsz != 0)
2922 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2923
2924 DRM_DEBUG("DONE\n");
2925 COMMIT_RING();
2926 return 0;
2927
2928 err:
2929 if (orig_bufsz != 0)
2930 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2931 return DRM_ERR(EINVAL);
2932 }
2933
2934 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2935 {
2936 DRM_DEVICE;
2937 drm_radeon_private_t *dev_priv = dev->dev_private;
2938 drm_radeon_getparam_t param;
2939 int value;
2940
2941 DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2942 sizeof(param));
2943
2944 DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2945
2946 switch (param.param) {
2947 case RADEON_PARAM_GART_BUFFER_OFFSET:
2948 value = dev_priv->gart_buffers_offset;
2949 break;
2950 case RADEON_PARAM_LAST_FRAME:
2951 dev_priv->stats.last_frame_reads++;
2952 value = GET_SCRATCH(0);
2953 break;
2954 case RADEON_PARAM_LAST_DISPATCH:
2955 value = GET_SCRATCH(1);
2956 break;
2957 case RADEON_PARAM_LAST_CLEAR:
2958 dev_priv->stats.last_clear_reads++;
2959 value = GET_SCRATCH(2);
2960 break;
2961 case RADEON_PARAM_IRQ_NR:
2962 value = dev->irq;
2963 break;
2964 case RADEON_PARAM_GART_BASE:
2965 value = dev_priv->gart_vm_start;
2966 break;
2967 case RADEON_PARAM_REGISTER_HANDLE:
2968 value = dev_priv->mmio->offset;
2969 break;
2970 case RADEON_PARAM_STATUS_HANDLE:
2971 value = dev_priv->ring_rptr_offset;
2972 break;
2973 #if BITS_PER_LONG == 32
2974 /*
2975 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2976 * pointer which can't fit into an int-sized variable. According to
2977 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2978 * not supporting it shouldn't be a problem. If the same functionality
2979 * is needed on 64-bit platforms, a new ioctl() would have to be added,
2980 * so backwards-compatibility for the embedded platforms can be
2981 * maintained. --davidm 4-Feb-2004.
2982 */
2983 case RADEON_PARAM_SAREA_HANDLE:
2984 /* The lock is the first dword in the sarea. */
2985 value = (long)dev->lock.hw_lock;
2986 break;
2987 #endif
2988 case RADEON_PARAM_GART_TEX_HANDLE:
2989 value = dev_priv->gart_textures_offset;
2990 break;
2991 case RADEON_PARAM_SCRATCH_OFFSET:
2992 if (!dev_priv->writeback_works)
2993 return DRM_ERR(EINVAL);
2994 value = RADEON_SCRATCH_REG_OFFSET;
2995 break;
2996 case RADEON_PARAM_CARD_TYPE:
2997 if (dev_priv->flags & RADEON_IS_PCIE)
2998 value = RADEON_CARD_PCIE;
2999 else if (dev_priv->flags & RADEON_IS_AGP)
3000 value = RADEON_CARD_AGP;
3001 else
3002 value = RADEON_CARD_PCI;
3003 break;
3004 default:
3005 DRM_DEBUG("Invalid parameter %d\n", param.param);
3006 return DRM_ERR(EINVAL);
3007 }
3008
3009 if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3010 DRM_ERROR("copy_to_user\n");
3011 return DRM_ERR(EFAULT);
3012 }
3013
3014 return 0;
3015 }
3016
3017 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3018 {
3019 DRM_DEVICE;
3020 drm_radeon_private_t *dev_priv = dev->dev_private;
3021 drm_file_t *filp_priv;
3022 drm_radeon_setparam_t sp;
3023 struct drm_radeon_driver_file_fields *radeon_priv;
3024
3025 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3026
3027 DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3028 sizeof(sp));
3029
3030 switch (sp.param) {
3031 case RADEON_SETPARAM_FB_LOCATION:
3032 radeon_priv = filp_priv->driver_priv;
3033 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3034 break;
3035 case RADEON_SETPARAM_SWITCH_TILING:
3036 if (sp.value == 0) {
3037 DRM_DEBUG("color tiling disabled\n");
3038 dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3039 dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3040 dev_priv->sarea_priv->tiling_enabled = 0;
3041 } else if (sp.value == 1) {
3042 DRM_DEBUG("color tiling enabled\n");
3043 dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3044 dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3045 dev_priv->sarea_priv->tiling_enabled = 1;
3046 }
3047 break;
3048 case RADEON_SETPARAM_PCIGART_LOCATION:
3049 dev_priv->pcigart_offset = sp.value;
3050 break;
3051 case RADEON_SETPARAM_NEW_MEMMAP:
3052 dev_priv->new_memmap = sp.value;
3053 break;
3054 default:
3055 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3056 return DRM_ERR(EINVAL);
3057 }
3058
3059 return 0;
3060 }
3061
3062 /* When a client dies:
3063 * - Check for and clean up flipped page state
3064 * - Free any alloced GART memory.
3065 * - Free any alloced radeon surfaces.
3066 *
3067 * DRM infrastructure takes care of reclaiming dma buffers.
3068 */
3069 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3070 {
3071 if (dev->dev_private) {
3072 drm_radeon_private_t *dev_priv = dev->dev_private;
3073 if (dev_priv->page_flipping) {
3074 radeon_do_cleanup_pageflip(dev);
3075 }
3076 radeon_mem_release(filp, dev_priv->gart_heap);
3077 radeon_mem_release(filp, dev_priv->fb_heap);
3078 radeon_surfaces_release(filp, dev_priv);
3079 }
3080 }
3081
3082 void radeon_driver_lastclose(drm_device_t * dev)
3083 {
3084 radeon_do_release(dev);
3085 }
3086
3087 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3088 {
3089 drm_radeon_private_t *dev_priv = dev->dev_private;
3090 struct drm_radeon_driver_file_fields *radeon_priv;
3091
3092 DRM_DEBUG("\n");
3093 radeon_priv =
3094 (struct drm_radeon_driver_file_fields *)
3095 drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3096
3097 if (!radeon_priv)
3098 return -ENOMEM;
3099
3100 filp_priv->driver_priv = radeon_priv;
3101
3102 if (dev_priv)
3103 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3104 else
3105 radeon_priv->radeon_fb_delta = 0;
3106 return 0;
3107 }
3108
3109 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3110 {
3111 struct drm_radeon_driver_file_fields *radeon_priv =
3112 filp_priv->driver_priv;
3113
3114 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3115 }
3116
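/* Permission flags, as interpreted by the DRM core: DRM_AUTH requires
 * an authenticated client, DRM_MASTER requires the DRM master (the X
 * server), and DRM_ROOT_ONLY additionally requires admin privileges
 * (CAP_SYS_ADMIN).
 */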
3117 drm_ioctl_desc_t radeon_ioctls[] = {
3118 [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3119 [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3120 [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3121 [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3122 [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3123 [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3124 [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3125 [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3126 [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3127 [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3128 [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3129 [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3130 [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3131 [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
3132 [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3133 [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3134 [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3135 [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3136 [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3137 [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3138 [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3139 [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3140 [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3141 [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3142 [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3143 [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3144 [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
3145 };
3146
3147 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);