brender-1997/pentprim/gsetuptf.asm
2022-05-03 14:31:40 -07:00

909 lines
24 KiB
NASM

; Copyright (c) 1992,1993-1996 Argonaut Technologies Limited. All rights reserved.
;
; $Id: gsetuptf.asm 1.3 1996/12/13 17:51:54 sam Exp $
; $Locker: $
;
; Generic triangle setup code from float component vertices
;
; Uses information in the primitive block to decide what parameters
; should be setup and what rasterisers to use.
;
.586p
.model flat,c
include drv.inc
include rastmacs.inc
include locmacs.inc
include setupdat.inc
.code
extrn RasteriseBufferFlush:proc
; Do all the per-triangle work for a single float parameter
;
; Optionally, remap value to unsigned by flipping sign bit of
; starting value.
;
; Optionally, prescale value by constant or interpolated alpha
;
; Macro arguments:
;
; param    - name of the vertex field to set up
; s_p      - destination of the start value (the dword at s_p+4 receives pdy_1)
; d_p_x    - destination of the x gradient (the dword at d_p_x+4 receives pdy_0)
; conv     - constant added to every result before it is stored (fixed point conversion)
; unsigned - non-zero: flip the sign bit of the stored start value (default 0)
; alpha    - no | const | interp (default no)
;            const:  scale the parameter by vertex0's _a
;            interp: scale each vertex's parameter by that vertex's own _a
;
; eax: ptr to top vertex
;
; edx: ptr to vertex0
; esi: ptr to vertex1
; edi: ptr to vertex2
;
; ebp: ptr to param block
;
; Reads dx1_a, dx2_a, dy1_a, dy2_a, t_dx, t_dy, xstep_0 and xstep_1.
; ebx and ecx are used as scratch.
; The FPU stack is left at the depth it was found; seven entries are live
; at the deepest point.
;
SETUP_FLOAT_PARAM macro param,s_p,d_p_x,conv, unsigned:=<0>,alpha:=<<no>>
; work out parameter deltas
;
; dp1 = param_1 - param_0
; dp2 = param_2 - param_0
;
; Every branch below must finish with st(0) = dp1 and st(1) = dp2
;
; 4 cycles
;
ifidni alpha,<no>
; 0 1 2 3 4 5 6 7
fld [edi].param ; p2
fsub [edx].param ; dp2
fld [esi].param ; p1 dp2
fsub [edx].param ; dp1 dp2
elseifidni alpha, <const>
fld [edi].param ; p2
fsub [edx].param ; dp2
fld [esi].param ; p1 dp2
fsub [edx].param ; dp1 dp2
fxch st(1) ; dp2 dp1
fmul [edx]._a ; dp2*a dp1
fxch st(1) ; dp1 dp2*a
; 1 cycle
fmul [edx]._a ; dp1*a dp2*a
elseifidni alpha, <interp>
fld [edx].param ; p0
fmul [edx]._a ; p0*a0
fld [edi].param ; p2 p0*a0
fmul [edi]._a ; p2*a2 p0*a0
fld [esi].param ; p1 p2*a2 p0*a0
fmul [esi]._a ; p1*a1 p2*a2 p0*a0
fxch st(1) ; p2*a2 p1*a1 p0*a0
fsub st,st(2) ; dp2 p1*a1 p0*a0
; Bring p0*a0 to the top so that the popping subtract computes
; st(1) = p1*a1 - p0*a0 and leaves the deltas in dp1 dp2 order, as the
; other branches do (subtracting into st(2) left dp2 -dp1 instead)
fxch st(2) ; p0*a0 p1*a1 dp2
fsubp st(1),st ; dp1 dp2
endif
; Multiply deltas by precomputed values to get x & y gradients
; (Also interleaved load of parameter start and fractional x & y offsets of start position)
;
; pdx = dp1 * dy2_a - dp2 * dy1_a
; pdy = dp2 * dx1_a - dp1 * dx2_a
;
; 11 cycles
;
; 0 1 2 3 4 5 6 7
fld st(1) ; dp2 dp1 dp2
fmul dy1_a ; dp2*a dp1 dp2
fld st(1) ; dp1 dp2*a dp1 dp2
fmul dy2_a ; dp1*b dp2*a dp1 dp2
fld t_dx ; fdx dp1*b dp2*a dp1 dp2
fxch st(4) ; dp2 dp1*b dp2*a dp1 fdx
fmul dx1_a ; dp2*c dp1*b dp2*a dp1 fdx
fld t_dy ; fdy dp2*c dp1*b dp2*a dp1 fdx
fxch st(4) ; dp1 dp2*c dp1*b dp2*a fdy fdx
fmul dx2_a ; dp1*d dp2*c dp1*b dp2*a fdy fdx
fxch st(3) ; dp2*a dp2*c dp1*b dp1*d fdy fdx
fsubp st(2),st ; dp2*c d1b-d2a dp1*d fdy fdx
fld [eax].param ; param_t dp2*c d1b-d2a dp1*d fdy fdx
; The start value gets the same alpha scaling as the deltas
ifidni alpha,<const>
fmul [edx]._a
elseifidni alpha, <interp>
fmul [eax]._a
endif
fxch st(3) ; dp1*d dp2*c d1b-d2a param_t fdy fdx
fsubp st(1),st ; d2c-d1d d1b-d2a param_t fdy fdx
; pdy pdx param_t fdy fdx
; Build the inputs to the rasteriser
;
; pdy_0 = pdy + xstep_0 * pdx
; pdy_1 = pdy + xstep_1 * pdx
; pstart = param_t + pdx * fdx + pdy * fdy
;
; (A couple of the fixed point conversions are interleaved into this block)
; 12 cycles
;
; 0 1 2 3 4 5 6 7
fld st(1) ; pdx pdy pdx param_t fdy fdx
fmul xstep_0 ; pdx*xs0 pdy pdx param_t fdy fdx
fld st(2) ; pdx pdx*xs0 pdy pdx param_t fdy fdx
fmul xstep_1 ; pdx*xs1 pdx*xs0 pdy pdx param_t fdy fdx
fxch st(1) ; pdx*xs0 pdx*xs1 pdy pdx param_t fdy fdx
fadd st,st(2) ; pdy_0 pdx*xs1 pdy pdx param_t fdy fdx
fxch st(3) ; pdx pdx*xs1 pdy pdy_0 param_t fdy fdx
fmul st(6),st ; pdx pdx*xs1 pdy pdy_0 param_t fdy fdx*pdx
fxch st(2) ; pdy pdx*xs1 pdx pdy_0 param_t fdy fdx*pdx
fadd st(1),st ; pdy pdy_1 pdx pdy_0 param_t fdy fdx*pdx
fmulp st(5),st ; pdy_1 pdx pdy_0 param_t fdy*pdy fdx*pdx
fxch st(3) ; param_t pdx pdy_0 pdy_1 fdy*pdy fdx*pdx
faddp st(5),st ; pdx pdy_0 pdy_1 fdy*pdy fpx+pt
fxch st(1) ; pdy_0 pdx pdy_1 fdy*pdy fpx+pt
fadd conv ; C+pdy_0 pdx pdy_1 fdy*pdy fpx+pt
fxch st(2) ; pdy_1 pdx C+pdy_0 fdy*pdy fpx+pt
fadd conv ; C+pdy_1 pdx C+pdy_0 fdy*pdy fpx+pt
fxch st(3) ; fdy*pdy pdx C+pdy_0 C+pdy_1 fpx+pt
faddp st(4),st ; pdx C+pdy_0 C+pdy_1 pstart
; Convert to fixed point, pack and store in output block
;
; tsb->d_p_y0 = convert(pdy_0)
; tsb->d_p_y1 = convert(pdy_1)
; tsb->d_p_x = convert(pdx)
; tsb->s_p = convert(pstart)
;
; Each value is written as a real8 and only its low dword is kept.
; s_p and d_p_x are first used as temporaries for pdy_1 and pdy_0, whose
; low dwords are then parked in the dwords at s_p+4 and d_p_x+4.
;
; 13 cycles
; 0 1 2 3 4 5 6 7
fadd conv ; C+pdx C+pdy_0 C+pdy_1 pstart
fxch st(3) ; pstart C+pdy_0 C+pdy_1 C+pdx
; 1 Cycle
fadd conv ; C+pstrt C+pdy_0 C+pdy_1 C+pdx
fxch st(2) ; C+pdy_1 C+pdy_0 C+pstrt C+pdx
fstp real8 ptr s_p ; C+pdy_0 C+pstrt C+pdx (pdy_1 held temporarily in s_p)
fstp real8 ptr d_p_x ; C+pstrt C+pdx (pdy_0 held temporarily in d_p_x)
mov ebx,dword ptr s_p ; ebx = converted pdy_1
mov ecx,dword ptr d_p_x ; ecx = converted pdy_0
fstp real8 ptr s_p ; C+pdx
fstp real8 ptr d_p_x ;
mov dword ptr s_p+4,ebx ; overwrite high half of the real8 with pdy_1
mov dword ptr d_p_x+4,ecx ; overwrite high half of the real8 with pdy_0
; Change from -1 to 1 signed to 0 to 1 unsigned
;
if unsigned
mov ebx,dword ptr s_p
xor ebx,080000000h
mov dword ptr s_p,ebx
endif
endm
; Copy the current texture description into the parameter block
;
; Transfers work.texture.palette, work.texture.base and the dword at
; work.texture.width_s into PARAM.tinfo.
;
; Scratch: ebx, ecx
;
SETUP_TEXTURE_INFO macro
mov ecx,work.texture.palette
mov ebx,work.texture.base
mov PARAM.tinfo.map_address,ecx ; palette -> map_address
mov PARAM.tinfo.texture_address,ebx ; base -> texture_address
mov ebx,dword ptr work.texture.width_s
mov dword ptr PARAM.tinfo.width_s,ebx ; whole dword starting at width_s
endm
; Set constant colour from vertex 0
;
; Converts the _a, _r, _g and _b components of the vertex at edx and packs
; the low byte of each converted value into PARAM.h._c:
;
; _c+0 = b, _c+1 = g, _c+2 = r, _c+3 = a
;
; Alpha is converted with fp_conv_s8, the colour components with fp_conv_s.
;
; edx: ptr to vertex0
;
; Uses the c_b, c_g, c_r and c_a locals as temporaries.
; ebx and ecx are used as scratch. The FPU stack is left as it was found.
;
SETUP_FLOAT_CONSTANT_COLOUR macro
; 0 1 2 3 4 5 6 7
fld [edx]._a
fadd fp_conv_s8 ; a+C
fld [edx]._r
fadd fp_conv_s ; r+C a+C
fld [edx]._g
fadd fp_conv_s ; g+C r+C a+C
fld [edx]._b
fadd fp_conv_s ; b+C g+C r+C a+C
fxch st(1) ; g+C b+C r+C a+C (store g first; b was only just computed)
fstp c_g
fstp c_b
fstp c_r
fstp c_a
; Only the low byte of each stored dword is used
mov ebx,c_b
mov ecx,c_g
mov byte ptr PARAM.h._c+0,bl
mov byte ptr PARAM.h._c+1,cl
mov ebx,c_r
mov ecx,c_a
mov byte ptr PARAM.h._c+2,bl
mov byte ptr PARAM.h._c+3,cl
endm
; Copy constant alpha to parameter block
;
; Converts the _a component of the vertex at edx with fp_conv_s8, moves the
; low byte of the result up into bits 24-31 and writes the whole dword to
; PARAM.h._c (so the low three bytes of _c are written as zero).
;
; edx: ptr to vertex0
;
; Uses the c_a local as a temporary. ebx is used as scratch.
;
SETUP_FLOAT_CONSTANT_ALPHA macro
; 0 1 2 3 4 5 6 7
fld [edx]._a
fadd fp_conv_s8 ; a+C
fstp c_a
mov ebx,c_a
shl ebx,24 ; keep only the low byte, in the top byte of the dword
mov PARAM.h._c+0,ebx
endm
; void BR_ASM_CALL GenericSetupTriangleFloat_A(struct local_block *pb, brp_vertex *v0,brp_vertex *v1,brp_vertex *v2)
;
; Setup a triangle given floating point components. Uses the generic_setup_info in
; the supplied parameter block to:
;
; Allocate a parameter block in the rasterise buffer (Flushing if necessary)
; Do common triangle setup
; Choose a rasteriser (LR/RL, Small/Large)
; Generate start/deltas for parameters (setup_info has pointer to specific setup code)
;
; ebx, ebp, edi and esi are pushed during setup; every exit path (the parameter
; specific setup code that is jumped to, and empty_triangle) pops them again
; before returning.
;
GenericSetupTriangleFloat_A proc
; Use ESP to access stack frame
;
LOCALS_BEGIN
PARAM_DWORD lblock
v0_offset equ _param_size
PARAM_DWORD v0
PARAM_DWORD v1
PARAM_DWORD v2
LOCAL_DWORD dx1_a
LOCAL_DWORD dx2_a
LOCAL_DWORD dy1_a
LOCAL_DWORD dy2_a
LOCAL_DWORD xstep_1
LOCAL_DWORD xstep_0
LOCAL_DWORD t_dx
LOCAL_DWORD_ALIAS m_y
LOCAL_DWORD t_dy
LOCAL_DWORD t_y
LOCAL_DWORD flip
LOCAL_DWORD iarea
LOCAL_DWORD c_b
LOCAL_DWORD c_g
LOCAL_DWORD c_r
LOCAL_DWORD c_a
LOCALS_END
; Load up vertex pointers and check that there is space in rasterise buffer
;
; The buffer is allocated downwards: the new top is the old top minus
; setup.param_size, and a flush is needed if that would fall below the
; start of rasteriseBuffer.
;
mov ecx,rasteriseBufferTop
mov eax,lblock
mov edx,v2
LOCALS_CREATE
sub ecx,[eax].local_block.setup.param_size
mov eax,v0
cmp ecx,offset rasteriseBuffer
jae no_flush
; Flush stacked triangles - have to do this before setup starts, as MMX rasterisers will
; trash the FPU stack
;
; Everything held in registers is reloaded after the call
;
call RasteriseBufferFlush
mov eax,lblock
mov ecx,rasteriseBufferTop
mov edx,v2
sub ecx,[eax].local_block.setup.param_size
mov eax,v0
no_flush: mov rasteriseBufferTop,ecx
mov ecx,v1
assume eax: ptr brp_vertex, ebx:ptr brp_vertex, ecx: ptr brp_vertex, edx: ptr brp_vertex
; Calculate area of triangle and generate dx1/2area, dx2/2area, dy1/2area and dy2/2area
;
; Also sort the vertices in Y order whilst divide is happening
;
; Here eax = v0, ecx = v1, edx = v2 (unsorted)
;
; 0 1 2 3 4 5 6 7
fld [edx]._x ; x2
fsub [eax]._x ; dx2
fld [ecx]._x ; x1 dx2
fsub [eax]._x ; dx1 dx2
fld [edx]._y ; y2 dx1 dx2
fsub [eax]._y ; dy2 dx1 dx2
fld [ecx]._y ; y1 dy2 dx1 dx2
fsub [eax]._y ; dy1 dy2 dx1 dx2
fld st(2) ; dx1 dy1 dy2 dx1 dx2
fmul st,st(2) ; dx1*dy2 dy1 dy2 dx1 dx2
fld st(4) ; dx2 dx1*dy2 dy1 dy2 dx1 dx2
fmul st,st(2) ; dx2*dy1 dx1*dy2 dy1 dy2 dx1 dx2
LPUSH ebx
; From here on eax, ecx and edx hold the raw bit patterns of the three Y
; values, which are compared with integer instructions
; NOTE(review): integer ordering of float bits presumably relies on the Y
; values being non-negative - confirm against callers
mov eax,[eax]._y
LPUSH ebp
mov ecx,[ecx]._y
fsubp st(1),st ; 2area dy1 dy2 dx1 dx2
xor ebx,ebx
cmp ecx,eax
rcl ebx,1
mov edx,[edx]._y
fdivr fp_one ; 1/2area dy1 dy2 dx1 dx2
LPUSH edi
cmp edx,eax
rcl ebx,1
cmp edx,ecx
rcl ebx,1 ; ebx now has 3 bit number characterising the order of the vertices.
LPUSH esi
;U
;V
; Look up, for this ordering, the stack offsets of the top, middle and bottom
; vertex pointers and the matching flip value
mov eax,sort_table_0[ebx*4]
mov edx,sort_table_2[ebx*4]
mov esi,flip_table[ebx*4]
mov ebx,sort_table_1[ebx*4]
; Load eax,ebx,edx with pointers to the three vertices in vertical order
;
mov eax,[v0_offset+_param_offset+esp+eax]
mov edx,[v0_offset+_param_offset+esp+edx]
mov ebx,[v0_offset+_param_offset+esp+ebx]
mov flip,esi
; Work out Y extents of triangle
;
; Convert float to int using integer instructions, because FPU is in use doing division
;
; NB: this conversion only works if the number is greater than 0 and less than 1<<24
;
MASK_MANTISSA equ 007fffffh
IMPLICIT_ONE equ 1 shl 23
EXPONENT_OFFSET equ ((127+23) shl 23) or 07fffffh
mov ebp,[eax]._y
mov ecx,EXPONENT_OFFSET
sub ecx,ebp ; Offset exponent to get shift value
and ebp,MASK_MANTISSA ; Mask out mantissa
shr ecx,23 ; Move shift value to low bits
or ebp,IMPLICIT_ONE ; Put the 1 back in top of mantissa
shr ebp,cl ; EBP = y_t
mov esi,[ebx]._y
mov ecx,EXPONENT_OFFSET
sub ecx,esi
and esi,MASK_MANTISSA
shr ecx,23
or esi,IMPLICIT_ONE
shr esi,cl ; ESI = y_m
mov edi,[edx]._y
mov ecx,EXPONENT_OFFSET
sub ecx,edi
and edi,MASK_MANTISSA
shr ecx,23
or edi,IMPLICIT_ONE
shr edi,cl ; EDI = y_b
; Catch special cases of empty top or bottom trapezoids
;
cmp ebp,esi
je top_zero
cmp esi,edi
je bottom_zero
; Parameter gradient startup and Y deltas for edge gradients
;
; 0 1 2 3 4 5 6 7
fmul st(1),st ; 1/2area dy1*a dy2 dx1 dx2
fld [ebx]._y ; sy2 1/2area dy1*a dy2 dx1 dx2
fsub [eax]._y ; dsy1 1/2area dy1*a dy2 dx1 dx2
fxch st(3) ; dy2 1/2area dy1*a dsy1 dx1 dx2
fmul st,st(1) ; dy2*a 1/2area dy1*a dsy1 dx1 dx2
fld [edx]._y ; sy3 dy2*a 1/2area dy1*a dsy1 dx1 dx2
fsub [ebx]._y ; dsy2 dy2*a 1/2area dy1*a dsy1 dx1 dx2
fxch st(5) ; dx1 dy2*a 1/2area dy1*a dsy1 dsy2 dx2
; top_zero and bottom_zero special cases re-enter here
;
count_cont:
fmul st,st(2) ; dx1*a dy2*a 1/2area dy1*a dsy1 dsy2 dx2
fld [edx]._y ; sy3 dx1*a dy2*a 1/2area dy1*a dsy1 dsy2 dx2
fsub [eax]._y ; dsy3 dx1*a dy2*a 1/2area dy1*a dsy1 dsy2 dx2
fxch st(7) ; dx2 dx1*a dy2*a 1/2area dy1*a dsy1 dsy2 dsy3
fmul st,st(3) ; dx2*a dx1*a dy2*a 1/2area dy1*a dsy1 dsy2 dsy3
fxch st(3) ; 1/2area dx1*a dy2*a dx2*a dy1*a dsy1 dsy2 dsy3
fstp iarea
fstp dx1_a
fstp dy2_a
fstp dx2_a
fstp dy1_a ; dy1 dy2 dy3
; Find edge gradients of triangle
;
; Uses 1 reciprocal for all three edges:
;
; R = 1/(dy1.dy2.dy3)
;
; gradient_major = dy1.dx2.dy3.R
; gradient_minor1 = dx1.dy2.dy3.R
; gradient_minor2 = dy1.dy2.dx3.R
;
; This could be dodgy - since it is possible for the gradients on two sides
; of an edge to wind up being different. In practice, this has not proved
; to be a problem (yet)
;
; 0 1 2 3 4 5 6 7
fld st(2) ; dy3 dy1 dy2 dy3
fmul st,st(2) ; dy2*dy3 dy1 dy2 dy3
fld [ebx]._x ; x2 dy2*dy3 dy1 dy2 dy3
fsub [eax]._x ; dx1 dy2*dy3 dy1 dy2 dy3
fld st(1) ; dy2*dy3 dx1 dy2*dy3 dy1 dy2 dy3
fmul st,st(3) ; dy123 dx1 dy2*dy3 dy1 dy2 dy3
fld [edx]._x ; x3 dy123 dx1 dy2*dy3 dy1 dy2 dy3
fsub [ebx]._x ; dx2 dy123 dx1 dy2*dy3 dy1 dy2 dy3
fxch st(2) ; dx1 dy123 dx2 dy2*dy3 dy1 dy2 dy3
fld fp_one ; 1.0 dx1 dy123 dx2 dy2*dy3 dy1 dy2 dy3
fdivrp st(2),st ; dx1 R dx2 dy2*dy3 dy1 dy2 dy3
; Generate counts
;
; (integer work below runs while the divide above is in progress)
;
inc ebp
mov ecx,esi
sub ecx,ebp ; count_t = (y_m-y_t)-1
mov t_y,ebp ; save for X intercept calcs
shl ecx,16
inc esi
sub edi,esi ; count_b = (y_b-y_m)-1
mov m_y,esi ; save for X intercept calcs
and edi,0ffffh ; Count can go negative
mov ebp,rasteriseBufferTop ; Fetch pointer to parameter block
PARAMH equ [ebp].tsb_header
or ecx,edi ; combined counts
mov edi,t_y
; Setup stride and screen pointers
;
; XXX Some write buffer stalls here - could touch the cache line(s) in advance
;
mov PARAMH.counts,ecx
dec edi ; Input are offset by one scanline (to avoid FP conversion problems)
mov esi,lblock
mov PARAMH.start_scanline,edi
mov edi,work.colour.stride_b
mov ecx,work.depth.stride_b
mov PARAMH.screen_stride,edi
mov PARAMH.depth_stride,ecx
mov edi,work.colour.base
mov ecx,work.depth.base
mov PARAMH.screen_address,edi
mov edi,iarea
mov PARAMH.depth_address,ecx
and edi,7fffffffh ; Clear sign bit of area
; Setup rasterisation function, generate an index using area comparison and LR bit
;
; Generate LR/RL flag into ecx (also used to index conversion numbers below)
;
cmp edi,[esi].local_block.setup.iarea_limit
mov ecx,flip
sbb edi,edi ; edi = -1 if |iarea| is below iarea_limit, else 0
xor ecx,iarea ; Build LR flag in bit 31
shr ecx,31 ; move down to bit 0
add edi,edi
sub edi,ecx ; edi now has -3 = RL_S, -2 = LR_S , -1 = RL_L, 0 = LR_L
; NOTE(review): the index above is 0 for LR_L, yet the table is addressed
; relative to rasterise_lr_s - confirm against the local_block layout that
; indices -3..0 from this field select the intended functions
mov edi,[esi+edi*4].local_block.setup.rasterise_lr_s
mov PARAMH.function, edi
mov edi,[esi].local_block.setup.stride ; Get stride for rasteriser
neg edi
; Finish off gradient calculations, interleaved with working out t_dy, and m_dy, the fractions
; that the top and middle vertices are from the integer scanline boundaries
;
; t_dy = (yt+1) - vt->y
; m_dy = (ym+1) - vm->y
;
; eax - Vertex 0 (top vertex after the Y sort)
; ebx - Vertex 1 (middle vertex after the Y sort)
; edx - Vertex 2 (bottom vertex after the Y sort)
; ecx - L/R flag
; esi - Pointer to setup block
; edi - negative stride
; ebp - Pointer to parameter block header
;
; 0 1 2 3 4 5 6 7
fmulp st(3),st ; R dx2 XYY dy1 dy2 dy3
fld [edx]._x ; x3 R dx2 XYY dy1 dy2 dy3
fxch st(3) ; XYY R dx2 x3 dy1 dy2 dy3
fmul st,st(1) ; XYY*R R dx2 x3 dy1 dy2 dy3
fxch st(3) ; x3 R dx2 XYY*R dy1 dy2 dy3
fsub [eax]._x ; dx3 R dx2 XYY*R dy1 dy2 dy3
fxch st(1) ; R dx3 dx2 XYY*R dy1 dy2 dy3
fmulp st(4),st ; dx3 dx2 XYY*R dy1*R dy2 dy3
fxch st(2) ; XYY*R dx2 dx3 dy1*R dy2 dy3
fild m_y ; m_y XYY*R dx2 dx3 dy1*R dy2 dy3
fxch st(2) ; dx2 XYY*R m_y dx3 dy1*R dy2 dy3
fmulp st(6),st ; XYY*R m_y dx3 dy1*R dy2 dx2*dy3
fild t_y ; t_y XYY*R m_y dx3 dy1*R dy2 dx2*dy3
fxch st(3) ; dx3 XYY*R m_y t_y dy1*R dy2 dx2*dy3
fmulp st(5),st ; XYY*R m_y t_y dy1*R dy2*dx3 dx2*dy3
fxch st(1) ; m_y XYY*R t_y dy1*R dy2*dx3 dx2*dy3
fsub [ebx]._y ; m_dy XYY*R t_y dy1*R dy2*dx3 dx2*dy3
fxch st(3) ; dy1*R XYY*R t_y m_dy dy2*dx3 dx2*dy3
fmul st(4),st ; dy1*R XYY*R t_y m_dy YYX*R dx2*dy3
fxch st(2) ; t_y XYY*R dy1*R m_dy YYX*R dx2*dy3
fsub [eax]._y ; t_dy XYY*R dy1*R m_dy YYX*R dx2*dy3
fxch st(2) ; dy1*R XYY*R t_dy m_dy YYX*R dx2*dy3
fmulp st(5),st ; XYY*R t_dy m_dy YYX*R YXY*R
fxch st(2) ; m_dy t_dy XYY*R YYX*R YXY*R
; m_dy t_dy g1 gm g2
; Work out initial X intercepts with top and middle scanlines
;
; x_major = gm * t_dy + vt->x
; x_minor1 = g1 * t_dy + vt->x
; x_minor2 = g2 * m_dy + vm->x
;
; 0 1 2 3 4 5 6 7
fld st(1) ; t_dy m_dy t_dy g1 gm g2
fxch st(1) ; m_dy t_dy t_dy g1 gm g2
fmul st,st(5) ; m_dy*g2 t_dy t_dy g1 gm g2
fxch st(2) ; t_dy t_dy m_dy*g2 g1 gm g2
fst t_dy
fmul st,st(3) ; t_dy*g1 t_dy m_dy*g2 g1 gm g2
fxch st(2) ; m_dy*g2 t_dy t_dy*g1 g1 gm g2
fadd [ebx]._x ; x_2 t_dy t_dy*g1 g1 gm g2
fxch st(1) ; t_dy x_2 t_dy*g1 g1 gm g2
fmul st,st(4) ; t_dy*gm x_2 t_dy*g1 g1 gm g2
fxch st(2) ; t_dy*g1 x_2 t_dy*gm g1 gm g2
fadd [eax]._x ; x_1 x_2 t_dy*gm g1 gm g2
fxch st(3) ; g1 x_2 t_dy*gm x_1 gm g2
fadd fp_conv_d16 ; g1+C x_2 t_dy*gm x_1 gm g2
fxch st(2) ; t_dy*gm x_2 g1+C x_1 gm g2
fadd [eax]._x ; x_m x_2 g1+C x_1 gm g2
fxch st(4) ; gm x_2 g1+C x_1 x_m g2
fadd fp_conv_d16 ; gm+C x_2 g1+C x_1 x_m g2
fxch st(1) ; x_2 gm+C g1+C x_1 x_m g2
fadd fconv_d16_12[ecx*8] ; x_2+C gm+C g1+C x_1 x_m g2
fxch st(5) ; g2 gm+C g1+C x_1 x_m x_2+C
fadd fp_conv_d16 ; g2+C gm+C g1+C x_1 x_m x_2+C
fxch st(2) ; g1+C gm+C g2+C x_1 x_m x_2+C
; The gradients are stored first; their low dwords are read back below and
; kept in the +4 slots once the start values overwrite the low dwords
fstp real8 ptr PARAMH.x1 ; gm+C g2+C x_1 x_m x_2+C
fstp real8 ptr PARAMH.xm ; g2+C x_1 x_m x_2+C
fstp real8 ptr PARAMH.x2 ; x_1 x_m x_2+C
fadd fconv_d16_12[ecx*8] ; x_1+C x_m x_2+C
fxch st(1) ; x_m x_1+C x_2+C
fadd fconv_d16_m[ecx*8] ; x_m+C x_1+C x_2+C
; Load deltas back in registers
;
mov edx,PARAMH.xm ; read fixed d_xm
mov ecx,PARAMH.x1 ; read fixed d_x1
fstp real8 ptr PARAMH.xm ; x_1+C x_2+C
fstp real8 ptr PARAMH.x1 ; x_2+C
mov PARAMH.x1+4,ecx
mov ecx,PARAMH.xm
sar ecx,16
mov PARAMH.xm+4,edx
sar edx,16 ; get integer part of x delta down major edge
and ecx,edi
and edx,edi ; Truncate x delta to nearest BOUNDARY pixels
mov t_dx,ecx
fild t_dx ; t_x x_2+C
; Generate floating point versions of x delta and x delta+BOUNDARY
;
mov xstep_0,edx
sub edx,edi
mov ecx,PARAMH.x2 ; read fixed d_x2
mov xstep_1,edx
; 0 1 2 3 4 5 6 7
fsub [eax]._x ; t_dx x_2+C
fxch st(1) ; x_2+C t_dx
fstp real8 ptr PARAMH.x2 ; t_dx
fild xstep_0 ; xstep_0 t_dx
fild xstep_1 ; xstep_1 xstep_0 t_dx
fxch st(2) ; tdx xstep_0 xstep_1
fstp t_dx ; xstep_0 xstep_1
mov PARAMH.x2+4,ecx
mov ebx,[esi].local_block.setup.setup_param ; get pointer to parameter setup code
fstp xstep_0 ; xstep_1
fstp xstep_1 ;
; Jump to parameter specific setup code
;
; The FPU stack is empty at this point
;
; eax = top vertex
;
; edx = v0
; esi = v1
; edi = v2
; ebp = pointer to parameter block
;
; ebx & ecx are scratch
;
mov edx,v0
mov esi,v1
mov edi,v2
jmp ebx
; Special cases for top or bottom counts == 0
;
; Each substitutes 1.0 for the Y delta of the empty edge and then rejoins
; the main path at count_cont
;
top_zero: cmp ebp,edi ; Check for completely empty triangle
je empty_triangle
; 0 1 2 3 4 5 6 7
fmul st(1),st ; 1/2area dy1*a dy2 dx1 dx2
fld fp_one ; 1.0 1/2area dy1*a dy2 dx1 dx2
fxch st(3) ; dy2 1/2area dy1*a 1.0 dx1 dx2
fmul st,st(1) ; dy2*a 1/2area dy1*a 1.0 dx1 dx2
fld [edx]._y ; sy3 dy2*a 1/2area dy1*a 1.0 dx1 dx2
fsub [ebx]._y ; dsy2 dy2*a 1/2area dy1*a 1.0 dx1 dx2
fxch st(5) ; dx1 dy2*a 1/2area dy1*a 1.0 dsy2 dx2
jmp count_cont
bottom_zero: ; 0 1 2 3 4 5 6 7
fmul st(1),st ; 1/2area dy1*a dy2 dx1 dx2
fld [ebx]._y ; sy2 1/2area dy1*a dy2 dx1 dx2
fsub [eax]._y ; dsy1 1/2area dy1*a dy2 dx1 dx2
fxch st(3) ; dy2 1/2area dy1*a dsy1 dx1 dx2
fmul st,st(1) ; dy2*a 1/2area dy1*a dsy1 dx1 dx2
fld fp_one ; 1.0 dy2*a 1/2area dy1*a dsy1 dx1 dx2
fxch st(5) ; dx1 dy2*a 1/2area dy1*a dsy1 1.0 dx2
jmp count_cont
; Triangle has no pixels:
;
; Back out the buffer allocation, clear the FPU stack and return
;
; Five values (1/2area dy1 dy2 dx1 dx2) are on the FPU stack on arrival,
; hence the five fstp st(0) interleaved with the register restores
;
assume eax:nothing, ebx:nothing, ecx:nothing, edx:nothing
empty_triangle: mov eax,rasteriseBufferTop
mov ecx,lblock
fstp st(0)
add eax,[ecx].local_block.setup.param_size
pop esi
fstp st(0)
mov rasteriseBufferTop,eax
pop edi
fstp st(0)
fstp st(0)
fstp st(0)
pop ebp
pop ebx
LOCALS_DESTROY
ret
assume eax:ptr brp_vertex,edx:ptr brp_vertex, esi:ptr brp_vertex, edi:ptr brp_vertex
; Setup for Z RGB UV S
;
; Arranged as a tower with several entry points to save code
;
; Each entry point falls through into the ones below it:
;
; ZRGBUVS - constant alpha, then as ZRGBUV
; ZRGBUV  - u and v, texture info, then as ZRGB
; ZRGB    - r, g and b, then as Z
; Z       - z (start value remapped to unsigned), then exit
;
; NOTE(review): presumably reached through the setup_param pointer that the
; common setup code jumps to - confirm where these labels are installed
;
; XXX This could be re-done so that parameter specific entry points simply call
; the general setup code first.
;
public GenericSetupFloatZRGBUVS_A
public GenericSetupFloatZRGBUV_A
public GenericSetupFloatZRGB_A
public GenericSetupFloatZ_A
PARAM equ [ebp].param_zrgbuv
GenericSetupFloatZRGBUVS_A label near
SETUP_FLOAT_CONSTANT_ALPHA
GenericSetupFloatZRGBUV_A label near
SETUP_FLOAT_PARAM _u,PARAM.s_u,PARAM.d_u_x,fp_conv_d12
SETUP_FLOAT_PARAM _v,PARAM.s_v,PARAM.d_v_x,fp_conv_d12
SETUP_TEXTURE_INFO
GenericSetupFloatZRGB_A label near
SETUP_FLOAT_PARAM _r,PARAM.s_r,PARAM.d_r_x,fp_conv_d24
SETUP_FLOAT_PARAM _g,PARAM.s_g,PARAM.d_g_x,fp_conv_d24
SETUP_FLOAT_PARAM _b,PARAM.s_b,PARAM.d_b_x,fp_conv_d24
GenericSetupFloatZ_A label near
SETUP_FLOAT_PARAM _z,PARAM.s_z,PARAM.d_z_x,fp_conv_d16,1
; Exit
;
; Restore the registers pushed by the common setup code (in reverse order)
;
pop esi
pop edi
pop ebp
pop ebx
LOCALS_DESTROY
ret
; Setup for Z RGB S
;
; Constant alpha, then r, g, b and z (z start value remapped to unsigned),
; written to the param_zrgb layout of the parameter block at ebp
;
public GenericSetupFloatZRGBS_A
PARAM equ [ebp].param_zrgb
GenericSetupFloatZRGBS_A label near
SETUP_FLOAT_CONSTANT_ALPHA
SETUP_FLOAT_PARAM _r,PARAM.s_r,PARAM.d_r_x,fp_conv_d24
SETUP_FLOAT_PARAM _g,PARAM.s_g,PARAM.d_g_x,fp_conv_d24
SETUP_FLOAT_PARAM _b,PARAM.s_b,PARAM.d_b_x,fp_conv_d24
SETUP_FLOAT_PARAM _z,PARAM.s_z,PARAM.d_z_x,fp_conv_d16,1
; Exit
;
; Restore the registers pushed by the common setup code (in reverse order)
;
pop esi
pop edi
pop ebp
pop ebx
LOCALS_DESTROY
ret
; Setup for Z UV C
;
; ZUVC sets the constant colour from vertex 0 and then falls through into
; ZUV, which sets up u, v, the texture info and z (z start value remapped
; to unsigned) in the param_zuv layout of the parameter block at ebp
;
public GenericSetupFloatZUVC_A
public GenericSetupFloatZUV_A
PARAM equ [ebp].param_zuv
GenericSetupFloatZUVC_A label near
SETUP_FLOAT_CONSTANT_COLOUR
GenericSetupFloatZUV_A label near
SETUP_FLOAT_PARAM _u,PARAM.s_u,PARAM.d_u_x,fp_conv_d12
SETUP_FLOAT_PARAM _v,PARAM.s_v,PARAM.d_v_x,fp_conv_d12
SETUP_TEXTURE_INFO
SETUP_FLOAT_PARAM _z,PARAM.s_z,PARAM.d_z_x,fp_conv_d16,1
; Exit
;
; Restore the registers pushed by the common setup code (in reverse order)
;
pop esi
pop edi
pop ebp
pop ebx
LOCALS_DESTROY
ret
; Setup for Z C
;
; Constant colour from vertex 0, then z (start value remapped to unsigned),
; written to the param_z layout of the parameter block at ebp
;
public GenericSetupFloatZC_A
PARAM equ [ebp].param_z
GenericSetupFloatZC_A label near
SETUP_FLOAT_CONSTANT_COLOUR
SETUP_FLOAT_PARAM _z,PARAM.s_z,PARAM.d_z_x,fp_conv_d16,1
; Exit
;
; Restore the registers pushed by the common setup code (in reverse order)
;
pop esi
pop edi
pop ebp
pop ebx
LOCALS_DESTROY
ret
GenericSetupTriangleFloat_A endp
end