1305 lines
27 KiB
NASM
1305 lines
27 KiB
NASM
; Pentium optimized rasterisers using floating point setup code
|
|
;
|
|
.586p
|
|
.model flat,c
|
|
|
|
include drv.inc
|
|
include 586_macs.inc
|
|
include fpwork.inc
|
|
include fpsetup.inc
|
|
include common.inc
|
|
|
|
|
|
; Cheat case entry points for perspective correct texture mapper
|
|
;
|
|
externdef TriangleRasterise_ZI_I8_D16:proc
|
|
externdef TriangleRasterise_ZT_I8_D16_256:proc
|
|
externdef TriangleRasterise_ZTF_I8_D16_256:proc
|
|
|
|
|
|
; Optional cycle-count profiling.
;
; The first branch is disabled by "if 0"; change it to "if 1" to assemble the
; real timers. Each timer is a 64-bit counter named NAME_cycles, where NAME is
; the macro argument (setup, outer or inner).
;
;   CLEAR_TIMER name   zero the counter
;   START_TIMER name   subtract the current time stamp counter from it
;   STOP_TIMER  name   add the current time stamp counter to it
;
; A START/STOP pair therefore adds (stop TSC - start TSC) to the counter.
; START_TIMER and STOP_TIMER save and restore eax/edx around rdtsc, but they
; do modify the arithmetic flags.
;
; The macro parameter is pasted into the symbol name with the substitution
; operator: &section&_cycles.
;
if 0
                .data

                public  setup_cycles
                public  outer_cycles
                public  inner_cycles

setup_cycles    dq      0
outer_cycles    dq      0
inner_cycles    dq      0

CLEAR_TIMER     macro   section
                mov     [dword ptr &section&_cycles],0
                mov     [dword ptr &section&_cycles+4],0
                endm

START_TIMER     macro   section
                push    eax
                push    edx
                rdtsc                                           ; edx:eax = time stamp counter
                sub     [dword ptr &section&_cycles],eax        ; counter -= TSC (low dword)
                sbb     [dword ptr &section&_cycles+4],edx      ; ... high dword with borrow
                pop     edx
                pop     eax
                endm

STOP_TIMER      macro   section
                push    eax
                push    edx
                rdtsc                                           ; edx:eax = time stamp counter
                add     [dword ptr &section&_cycles],eax        ; counter += TSC (low dword)
                adc     [dword ptr &section&_cycles+4],edx      ; ... high dword with carry
                pop     edx
                pop     eax
                endm
else
; Profiling disabled: the timer macros expand to nothing.
CLEAR_TIMER     macro   section
                endm
START_TIMER     macro   section
                endm
STOP_TIMER      macro   section
                endm
endif
|
|
|
|
|
|
; Setup routines. These should be exported by a header file.
|
|
;
|
|
;extern TriangleSetup_ZTI_F:proc
|
|
;extern TriangleSetup_ZT_F:proc
|
|
;extern TriangleSetup_ZI_F:proc
|
|
;extern TriangleSetup_Z_F:proc
|
|
|
|
|
|
; Direction-dependent instruction aliases.
;
; Every TRAPEZIUM_* macro takes a "dirn" argument (f or b) and spells its
; direction-sensitive instructions as op_&dirn, which resolves to one of the
; equates below. The _f name maps to the instruction itself and the _b name
; maps to its opposite, so one macro body yields both a forward-scanning and
; a backward-scanning rasteriser.
;
add_f           equ     add
add_b           equ     sub

adc_f           equ     adc
adc_b           equ     sbb

sub_f           equ     sub
sub_b           equ     add

sbb_f           equ     sbb
sbb_b           equ     adc

inc_f           equ     inc
inc_b           equ     dec

jg_f            equ     jg
jg_b            equ     jl

jle_f           equ     jle
jle_b           equ     jge
|
|
|
|
; Aliases that map the older work_* naming style (".s" written as "_s") onto
; the fields of the floating point setup workspace.
;
; The "top" and "bot" groups share the _count / _i / _d_i suffixes so that the
; TRAPEZIUM_* macros can build the names as work_&half&_count and so on.
;

; Major edge
work_main_i             equ     workspace.xm
work_main_d_i           equ     workspace.d_xm

; Top trapezium (minor edge 1)
work_top_count          equ     workspace.topCount
work_top_i              equ     workspace.x1
work_top_d_i            equ     workspace.d_x1

; Bottom trapezium (minor edge 2)
work_bot_count          equ     workspace.bottomCount
work_bot_i              equ     workspace.x2
work_bot_d_i            equ     workspace.d_x2

; z parameter
work_pz_current         equ     workspace.s_z
work_pz_grad_x          equ     workspace.d_z_x
work_pz_d_nocarry       equ     workspace.d_z_y_0
work_pz_d_carry         equ     workspace.d_z_y_1

; u parameter
work_pu_current         equ     workspace.s_u
work_pu_grad_x          equ     workspace.d_u_x
work_pu_d_nocarry       equ     workspace.d_u_y_0
work_pu_d_carry         equ     workspace.d_u_y_1

; v parameter
work_pv_current         equ     workspace.s_v
work_pv_grad_x          equ     workspace.d_v_x
work_pv_d_nocarry       equ     workspace.d_v_y_0
work_pv_d_carry         equ     workspace.d_v_y_1

; i parameter
work_pi_current         equ     workspace.s_i
work_pi_grad_x          equ     workspace.d_i_x
work_pi_d_nocarry       equ     workspace.d_i_y_0
work_pi_d_carry         equ     workspace.d_i_y_1
|
|
|
|
|
|
.code
|
|
|
|
|
|
; TRAPEZIUM_ZI_I8_D16 half,dirn
;
; Trapezium loop for gouraud shaded, z buffered faces: writes an interpolated
; 8-bit index to the colour buffer and a 16-bit value to the depth buffer.
;
;   half    top or bot - selects work_&half&_count, work_&half&_i and
;           work_&half&_d_i for the minor edge
;   dirn    f or b - selects the op_f / op_b instruction aliases, i.e. the
;           direction scanlines are rendered from the long edge
;
; Does nothing if work_&half&_count is negative. Otherwise renders scanlines
; until the count goes negative, updating workspace.scanAddress,
; workspace.depthAddress, work.main.f and the current edge, i and z values
; after each line.
;
; Inside the pixel loop i and z are held with their 16-bit words swapped, so
; the part that is stored (al / dx) sits in the low word and the part that
; was the low word is stepped in the high word, with the carry folded back in
; by adc/sbb. work_pi_grad_x and work_pz_grad_x must already be in the same
; swapped form when the macro is entered.
;
; Uses eax, ebx, ecx, edx, esi, edi and ebp; none are preserved.
; Instructions are grouped in pairs as in the original Pentium scheduling.
;
TRAPEZIUM_ZI_I8_D16 macro half,dirn
        local   scan_loop,pixel_loop,pixel_behind,no_pixels,done_trapezium

        mov     ebx,work_&half&_count           ; scanlines remaining in this half
                                                ; (second pipe slot unused)

        test    ebx,ebx
        jl      done_trapezium                  ; negative count: empty trapezium

        mov     eax,work_pi_current
        mov     edx,work_pz_current

        mov     edi,work_&half&_i
        mov     ebp,work_main_i

        shr     edi,16                          ; integer x of the minor edge (end of line)
        mov     ebx,workspace.depthAddress

        shr     ebp,16                          ; integer x of the major edge (start of line)
        mov     esi,workspace.scanAddress

scan_loop:

        ; Work out the pixel count and the end-of-line addresses, then
        ; prepare i and z for the inner loop.
        ;
        sub     ebp,edi                         ; ebp = start - end, used as counter and offset
        jg_&dirn    no_pixels                   ; nothing to draw on this line

        add     esi,edi                         ; colour buffer address of line end
        lea     edi,[ebx+edi*2]                 ; depth buffer address of line end

        ror     eax,16                          ; swap words of i
        mov     ecx,work_pi_grad_x

        ror     edx,16                          ; swap words of z
        sub_&dirn   eax,ecx                     ; undo the step of i made at the top of the loop

        sbb_&dirn   eax,0                       ; fold the borrow back in (original note: also clears carry)
        mov     ebx,edx                         ; old and new z must share the same high word

        STOP_TIMER outer
        START_TIMER inner

        ; Register use in the pixel loop:
        ;   eax = i                     ebx = old z, then dz
        ;   ecx = di                    edx = z
        ;   esi = colour buffer ptr     edi = depth buffer ptr
        ;   ebp = count / offset from line end
        ;
pixel_loop:

        adc_&dirn   edx,0                       ; carry from the previous z step into the low word
        mov     bl,[edi+ebp*2]                  ; low byte of stored depth

        add_&dirn   eax,ecx                     ; step i
        mov     bh,[edi+ebp*2+1]                ; high byte of stored depth

        adc_&dirn   eax,0                       ; carry into the low word of i
        cmp     edx,ebx                         ; new z against stored z (high words are identical)

        mov     ebx,work_pz_grad_x              ; mov leaves the flags from cmp intact
        ja      pixel_behind                    ; new z above stored z (unsigned): skip the write

        mov     [edi+ebp*2],dx                  ; store depth (16-bit store needs an operand-size prefix;
        mov     [esi+ebp],al                    ;  original note: two byte writes would fill the write buffers)

pixel_behind:

        add_&dirn   edx,ebx                     ; step z; the carry is consumed at the top of the loop
        inc_&dirn   ebp                         ; advance counter/offset (inc/dec leave CF alone)

        mov     ebx,edx                         ; old and new z must share the same high word
        jle_&dirn   pixel_loop

        STOP_TIMER inner
        START_TIMER outer

no_pixels:

        ; Advance everything to the next scanline.
        ;
        mov     esi,workspace.scanAddress
        mov     ecx,work.colour.stride_b

        mov     ebx,workspace.depthAddress
        mov     edx,work.depth.stride_b

        add     esi,ecx                         ; next line in the colour buffer
        add     ebx,edx                         ; next line in the depth buffer

        mov     workspace.scanAddress,esi
        mov     workspace.depthAddress,ebx

        mov     ebp,work_main_i
        mov     edi,work_&half&_i

        add     ebp,work_main_d_i               ; step major edge
        add     edi,work_&half&_d_i             ; step minor edge

        mov     work_main_i,ebp
        mov     work_&half&_i,edi

        mov     eax,work.main.f
        mov     ecx,work.main.d_f

        shr     ebp,16                          ; integer x of the major edge for the next line
        add     eax,ecx                         ; step work.main.f; CF = carry out

        sbb     ecx,ecx                         ; ecx = 0 (no carry) or -1 (carry)

        shr     edi,16                          ; integer x of the minor edge for the next line
        mov     work.main.f,eax

        mov     eax,work_pi_current
        mov     edx,work_pz_current

        ; ecx*8 is 0 or -8, so the dword read is either the "nocarry" delta or
        ; the one 8 bytes below it (presumably the matching "carry" delta -
        ; depends on the workspace layout). Original note: *4 for old workspace.
        add     eax,[work_pi_d_nocarry+ecx*8]   ; step i according to carry from major edge
        add     edx,[work_pz_d_nocarry+ecx*8]   ; step z according to carry from major edge

        mov     work_pi_current,eax
        mov     ecx,work_&half&_count

        mov     work_pz_current,edx
        dec     ecx                             ; one scanline fewer

        mov     work_&half&_count,ecx           ; mov leaves the flags from dec intact
        jge     scan_loop

done_trapezium:
        endm
|
|
|
|
|
|
; TRAPEZIUM_ZIF_I8_D16 half,dirn
;
; Trapezium loop for gouraud shaded, z buffered faces with a fog table
; lookup: the byte written to the colour buffer is read from work.fog_table,
; indexed by (high byte of the new depth value << 8) + interpolated index.
;
;   half    top or bot - selects work_&half&_count, work_&half&_i and
;           work_&half&_d_i for the minor edge
;   dirn    f or b - selects the op_f / op_b instruction aliases, i.e. the
;           direction scanlines are rendered from the long edge
;
; Does nothing if work_&half&_count is negative.
;
; Unlike TRAPEZIUM_ZI_I8_D16, z stays in its normal word order here and the
; depth value is taken from its high word with a shift; only i is held with
; its words swapped, so work_pi_grad_x must already be in swapped form.
;
; The colour buffer address of the line end is parked in
; workspace.majorScanAddress (an alias for scratch0) because esi doubles as
; the i gradient inside the loop.
;
; Uses eax, ebx, ecx, edx, esi, edi and ebp; none are preserved.
;

majorScanAddress equ scratch0

TRAPEZIUM_ZIF_I8_D16 macro half,dirn
        local   scan_loop,pixel_loop,pixel_behind,no_pixels,done_trapezium

        mov     ebx,work_&half&_count           ; scanlines remaining in this half
                                                ; (second pipe slot unused)

        test    ebx,ebx
        jl      done_trapezium                  ; negative count: empty trapezium

        mov     eax,work_pi_current
        mov     edx,work_pz_current

        mov     edi,work_&half&_i
        mov     ebp,work_main_i

        shr     edi,16                          ; integer x of the minor edge (end of line)
        mov     ebx,workspace.depthAddress

        shr     ebp,16                          ; integer x of the major edge (start of line)
        mov     esi,workspace.scanAddress

scan_loop:

        ; Work out the pixel count and the end-of-line addresses, then
        ; prepare i for the inner loop.
        ;
        sub     ebp,edi                         ; ebp = start - end, used as counter and offset
        jg_&dirn    no_pixels                   ; nothing to draw on this line

        add     esi,edi                         ; colour buffer address of line end
        lea     edi,[ebx+edi*2]                 ; depth buffer address of line end

        ror     eax,16                          ; swap words of i
        xor     ebx,ebx

        mov     workspace.majorScanAddress,esi  ; park the colour address; esi is reused below
        mov     esi,work_pi_grad_x

        STOP_TIMER outer
        START_TIMER inner

        ; Register use in the pixel loop:
        ;   eax = i                             ebx = old z, dz, fog table
        ;   ecx = z integer, fog lookup index   edx = z
        ;   esi = di, colour buffer ptr         edi = depth buffer ptr
        ;   ebp = count / offset from line end
        ;
pixel_loop:
        xor     ebx,ebx                         ; clear ebx so only the 16-bit stored depth is compared
        mov     ecx,edx

        shr     ecx,16                          ; ecx = high word of z (fraction shifted out)
        mov     bl,[edi+ebp*2]                  ; low byte of stored depth

        mov     bh,[edi+ebp*2+1]                ; high byte of stored depth

        cmp     ecx,ebx
        ja      pixel_behind                    ; new z above stored z (unsigned): skip the write

        mov     [edi+ebp*2],cl                  ; store low byte of new depth
        mov     cl,al                           ; ecx = (high byte of depth << 8) + i

        mov     ebx,work.fog_table              ; fog table base
        mov     esi,workspace.majorScanAddress  ; colour buffer address of line end

        mov     [edi+ebp*2+1],ch                ; store high byte of new depth
                                                ; (original note: AGI stall here)
        mov     cl,[ecx+ebx]                    ; fog lookup

        mov     [esi+ebp],cl                    ; store fogged index
        mov     esi,work_pi_grad_x              ; esi = di again
pixel_behind:
        mov     ebx,work_pz_grad_x              ; ebx = dz again
        add_&dirn   eax,esi                     ; step i

        adc_&dirn   eax,0                       ; carry into the low word of i
        add_&dirn   edx,ebx                     ; step z

        inc_&dirn   ebp                         ; advance counter/offset
        jle_&dirn   pixel_loop

        STOP_TIMER inner
        START_TIMER outer

no_pixels:

        ; Advance everything to the next scanline.
        ;
        mov     esi,workspace.scanAddress
        mov     ecx,work.colour.stride_b

        mov     ebx,workspace.depthAddress
        mov     edx,work.depth.stride_b

        add     esi,ecx                         ; next line in the colour buffer
        add     ebx,edx                         ; next line in the depth buffer

        mov     workspace.scanAddress,esi
        mov     workspace.depthAddress,ebx

        mov     ebp,work_main_i
        mov     edi,work_&half&_i

        add     ebp,work_main_d_i               ; step major edge
        add     edi,work_&half&_d_i             ; step minor edge

        mov     work_main_i,ebp
        mov     work_&half&_i,edi

        mov     eax,work.main.f
        mov     ecx,work.main.d_f

        shr     ebp,16                          ; integer x of the major edge for the next line
        add     eax,ecx                         ; step work.main.f; CF = carry out

        sbb     ecx,ecx                         ; ecx = 0 (no carry) or -1 (carry)

        shr     edi,16                          ; integer x of the minor edge for the next line
        mov     work.main.f,eax

        mov     eax,work_pi_current
        mov     edx,work_pz_current

        ; ecx*8 is 0 or -8: reads the "nocarry" delta or the dword 8 bytes
        ; below it (presumably the "carry" delta - depends on the workspace
        ; layout). Original note: *4 for old workspace.
        add     eax,[work_pi_d_nocarry+ecx*8]   ; step i according to carry from major edge
        add     edx,[work_pz_d_nocarry+ecx*8]   ; step z according to carry from major edge

        mov     work_pi_current,eax
        mov     ecx,work_&half&_count

        mov     work_pz_current,edx
        dec     ecx                             ; one scanline fewer

        mov     work_&half&_count,ecx           ; mov leaves the flags from dec intact
        jge     scan_loop

done_trapezium:
        endm
|
|
|
|
|
|
; TRAPEZIUM_ZT_I8_D16_256 half,dirn
;
; Trapezium loop for linear texture mapped, z buffered faces. The texel is
; read from work.texture.base at offset (v integer byte << 8) + u integer
; byte; a texel value of zero is skipped (neither colour nor depth written).
;
;   half    top or bot - selects work_&half&_count, work_&half&_i and
;           work_&half&_d_i for the minor edge
;   dirn    f or b - selects the op_f / op_b instruction aliases, i.e. the
;           direction scanlines are rendered from the long edge
;
; Does nothing if work_&half&_count is negative.
;
; z is held with its words swapped inside the pixel loop (stored part in dx),
; so work_pz_grad_x must already be in swapped form. u and v stay in their
; normal word order.
;
; The end-of-line colour and depth addresses are kept in work.tsl.dest and
; work.tsl.zdest because edi and ebx are reused inside the loop.
;
; Uses eax, ebx, ecx, edx, esi, edi and ebp; none are preserved.
;
TRAPEZIUM_ZT_I8_D16_256 macro half,dirn
        local   scan_loop,pixel_loop,pixel_behind,no_pixels,done_trapezium

        mov     ebx,work_&half&_count           ; scanlines remaining in this half
        mov     edx,work_pz_current

        test    ebx,ebx
        jl      done_trapezium                  ; negative count: empty trapezium

        mov     edi,work_&half&_i
        mov     ebp,work_main_i

        shr     edi,16                          ; integer x of the minor edge (end of line)
        mov     ecx,work_pv_current

        shr     ebp,16                          ; integer x of the major edge (start of line)
        mov     esi,work_pu_current

scan_loop:

        ; Work out the pixel count and the end-of-line addresses.
        ;
        mov     ebx,workspace.depthAddress
        sub     ebp,edi                         ; ebp = start - end, used as counter and offset

        mov     eax,workspace.scanAddress       ; mov leaves the flags from sub intact
        jg_&dirn    no_pixels                   ; nothing to draw on this line

        add     eax,edi                         ; colour buffer address of line end
        lea     edi,[ebx+edi*2]                 ; depth buffer address of line end

        mov     work.tsl.dest,eax
        mov     work.tsl.zdest,edi

        ror     edx,16                          ; swap words of z for the loop
        mov     eax,esi                         ; copy of u

        shr     eax,16                          ; eax = high word of u
        mov     ebx,edx                         ; copy of z (shares the high word with edx)

        STOP_TIMER outer
        START_TIMER inner

        ; Register use in the pixel loop:
        ;   eax = texel offset, texel       ebx = old z, colour buffer base, dz
        ;   ecx = v                         edx = z
        ;   esi = u                         edi = depth buffer base, texture base
        ;   ebp = count / offset from line end
        ;
pixel_loop:

        ror     ecx,16                          ; bring the high word of v into cx
        mov     bl,[edi+ebp*2]                  ; low byte of stored depth

        mov     ah,cl                           ; texel offset: ah = v byte, al = u byte
        mov     bh,[edi+ebp*2+1]                ; high byte of stored depth

        mov     edi,work.texture.base
        cmp     edx,ebx                         ; new z against stored z (high words are identical)

        mov     ebx,work.tsl.dest               ; mov leaves the flags from cmp intact
        ja      pixel_behind                    ; new z above stored z (unsigned): skip

        mov     al,[eax+edi]                    ; fetch texel
        mov     edi,work.tsl.zdest

        and     al,al
        je      pixel_behind                    ; texel 0: nothing is written

        mov     [edi+ebp*2],dx                  ; store depth
        mov     [ebx+ebp],al                    ; store texel

pixel_behind:

        ror     ecx,16                          ; put v back in normal word order
        mov     edi,work.tsl.zdest              ; needed when the branch skipped the reload above

        add_&dirn   esi,work_pu_grad_x          ; step u
        add_&dirn   ecx,work_pv_grad_x          ; step v

        mov     eax,esi
        mov     ebx,work_pz_grad_x

        shr     eax,16                          ; eax = high word of u for the next pixel
        add_&dirn   edx,ebx                     ; step z

        adc_&dirn   edx,0                       ; carry into the low word of z
        inc_&dirn   ebp                         ; advance counter/offset

        mov     ebx,edx                         ; copy of z for the next compare
        jle_&dirn   pixel_loop

        STOP_TIMER inner
        START_TIMER outer

no_pixels:

        ; Advance everything to the next scanline.
        ;
        mov     eax,workspace.scanAddress
        mov     ebx,work.colour.stride_b

        mov     ecx,workspace.depthAddress
        mov     edx,work.depth.stride_b

        add     eax,ebx                         ; next line in the colour buffer
        add     ecx,edx                         ; next line in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        mov     ebp,work_main_i
        mov     edi,work_&half&_i

        add     ebp,work_main_d_i               ; step major edge
        add     edi,work_&half&_d_i             ; step minor edge

        mov     work_main_i,ebp
        mov     work_&half&_i,edi

        mov     eax,work.main.f
        mov     ebx,work.main.d_f

        shr     ebp,16                          ; integer x of the major edge for the next line
        add     eax,ebx                         ; step work.main.f; CF = carry out

        sbb     ebx,ebx                         ; ebx = 0 (no carry) or -1 (carry)

        shr     edi,16                          ; integer x of the minor edge for the next line
        mov     work.main.f,eax

        ; ebx*8 is 0 or -8: reads the "nocarry" delta or the dword 8 bytes
        ; below it (presumably the "carry" delta - depends on the workspace
        ; layout). Original note: *4 for old workspace.
        mov     edx,work_pz_current
        mov     eax,[work_pz_d_nocarry+ebx*8]

        add     edx,eax                         ; step z according to carry from major edge
        mov     ecx,work_pv_current

        mov     esi,work_pu_current
        mov     work_pz_current,edx

        add     esi,[work_pu_d_nocarry+ebx*8]   ; step u according to carry from major edge
        add     ecx,[work_pv_d_nocarry+ebx*8]   ; step v according to carry from major edge

        mov     work_pv_current,ecx
        mov     ebx,work_&half&_count

        mov     work_pu_current,esi
        dec     ebx                             ; one scanline fewer

        mov     work_&half&_count,ebx           ; mov leaves the flags from dec intact
        jge     scan_loop

done_trapezium:
        endm
|
|
|
|
|
|
; TRAPEZIUM_ZTF_I8_D16_256 half,dirn
;
; Trapezium loop for linear texture mapped, z buffered faces with a fog table
; lookup. The texel is read from work.texture.base at offset
; (v integer byte << 8) + u integer byte; a texel value of zero is skipped.
; The byte written to the colour buffer is read from work.fog_table, indexed
; by (high byte of the new depth value << 8) + texel.
;
;   half    top or bot - selects work_&half&_count, work_&half&_i and
;           work_&half&_d_i for the minor edge
;   dirn    f or b - selects the op_f / op_b instruction aliases, i.e. the
;           direction scanlines are rendered from the long edge
;
; Does nothing if work_&half&_count is negative.
;
; For each scanline u and v are masked with 00fffff0h and repacked: al holds
; the integer byte of u, and ebx holds 12 bits of u fraction above the 20
; remaining bits of v (see the register notes at the pixel loop).
; work_pv_grad_x and work_pu_grad_x must already be in the matching packed
; form. z stays in its normal word order.
;
; Uses eax, ebx, ecx, edx, esi, edi and ebp; none are preserved.
;
TRAPEZIUM_ZTF_I8_D16_256 macro half,dirn
        local   scan_loop,pixel_loop,pixel_behind,no_pixels,done_trapezium

        mov     ebx,work_&half&_count           ; scanlines remaining in this half
        mov     esi,work_pz_current

        test    ebx,ebx
        jl      done_trapezium                  ; negative count: empty trapezium

        mov     edi,work_&half&_i
        mov     ebp,work_main_i

        shr     edi,16                          ; integer x of the minor edge (end of line)
        mov     ebx,work_pv_current

        shr     ebp,16                          ; integer x of the major edge (start of line)
        mov     eax,work_pu_current

scan_loop:

        mov     edx,workspace.depthAddress
        sub     ebp,edi                         ; ebp = start - end, used as counter and offset

        mov     ecx,workspace.scanAddress       ; mov leaves the flags from sub intact
        jg_&dirn    no_pixels                   ; nothing to draw on this line

        add     ecx,edi                         ; colour buffer address of line end
        lea     edi,[edx+edi*2]                 ; depth buffer address of line end

        and     eax,000fffff0h                  ; pack u and v into the layout
        and     ebx,000fffff0h                  ; described below

        rol     ebx,12
        mov     work.tsl.dest,ecx               ; remember colour buffer address of line end

        or      bx,ax                           ; merge the low word of u into bx
        mov     work.tsl.zdest,edi              ; remember depth buffer address of line end

        shr     eax,16                          ; eax = integer byte of u
        mov     ecx,esi                         ; copy of z for the first pixel

        ror     ebx,16                          ; u fraction on top, v below
        xor     edx,edx                         ; edx must be clear on entry to the loop

        STOP_TIMER outer
        START_TIMER inner

        ; Register use in the pixel loop (one hex digit per character):
        ;   eax - texel offset, u coord:    000000uu
        ;         i=integer                 ------ii
        ;   ebx - v coord, u fraction:      uuuvvvvv
        ;         f=fraction, i=integer     fffiifff
        ;   ecx - z temp, fog table
        ;   edx - old z, z & u deltas, texel, fog lookup index
        ;   esi - z
        ;   edi - depth buffer, texture base, colour buffer
        ;   ebp - count / offset from line end
        ;
pixel_loop:
        ror     ebx,4                           ; shift v to a byte boundary
        mov     dl,[edi+ebp*2]                  ; low byte of stored depth

        mov     dh,[edi+ebp*2+1]                ; high byte of stored depth
        mov     ah,bh                           ; texel offset: ah = v byte, al = u byte

        shr     ecx,16                          ; ecx = high word of z
        mov     edi,work.texture.base

        rol     ebx,4                           ; shift v back
        cmp     edx,ecx                         ; stored z against new z

        mov     dl,[edi+eax]                    ; read texel (flags from cmp stay live)
        mov     edi,work.tsl.zdest

        mov     dh,ch                           ; keep high byte of new depth; ecx is reused below
        jl      pixel_behind                    ; stored z below new z: skip

        test    dl,dl                           ; transparency test
        je      pixel_behind                    ; texel 0: nothing is written

        mov     [edi+ebp*2],cl                  ; store low byte of new depth
        mov     ecx,work.fog_table              ; fog table base

        mov     [edi+ebp*2+1],dh                ; store high byte of new depth (saved above)
        mov     edi,work.tsl.dest               ; colour buffer address of line end

        mov     ah,[ecx+edx]                    ; fog lookup, edx = (depth high byte << 8) + texel
                                                ; (original note: no pairing)

        mov     [edi+ebp],ah                    ; write fogged texel
        mov     edi,work.tsl.zdest              ; edi = depth buffer address again

pixel_behind:

        mov     edx,work_pz_grad_x              ; fetch deltas
        mov     ecx,work_pv_grad_x

        add_&dirn   esi,edx                     ; step z
        mov     edx,work_pu_grad_x

        add_&dirn   ebx,ecx                     ; step v and the u fraction; CF = carry out of u fraction
        mov     ecx,esi                         ; copy of z for the next pixel

        adc_&dirn   al,dl                       ; step the integer byte of u with that carry
        xor     edx,edx                         ; edx must be clear for the next pixel

        inc_&dirn   ebp                         ; advance counter/offset
        jle_&dirn   pixel_loop

        STOP_TIMER inner
        START_TIMER outer

no_pixels:

        ; Advance everything to the next scanline.
        ;
        mov     eax,workspace.scanAddress
        mov     edx,work.colour.stride_b

        mov     ecx,workspace.depthAddress
        mov     ebx,work.depth.stride_b

        add     eax,edx                         ; next line in the colour buffer
        add     ecx,ebx                         ; next line in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        mov     ebp,work_main_i
        mov     edi,work_&half&_i

        add     ebp,work_main_d_i               ; step major edge
        add     edi,work_&half&_d_i             ; step minor edge

        mov     work_main_i,ebp
        mov     work_&half&_i,edi

        mov     eax,work.main.f
        mov     edx,work.main.d_f

        shr     ebp,16                          ; integer x of the major edge for the next line
        add     eax,edx                         ; step work.main.f; CF = carry out

        sbb     edx,edx                         ; edx = 0 (no carry) or -1 (carry)

        shr     edi,16                          ; integer x of the minor edge for the next line
        mov     work.main.f,eax

        ; edx*8 is 0 or -8: reads the "nocarry" delta or the dword 8 bytes
        ; below it (presumably the "carry" delta - depends on the workspace
        ; layout). Original note: *4 for old workspace.
        mov     esi,work_pz_current
        mov     eax,[work_pz_d_nocarry+edx*8]

        add     esi,eax                         ; step z according to carry from major edge
        mov     ebx,work_pv_current

        mov     eax,work_pu_current
        mov     work_pz_current,esi

        add     eax,[work_pu_d_nocarry+edx*8]   ; step u according to carry from major edge
        add     ebx,[work_pv_d_nocarry+edx*8]   ; step v according to carry from major edge

        mov     work_pv_current,ebx
        mov     ecx,work_&half&_count

        mov     work_pu_current,eax
        dec     ecx                             ; one scanline fewer

        mov     work_&half&_count,ecx           ; mov leaves the flags from dec intact
        jge     scan_loop

done_trapezium:
        endm
|
|
|
|
|
|
if PARTS and PART_8Z

; TriangleRender_ZI_I8_D16
;
; Render a triangle into the frame buffer:
;
;   Linear interpolated colour index
;   Linear interpolated Z value
;   Real vertices
;   Perfect point sampling
;
; The three vertex pointers are copied into workspace.v0..v2 and
; TriangleSetup_ZI is called. Execution then falls into
; TriangleRasterise_ZI_I8_D16, which is also declared externally so that
; other code can enter at that point with the workspace already set up.
;
; pblock is not referenced in this procedure.
;
; NOTE(review): the TRAPEZIUM macros overwrite ebp, so the arguments are only
; read before the rasteriser starts. The "ret" statements rely on the
; assembler-generated epilogue for the register restore - confirm before
; adding locals.
;
TriangleRender_ZI_I8_D16 proc uses eax ebx ecx edx esi edi,
        pblock : ptr dword,
        pvertex_0 : ptr word,
        pvertex_1 : ptr word,
        pvertex_2 : ptr word

        CLEAR_TIMER setup
        CLEAR_TIMER outer
        CLEAR_TIMER inner

        START_TIMER setup

        ; Hand the vertex pointers to the setup code.
        ;
        mov     eax,pvertex_0
        mov     ecx,pvertex_1

        mov     edx,pvertex_2
        mov     workspace.v0,eax

        mov     workspace.v1,ecx
        mov     workspace.v2,edx

        ; Floating point setup.
        ;
        call    TriangleSetup_ZI
TriangleRasterise_ZI_I8_D16 label proc

        ; Address of the first scanline in the colour and depth buffers:
        ;   scanAddress  = colour.base + (t_y - 1) * colour.stride_b - 1
        ;   depthAddress = depth.base  + (t_y - 1) * depth.stride_b  - 2
        ;
        mov     esi,workspace.t_y
        mov     eax,work.colour.base

        dec     esi
        mov     ebx,work.colour.stride_b

        mov     ecx,work.depth.base
        mov     edx,work.depth.stride_b

        imul    ebx,esi

        imul    edx,esi

        add     eax,ebx
        add     ecx,edx

        dec     eax                             ; back one byte in the colour buffer
        sub     ecx,2                           ; back two bytes in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        ; Put the low words of the major edge start and delta into the high
        ; words of work.main.f / work.main.d_f, and swap the words of the z
        ; and i gradients.
        ;
        ; A negative gradient would carry into the swapped-down part when it
        ; is stepped, so one is subtracted from it first:
        ; cmp x,80000000h sets carry exactly when x is non-negative, and
        ; adc x,-1 then leaves x unchanged (carry) or subtracts one (no carry).
        ;
        mov     eax,work_main_i
        mov     ebx,work_main_d_i

        shl     eax,16
        mov     ecx,work_pz_grad_x

        shl     ebx,16
        cmp     ecx,80000000h

        adc     ecx,-1                          ; z gradient: minus one if negative
        mov     edx,work_pi_grad_x

        ror     ecx,16                          ; swap words of z gradient
        cmp     edx,80000000h

        adc     edx,-1                          ; i gradient: minus one if negative
        mov     work.main.f,eax

        ror     edx,16                          ; swap words of i gradient
        mov     work.main.d_f,ebx

        mov     work_pz_grad_x,ecx
        mov     work_pi_grad_x,edx

        ; Pick the forward or backward rasteriser from workspace.flip.
        ;
        mov     eax,workspace.flip
                                                ; (second pipe slot unused)

        test    eax,eax
        jnz     reversed

        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZI_I8_D16 top,f
        TRAPEZIUM_ZI_I8_D16 bot,f

        STOP_TIMER outer
quit:
        ret

reversed:
        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZI_I8_D16 top,b
        TRAPEZIUM_ZI_I8_D16 bot,b

        STOP_TIMER outer

        ret

TriangleRender_ZI_I8_D16 endp

endif
|
|
|
|
if PARTS and PART_8Z_FOG

; TriangleRender_ZIF_I8_D16_FLAT
;
; Variant of TriangleRender_ZIF_I8_D16 that calls TriangleSetup_ZI_FLAT for
; setup and then jumps to the shared rasteriser entry point
; TriangleRasterise_ZIF_I8_D16. It declares the same "uses" list and
; arguments as TriangleRender_ZIF_I8_D16, and it returns through that
; procedure's "ret".
;
; pblock is not referenced in this procedure.
;
TriangleRender_ZIF_I8_D16_FLAT proc uses eax ebx ecx edx esi edi,
        pblock : ptr dword,
        pvertex_0 : ptr word,
        pvertex_1 : ptr word,
        pvertex_2 : ptr word

        CLEAR_TIMER setup
        CLEAR_TIMER outer
        CLEAR_TIMER inner

        START_TIMER setup

        ; Hand the vertex pointers to the setup code.
        ;
        mov     eax,pvertex_0
        mov     ecx,pvertex_1

        mov     edx,pvertex_2
        mov     workspace.v0,eax

        mov     workspace.v1,ecx
        mov     workspace.v2,edx

        ; Floating point setup.
        ;
        call    TriangleSetup_ZI_FLAT

        jmp     TriangleRasterise_ZIF_I8_D16

TriangleRender_ZIF_I8_D16_FLAT endp


; TriangleRender_ZIF_I8_D16
;
; Render a triangle into the frame buffer:
;
;   Linear interpolated colour index, passed through the fog table
;   Linear interpolated Z value
;   Real vertices
;   Perfect point sampling
;
; The three vertex pointers are copied into workspace.v0..v2 and
; TriangleSetup_ZI is called. Execution then falls into
; TriangleRasterise_ZIF_I8_D16, which TriangleRender_ZIF_I8_D16_FLAT also
; jumps to.
;
; pblock is not referenced in this procedure.
;
; NOTE(review): the TRAPEZIUM macros overwrite ebp, so the arguments are only
; read before the rasteriser starts. The "ret" statements rely on the
; assembler-generated epilogue for the register restore - confirm before
; adding locals.
;
TriangleRender_ZIF_I8_D16 proc uses eax ebx ecx edx esi edi,
        pblock : ptr dword,
        pvertex_0 : ptr word,
        pvertex_1 : ptr word,
        pvertex_2 : ptr word

        CLEAR_TIMER setup
        CLEAR_TIMER outer
        CLEAR_TIMER inner

        START_TIMER setup

        ; Hand the vertex pointers to the setup code.
        ;
        mov     eax,pvertex_0
        mov     ecx,pvertex_1

        mov     edx,pvertex_2
        mov     workspace.v0,eax

        mov     workspace.v1,ecx
        mov     workspace.v2,edx

        ; Floating point setup.
        ;
        call    TriangleSetup_ZI

TriangleRasterise_ZIF_I8_D16 label proc

        ; Address of the first scanline in the colour and depth buffers:
        ;   scanAddress  = colour.base + (t_y - 1) * colour.stride_b - 1
        ;   depthAddress = depth.base  + (t_y - 1) * depth.stride_b  - 2
        ;
        mov     esi,workspace.t_y
        mov     eax,work.colour.base

        dec     esi
        mov     ebx,work.colour.stride_b

        mov     ecx,work.depth.base
        mov     edx,work.depth.stride_b

        imul    ebx,esi

        imul    edx,esi

        add     eax,ebx
        add     ecx,edx

        dec     eax                             ; back one byte in the colour buffer
        sub     ecx,2                           ; back two bytes in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        ; Put the low words of the major edge start and delta into the high
        ; words of work.main.f / work.main.d_f, and swap the words of the i
        ; gradient.
        ;
        ; A negative gradient would carry into the swapped-down part when it
        ; is stepped, so one is subtracted from it first:
        ; cmp x,80000000h sets carry exactly when x is non-negative, and
        ; adc x,-1 then leaves x unchanged (carry) or subtracts one (no carry).
        ;
        ; The z gradient is deliberately left untouched here (the equivalent
        ; instructions are disabled) because TRAPEZIUM_ZIF_I8_D16 keeps z in
        ; its normal word order.
        ;
        mov     eax,work_main_i
        mov     ebx,work_main_d_i

        shl     eax,16
;       mov     ecx,work_pz_grad_x

        shl     ebx,16
;       cmp     ecx,80000000h

;       adc     ecx,-1
        mov     edx,work_pi_grad_x

;       ror     ecx,16
        cmp     edx,80000000h

        adc     edx,-1                          ; i gradient: minus one if negative
        mov     work.main.f,eax

        ror     edx,16                          ; swap words of i gradient
        mov     work.main.d_f,ebx

;       mov     work_pz_grad_x,ecx
        mov     work_pi_grad_x,edx

        ; Pick the forward or backward rasteriser from workspace.flip.
        ;
        mov     eax,workspace.flip
                                                ; (second pipe slot unused)

        test    eax,eax
        jnz     reversed

        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZIF_I8_D16 top,f
        TRAPEZIUM_ZIF_I8_D16 bot,f

        STOP_TIMER outer
quit:
        ret

reversed:
        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZIF_I8_D16 top,b
        TRAPEZIUM_ZIF_I8_D16 bot,b

        STOP_TIMER outer

        ret

TriangleRender_ZIF_I8_D16 endp

endif
|
|
|
|
if PARTS and PART_8Z

; TriangleRender_ZT_I8_D16_256
;
; Render a triangle into the frame buffer:
;
;   Linear interpolated colour texture
;   Linear interpolated Z value
;   Real vertices
;   Perfect point sampling
;
; The three vertex pointers are copied into workspace.v0..v2 and
; TriangleSetup_ZT is called. Execution then falls into
; TriangleRasterise_ZT_I8_D16_256, which is also declared externally so that
; other code can enter at that point with the workspace already set up.
;
; pblock is not referenced in this procedure.
;
; NOTE(review): the TRAPEZIUM macros overwrite ebp, so the arguments are only
; read before the rasteriser starts. The "ret" statements rely on the
; assembler-generated epilogue for the register restore - confirm before
; adding locals.
;
TriangleRender_ZT_I8_D16_256 proc uses eax ebx ecx edx esi edi,
        pblock : ptr dword,
        pvertex_0 : ptr word,
        pvertex_1 : ptr word,
        pvertex_2 : ptr word

        CLEAR_TIMER setup
        CLEAR_TIMER outer
        CLEAR_TIMER inner

        START_TIMER setup

        ; Hand the vertex pointers to the setup code.
        ;
        mov     eax,pvertex_0
        mov     ecx,pvertex_1

        mov     edx,pvertex_2
        mov     workspace.v0,eax

        mov     workspace.v1,ecx
        mov     workspace.v2,edx

        ; Floating point setup.
        ;
        call    TriangleSetup_ZT

TriangleRasterise_ZT_I8_D16_256 label proc

        ; Address of the first scanline in the colour and depth buffers:
        ;   scanAddress  = colour.base + (t_y - 1) * colour.stride_b - 1
        ;   depthAddress = depth.base  + (t_y - 1) * depth.stride_b  - 2
        ;
        mov     esi,workspace.t_y
        mov     eax,work.colour.base

        dec     esi
        mov     ebx,work.colour.stride_b

        mov     ecx,work.depth.base
        mov     edx,work.depth.stride_b

        imul    ebx,esi

        imul    edx,esi

        add     eax,ebx
        add     ecx,edx

        dec     eax                             ; back one byte in the colour buffer
        sub     ecx,2                           ; back two bytes in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        ; Put the low words of the major edge start and delta into the high
        ; words of work.main.f / work.main.d_f, and swap the words of the z
        ; gradient.
        ;
        ; A negative gradient would carry into the swapped-down part when it
        ; is stepped, so one is subtracted from it first:
        ; cmp x,80000000h sets carry exactly when x is non-negative, and
        ; adc x,-1 then leaves x unchanged (carry) or subtracts one (no carry).
        ;
        mov     eax,work_main_i
        mov     ebx,work_main_d_i

        shl     eax,16
        mov     ecx,work_pz_grad_x

        shl     ebx,16
        cmp     ecx,80000000h

        adc     ecx,-1                          ; z gradient: minus one if negative
        mov     work.main.f,eax

        ror     ecx,16                          ; swap words of z gradient
        mov     work.main.d_f,ebx

        mov     work_pz_grad_x,ecx
        mov     eax,workspace.flip

        ; Pick the forward or backward rasteriser from workspace.flip.
        ;
        test    eax,eax
        jnz     reversed

        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZT_I8_D16_256 top,f
        TRAPEZIUM_ZT_I8_D16_256 bot,f

        STOP_TIMER outer
quit:
        ret

reversed:
        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZT_I8_D16_256 top,b
        TRAPEZIUM_ZT_I8_D16_256 bot,b

        STOP_TIMER outer

        ret

TriangleRender_ZT_I8_D16_256 endp

endif
|
|
|
|
if PARTS and PART_8Z_FOG

; TriangleRender_ZTF_I8_D16_256
;
; Render a triangle into the frame buffer:
;
;   Linear interpolated colour texture, passed through the fog table
;   Linear interpolated Z value
;   Real vertices
;   Perfect point sampling
;
; The three vertex pointers are copied into workspace.v0..v2 and
; TriangleSetup_ZT is called. Execution then falls into
; TriangleRasterise_ZTF_I8_D16_256, which is also declared externally so that
; other code can enter at that point with the workspace already set up.
;
; pblock is not referenced in this procedure.
;
; NOTE(review): the TRAPEZIUM macros overwrite ebp, so the arguments are only
; read before the rasteriser starts. The "ret" statements rely on the
; assembler-generated epilogue for the register restore - confirm before
; adding locals.
;
TriangleRender_ZTF_I8_D16_256 proc uses eax ebx ecx edx esi edi,
        pblock : ptr dword,
        pvertex_0 : ptr word,
        pvertex_1 : ptr word,
        pvertex_2 : ptr word

        CLEAR_TIMER setup
        CLEAR_TIMER outer
        CLEAR_TIMER inner

        START_TIMER setup

        ; Hand the vertex pointers to the setup code.
        ;
        mov     eax,pvertex_0
        mov     ecx,pvertex_1

        mov     edx,pvertex_2
        mov     workspace.v0,eax

        mov     workspace.v1,ecx
        mov     workspace.v2,edx

        ; Floating point setup.
        ;
        call    TriangleSetup_ZT

TriangleRasterise_ZTF_I8_D16_256 label proc

        ; Address of the first scanline in the colour and depth buffers:
        ;   scanAddress  = colour.base + (t_y - 1) * colour.stride_b - 1
        ;   depthAddress = depth.base  + (t_y - 1) * depth.stride_b  - 2
        ;
        mov     esi,workspace.t_y
        mov     eax,work.colour.base

        dec     esi
        mov     ebx,work.colour.stride_b

        mov     ecx,work.depth.base
        mov     edx,work.depth.stride_b

        imul    ebx,esi

        imul    edx,esi

        add     eax,ebx
        add     ecx,edx

        dec     eax                             ; back one byte in the colour buffer
        sub     ecx,2                           ; back two bytes in the depth buffer

        mov     workspace.scanAddress,eax
        mov     workspace.depthAddress,ecx

        ; Put the low words of the major edge start and delta into the high
        ; words of work.main.f / work.main.d_f, and pack the u and v
        ; gradients into the layout used by TRAPEZIUM_ZTF_I8_D16_256:
        ;
        ;   work_pv_grad_x = 12 bits of u gradient fraction above the 20
        ;                    remaining bits of the v gradient
        ;   work_pu_grad_x = integer byte of the u gradient
        ;
        ; Both gradients are masked with 00fffff0h first. The z gradient is
        ; not modified here.
        ;
        ; In the packed register the v field sits directly below the u
        ; fraction, so one is subtracted from the u gradient when the v
        ; gradient is negative: cmp edx,80000000h sets carry exactly when
        ; edx is non-negative, and adc ecx,-1 then leaves ecx unchanged
        ; (carry) or subtracts one (no carry).
        ;
        mov     eax,work_main_i
        mov     ebx,work_main_d_i

        shl     eax,16
        mov     ecx,work_pu_grad_x

        mov     work.main.f,eax
        mov     edx,work_pv_grad_x

        shl     ebx,16
        cmp     edx,80000000h

        adc     ecx,-1                          ; u gradient: minus one if v gradient is negative
        and     edx,000fffff0h

        rol     edx,12
        and     ecx,000fffff0h

        or      dx,cx                           ; merge the low word of the u gradient into dx

        ror     edx,16                          ; u fraction on top, v below
        shr     ecx,16                          ; ecx = integer byte of the u gradient

        mov     work_pu_grad_x,ecx
        mov     work_pv_grad_x,edx

        mov     work.main.d_f,ebx
        mov     eax,workspace.flip

        ; Pick the forward or backward rasteriser from workspace.flip.
        ;
        test    eax,eax
        jnz     reversed

        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZTF_I8_D16_256 top,f
        TRAPEZIUM_ZTF_I8_D16_256 bot,f

        STOP_TIMER outer
quit:
        ret

reversed:
        STOP_TIMER setup
        START_TIMER outer

        TRAPEZIUM_ZTF_I8_D16_256 top,b
        TRAPEZIUM_ZTF_I8_D16_256 bot,b

        STOP_TIMER outer

        ret

TriangleRender_ZTF_I8_D16_256 endp

endif
|
|
|
|
end
|