; brender-1997/pentprim/zb8.asm
; 2022-05-03 14:31:40 -07:00
;
; 910 lines
; 16 KiB
; Labelled NASM by the hosting page; the directives used below are MASM syntax.

.586p
.model flat,c
include fpwork.inc
include drv.inc
include fpsetup.inc
include common.inc
; Cheat case entry points for perspective correct texture mapper
;
externdef c TriangleRasterise_Z_I8_D16:proc
externdef c TriangleRasterise_ZTI_I8_D16_256:proc
externdef c TriangleRasterise_ZTIF_I8_D16_256:proc
.code
;*****************
; FLAT RASTERISER
;*****************
; DRAW_Z_I8_D16 - flat-colour, Z-buffered span loop for one trapezoid half.
;   minorX    : name of the minor-edge x field in workspace (invoked with x1 or x2)
;   direction : DRAW_LR or DRAW_RL, passed through to the *_d helper macros
;   half      : top or bottom, selects the workspace line counter field
; The *_d helpers (jg_d, adc_d, add_d, inc_d, jle_d) are not defined in this
; file; presumably they select the left-to-right or right-to-left form of the
; instruction - confirm against the include files.
; Writes one byte per pixel to the colour row and one word per pixel to the
; depth row. Overwrites eax, ebx, ecx, edx, esi, edi and ebp.
DRAW_Z_I8_D16 macro minorX,direction,half
local drawPixel,drawLine,done,lineDrawn,noPlot
; height test
; a negative line count skips the whole half
mov ebx,workspace.&half&Count
mov ebp,workspace.depthAddress
cmp ebx,0
jl done
mov eax,workspace.s_z
mov esi,workspace.d_z_x
mov ebx,workspace.minorX
mov ecx,workspace.xm
drawLine:
; per-line setup: z is rotated by 16, both edge x values are reduced to their
; upper 16 bits, edi and ebp are advanced to the minor edge and ecx becomes
; the signed pixel offset from the minor edge to the major edge
ror eax,16
shr ebx,16
mov edi,workspace.scanAddress
shr ecx,16
add edi,ebx
sub ecx,ebx
; branch uses the flags of the sub above
jg_d lineDrawn,direction
lea ebp,[ebp+2*ebx]
xor ebx,ebx ;used to ensure that cf is clear
mov edx,eax ;needed to ensure upper parts of reg are correct for the first pixel
mov bl,byte ptr workspace.colour
drawPixel:
;regs ebp,depth edi,dest esi,dz eax,c_z ebx, ecx,count edx,old_z
; fold the carry left by the previous z add (clear on the first pixel) back into eax
adc_d eax,0,direction
; fetch the stored 16-bit depth into dx; the upper half of edx is a copy of eax
mov dl,[ebp+2*ecx]
;The following line needs some more experimentation to prove its usefulness in real application
mov dh,[ebp+2*ecx+1]
; unsigned compare: the pixel is skipped when the new value is above the stored one
cmp eax,edx
ja noPlot
; writes
mov [ebp+2*ecx],ax
mov [edi+ecx],bl
noPlot:
; step z; the carry of this add is consumed by adc_d at the top of the loop
add_d eax,esi,direction ;wastes a cycle here
inc_d ecx,direction
mov edx,eax
jle_d drawPixel,direction
lineDrawn:
;perform per line updates
; accumulate the fractional part of the major edge; edi becomes 0 or -1 from the carry
mov ebp,workspace.xm_f
mov edi,workspace.d_xm_f
add ebp,edi
mov eax,workspace.s_z
sbb edi,edi
mov workspace.xm_f,ebp
mov ebp,workspace.depthAddress
mov edx,workspace.&half&Count
; edi selects the z step at d_z_y_0 or the entry 8 bytes below it
mov ebx,[workspace.d_z_y_0+edi*8]
mov edi,workspace.scanAddress
add eax,ebx
mov ebx,workspace.minorX
; carry out of the z add is added back into bit 0
adc eax,0
mov ecx,workspace.xm
add ecx,workspace.d_xm
; advance both row pointers by one stride and step the minor edge
add edi,work.colour.stride_b
mov workspace.s_z,eax
mov workspace.scanAddress,edi
add ebp,work.depth.stride_b
add ebx,workspace.d_&minorX
mov workspace.minorX,ebx
mov workspace.xm,ecx
; dec sets the flags tested by jge below; the mov instructions leave them alone
dec edx
mov workspace.depthAddress,ebp
mov workspace.&half&Count,edx
jge drawLine
done:
endm
if PARTS and PART_8Z
; TriangleRender_Flat - generates a flat-colour, Z-buffered triangle renderer.
;   procName      : name of the generated proc (takes dummy, v0, v1, v2)
;   rasteriseName : label placed after the setup call, a second entry point
;                   that starts at the address calculation
;   setupName     : setup routine called once the vertex pointers are stored
; The generated code stores v0, v1, v2 into workspace, calls setupName, works
; out the colour and depth row addresses on the FPU, prepares the fractional
; major-edge accumulator and the rotated per-pixel z step, then expands
; DRAW_Z_I8_D16 for the top and bottom halves in the direction selected by
; workspace.flip.
; NOTE(review): the expanded code overwrites ebx, esi, edi and ebp; register
; preservation and frame handling are not visible in this file - confirm
; against the prologue/epilogue settings in the includes.
TriangleRender_Flat macro procName,rasteriseName,setupName
procName proc dummy:dword, v0:ptr brp_vertex, v1:ptr brp_vertex, v2:ptr brp_vertex
mov eax,v0
mov ecx,v1
mov edx,v2
mov workspace.v0,eax
mov workspace.v1,ecx
mov workspace.v2,edx
; half cycle wasted
call setupName
rasteriseName label proc
; Floating point address calculation - 20 cycles, (Integer=26)
; Result: scanAddress  = colour.base - 1 + colour.stride_b * (t_y - 1)
;         depthAddress = depth.base  - 2 + depth.stride_b  * (t_y - 1)
; Column comments show the FPU stack after each instruction, st(0) first.
; st(0) st(1) st(2) st(3) st(4) st(5) st(6) st(7)
fild work.colour.base ; cb
fild workspace.t_y ; ty cb
fild work.depth.base ; db ty cb
fild work.colour.stride_b ; cs db ty cb
fild work.depth.stride_b ; ds cs db ty cb
fxch st(4) ; cb cs db ty ds
fsub fp_one ; cb-1 cs db ty ds
fxch st(3) ; ty cs db cb-1 ds
fsub fp_one ; ty-1 cs db cb-1 ds
fxch st(2) ; db cs ty-1 cb-1 ds
fsub fp_two ; db-2 cs ty-1 cb-1 ds
fxch st(3) ; cb-1 cs ty-1 db-2 ds
fadd fp_conv_d ; cb-1I cs ty-1 db-2 ds
fxch st(1) ; cs cb-1I ty-1 db-2 ds
fmul st,st(2) ; csy cb-1I ty-1 db-2 ds
fxch st(3) ; db-2 cb-1I ty-1 csy ds
fadd fp_conv_d ; db-2I cb-1I ty-1 csy ds
fxch st(2) ; ty-1 cb-1I db-2I csy ds
fmulp st(4),st ; cb-1I db-2I csy dsy
faddp st(2),st ; db-2I ca dsy
;stall
faddp st(2),st ; ca da
; each store writes 8 bytes starting at the named field
; NOTE(review): presumably the field after each address is scratch - confirm workspace layout
fstp qword ptr workspace.scanAddress
fstp qword ptr workspace.depthAddress
; xm_f and d_xm_f hold the low 16 bits of xm and d_xm moved to the top of the dword
mov eax,workspace.xm
shl eax,16
mov ebx,workspace.d_xm
shl ebx,16
mov workspace.xm_f,eax
; Prepare the per-pixel z step for the rotated, end-around-carry adds in the
; span loop: a negative step is decremented by one, a non-negative step is
; left unchanged, then the halves are swapped.
mov edx,workspace.d_z_x
; sign test: CF is set only when edx is below 80000000h, i.e. non-negative.
; The constant must be hexadecimal; the previous decimal literal 80000000
; (04C4B400h) made large positive steps take the negative path.
cmp edx,80000000h
adc edx,-1
ror edx,16
mov workspace.d_xm_f,ebx
; d_z_x is overwritten in workspace with its adjusted, rotated form
mov workspace.d_z_x,edx
mov eax,workspace.flip
;half cycle wasted
; flip == 0 draws left to right, anything else right to left
test eax,eax
jnz drawRL
DRAW_Z_I8_D16 x1,DRAW_LR,top
DRAW_Z_I8_D16 x2,DRAW_LR,bottom
ret
drawRL:
DRAW_Z_I8_D16 x1,DRAW_RL,top
DRAW_Z_I8_D16 x2,DRAW_RL,bottom
ret
procName endp
endm
endif
;***********************
; LIT LINEAR RASTERISER
;***********************
if 1 ; old but faster code.
; majorScanAddress aliases the workspace field scratch0; the span loop keeps the
; colour address of the minor edge pixel there.
majorScanAddress equ scratch0
; DRAW_ZTI_I8_D16_256 - Z-buffered, textured, lit span loop for one trapezoid half.
;   minorX    : name of the minor-edge x field in workspace (invoked with x1 or x2)
;   direction : DRAW_LR or DRAW_RL, passed through to the *_d helper macros
;   half      : top or bottom, selects the workspace line counter field
; The *_d helpers are not defined in this file; presumably they select the
; left-to-right or right-to-left form of the instruction - confirm against the
; include files.
; Per pixel: depth test on the upper 16 bits of c_z, texel fetch indexed by one
; byte of v and one byte of u, shade_table lookup indexed by one byte of i and
; the texel, texel value 0 is not drawn.
; Overwrites eax, ebx, ecx, edx, esi, edi and ebp.
DRAW_ZTI_I8_D16_256 macro minorX,direction,half
local drawPixel,drawLine,done,lineDrawn,noPlot
; height test
; a negative line count skips the whole half
mov ebx,workspace.&half&Count
mov ebp,workspace.depthAddress
cmp ebx,0
jl done
; trapezoid setup
; copy the edge start values s_* into the running per-pixel values c_*
mov edx,workspace.s_z
mov eax,workspace.s_u
mov workspace.c_z,edx
mov workspace.c_u,eax
mov eax,workspace.s_i
mov ebx,workspace.s_v
mov workspace.c_v,ebx
mov workspace.c_i,eax
drawLine:
; per-line setup: upper 16 bits of both edge x values, row pointers moved to
; the minor edge, ecx = signed pixel offset from the minor to the major edge
mov edx,workspace.minorX
mov ecx,workspace.xm
shr edx,16
mov edi,workspace.scanAddress
shr ecx,16
add edi,edx
sub ecx,edx
mov workspace.majorScanAddress,edi
; mov and lea leave the flags of the sub intact for the branch below
lea ebp,[ebp+2*edx]
jg_d lineDrawn,direction
drawPixel:
;z-buffer
; eax = upper 16 bits of c_z, edx = stored 16-bit depth, both zero extended
xor edx,edx ;/and edx,0ffffh
mov eax,workspace.c_z
shr eax,16
mov dh,[ebp+2*ecx+1]
xor ebx,ebx
mov dl,[ebp+2*ecx]
cmp eax,edx
; the depth compare flags survive the mov instructions down to ja;
; meanwhile edx becomes the texel index (v byte in dh, u byte in dl)
; and bh the intensity byte
mov dl,byte ptr[workspace.c_u+2]
mov esi,work.texture.base
mov dh,byte ptr[workspace.c_v+2]
mov bh,byte ptr[workspace.c_i+2]
ja noPlot
mov edi,work.shade_table
mov bl,[esi+edx]
; keep a copy of the texel in dl because the add changes the flags and ebx
mov dl,bl
add ebx,edi
test dl,dl
mov edi,workspace.majorScanAddress
; shade_table lookup at offset (intensity byte * 256 + texel)
mov bl,[ebx]
; texel 0 is skipped; flags come from the test above
jz noPlot
; writes
mov [ebp+2*ecx],ax ;wasted cycle
mov [edi+ecx],bl
; add on deltas
noPlot:
;update z,a
mov eax,workspace.c_z
mov ebx,workspace.c_i
mov esi,workspace.d_z_x
mov edx,workspace.d_i_x
add_d eax,esi,direction
add_d ebx,edx,direction
mov workspace.c_z,eax
mov workspace.c_i,ebx
;update u,v
mov eax,workspace.c_u
mov ebx,workspace.c_v
mov esi,workspace.d_u_x
mov edx,workspace.d_v_x
add_d eax,esi,direction
add_d ebx,edx,direction
mov workspace.c_u,eax
mov workspace.c_v,ebx
inc_d ecx,direction
jle_d drawPixel,direction
lineDrawn:
;perform per line updates
; accumulate the fractional part of the major edge; esi becomes 0 or -1 from
; the carry and selects between each d_*_y_0 entry and the one 8 bytes below it
mov eax,workspace.xm_f
mov ebx,workspace.d_xm_f
add eax,ebx
sbb esi,esi
mov workspace.xm_f,eax
; step both edges
mov eax,workspace.xm
mov ecx,workspace.minorX
mov edx,workspace.d_&minorX
mov ebx,workspace.d_xm
add eax,ebx
add ecx,edx
mov workspace.xm,eax
mov workspace.minorX,ecx
;update u,v
mov eax,workspace.s_u
mov ebx,[workspace.d_u_y_0+esi*8]
mov ecx,workspace.s_v
mov edx,[workspace.d_v_y_0+esi*8]
add eax,ebx
add ecx,edx
; the new start values also seed the running values of the next line
mov workspace.c_u,eax
mov workspace.c_v,ecx
mov workspace.s_u,eax
mov workspace.s_v,ecx
;update z,a
mov eax,workspace.s_z
mov ebx,[workspace.d_z_y_0+esi*8]
mov ecx,workspace.s_i
mov edx,[workspace.d_i_y_0+esi*8]
add eax,ebx
add ecx,edx
mov workspace.c_z,eax
mov workspace.c_i,ecx
mov workspace.s_z,eax
mov workspace.s_i,ecx
; advance both row pointers by one stride
mov ebp,workspace.depthAddress
mov ebx,work.depth.stride_b
mov edi,workspace.scanAddress
mov eax,work.colour.stride_b
add ebp,ebx
add edi,eax
mov ecx,workspace.&half&Count
mov workspace.scanAddress,edi
; dec sets the flags tested by jge below; the mov instructions leave them alone
dec ecx
mov workspace.depthAddress,ebp
mov workspace.&half&Count,ecx
jge drawLine
done:
endm
endif
if 0 ; needs some work
; Disabled alternative version of DRAW_ZTI_I8_D16_256 (the version under
; "if 1" earlier in the file is the one that gets assembled). Differences
; visible here: the running values are stepped directly in memory, the texel
; is tested for zero before the depth compare, and the depth compare is done
; on 16-bit registers.
;   minorX    : name of the minor-edge x field in workspace
;   direction : DRAW_LR or DRAW_RL, passed through to the *_d helper macros
;   half      : top or bottom, selects the workspace line counter field
; The done label and the closing endm were missing and have been added so the
; macro is well formed if this branch is ever enabled; enabling it also
; requires disabling the other definition of the same macro name.
DRAW_ZTI_I8_D16_256 macro minorX,direction,half
local drawPixel,drawLine,done,lineDrawn,noPlot
; height test: a negative line count skips the whole half
mov ebx,workspace.&half&Count
mov edi,workspace.scanAddress
cmp ebx,0
jl done
; copy the edge start values s_* into the running per-pixel values c_*
mov eax,workspace.s_u
mov ebx,workspace.s_v
mov ecx,workspace.s_i
mov edx,workspace.s_z
mov workspace.c_u,eax
mov workspace.c_v,ebx
mov workspace.c_i,ecx
mov workspace.c_z,edx
mov ebp,workspace.depthAddress
mov edx,workspace.minorX
drawLine:
; on entry edx = minor edge x, edi = colour row, ebp = depth row
shr edx,16
mov ecx,workspace.xm
shr ecx,16
add edi,edx
; eax is cleared once per line; only al and ah are written in the pixel loop
xor eax,eax
sub ecx,edx
; lea leaves the flags of the sub intact for the branch below
lea ebp,[ebp+2*edx]
jg_d lineDrawn,direction
drawPixel:
; eax = texel index (v byte in ah, u byte in al), bx = upper 16 bits of c_z,
; dx = stored 16-bit depth
mov al,byte ptr[workspace.c_u+2]
mov esi,work.texture.base
mov ah,byte ptr[workspace.c_v+2]
mov bl,byte ptr[workspace.c_z+2]
mov dl,[ebp+2*ecx]
mov bh,byte ptr[workspace.c_z+3]
mov al,[esi+eax]
mov dh,[ebp+2*ecx+1]
; texel 0 is skipped
test al,al
jz noPlot
mov ah,byte ptr[workspace.c_i+2]
mov esi,work.shade_table
cmp bx,dx ;2 cycles
ja noPlot
mov [ebp+2*ecx],bx ; two cycles
; shade_table lookup at offset (intensity byte * 256 + texel)
mov al,[esi+eax]
mov [edi+ecx],al
noPlot:
mov ebx,workspace.d_u_x
mov edx,workspace.d_v_x
add_d workspace.c_u,ebx,direction ; 3 cycles
add_d workspace.c_v,edx,direction
mov ebx,workspace.d_z_x
mov edx,workspace.d_i_x
add_d workspace.c_z,ebx,direction ; 3 cycles
add_d workspace.c_i,edx,direction
inc_d ecx,direction
jle_d drawPixel,direction
lineDrawn:
; accumulate the fractional part of the major edge; esi becomes 0 or -1 from
; the carry and selects between each d_*_y_0 entry and the one 8 bytes below it
mov ebx,workspace.xm_f
mov edx,workspace.d_xm_f
add ebx,edx
mov ecx,workspace.&half&Count
sbb esi,esi
mov workspace.xm_f,ebx
mov edi,workspace.scanAddress
mov ebp,workspace.depthAddress
add edi,work.colour.stride_b ;two cycles
add ebp,work.depth.stride_b
mov workspace.scanAddress,edi
mov workspace.depthAddress,ebp
mov ebx,workspace.s_u
mov edx,workspace.s_v
add ebx,[workspace.d_u_y_0+8*esi] ;two cycles
add edx,[workspace.d_v_y_0+8*esi]
mov workspace.s_u,ebx
mov workspace.s_v,edx
mov workspace.c_u,ebx
mov workspace.c_v,edx
mov ebx,workspace.s_z
mov edx,workspace.s_i
add ebx,[workspace.d_z_y_0+8*esi] ;two cycles
add edx,[workspace.d_i_y_0+8*esi]
mov workspace.s_z,ebx
mov workspace.s_i,edx
mov workspace.c_z,ebx
mov workspace.c_i,edx
mov ebx,workspace.xm
mov edx,workspace.minorX
add ebx,workspace.d_xm ;two cycles
add edx,workspace.d_&minorX
mov workspace.xm,ebx
mov workspace.minorX,edx
; dec sets the flags tested by jge below; the mov leaves them alone
dec ecx
mov workspace.&half&Count,ecx
jge drawLine
done:
endm
endif
if PARTS and PART_8Z
; Entry point that records the three vertex pointers in workspace, runs
; TriangleSetup_ZTI_FLAT and then continues in the shared rasteriser at
; TriangleRasterise_ZTI_I8_D16_256 (reached with jmp, so that code returns
; on behalf of this proc). At the call eax = v0, ecx = v1 and edx = v2.
TriangleRender_ZTI_I8_D16_256_FLAT proc dummy:dword, v0:ptr brp_vertex, v1:ptr brp_vertex, v2:ptr brp_vertex
        mov     eax,v0
        mov     workspace.v0,eax
        mov     ecx,v1
        mov     workspace.v1,ecx
        mov     edx,v2
        mov     workspace.v2,edx

        call    TriangleSetup_ZTI_FLAT
        jmp     TriangleRasterise_ZTI_I8_D16_256
TriangleRender_ZTI_I8_D16_256_FLAT endp
; TriangleRender_ZTI_I8_D16_256 - Z-buffered, textured, lit triangle renderer.
; Stores v0, v1, v2 into workspace, calls TriangleSetup_ZTI, then falls into
; the rasteriser. TriangleRasterise_ZTI_I8_D16_256 is a second entry point
; that starts at the address calculation; the _FLAT variant jumps to it after
; running its own setup routine.
; NOTE(review): the expanded draw macros overwrite ebx, esi, edi and ebp;
; register preservation and frame handling are not visible in this file -
; confirm against the prologue/epilogue settings in the includes.
TriangleRender_ZTI_I8_D16_256 proc dummy:dword, v0:ptr brp_vertex, v1:ptr brp_vertex, v2:ptr brp_vertex
mov edx,v2
mov eax,v0
mov ecx,v1
mov workspace.v2,edx
mov workspace.v0,eax
mov workspace.v1,ecx
call TriangleSetup_ZTI
TriangleRasterise_ZTI_I8_D16_256 label proc
; Floating point address calculation - 20 cycles, (Integer=26)
; Result: scanAddress  = colour.base - 1 + colour.stride_b * (t_y - 1)
;         depthAddress = depth.base  - 2 + depth.stride_b  * (t_y - 1)
; Column comments show the FPU stack after each instruction, st(0) first.
; st(0) st(1) st(2) st(3) st(4) st(5) st(6) st(7)
fild work.colour.base ; cb
fild workspace.t_y ; ty cb
fild work.depth.base ; db ty cb
fild work.colour.stride_b ; cs db ty cb
fild work.depth.stride_b ; ds cs db ty cb
fxch st(4) ; cb cs db ty ds
fsub fp_one ; cb-1 cs db ty ds
fxch st(3) ; ty cs db cb-1 ds
fsub fp_one ; ty-1 cs db cb-1 ds
fxch st(2) ; db cs ty-1 cb-1 ds
fsub fp_two ; db-2 cs ty-1 cb-1 ds
fxch st(3) ; cb-1 cs ty-1 db-2 ds
fadd fp_conv_d ; cb-1I cs ty-1 db-2 ds
fxch st(1) ; cs cb-1I ty-1 db-2 ds
fmul st,st(2) ; csy cb-1I ty-1 db-2 ds
fxch st(3) ; db-2 cb-1I ty-1 csy ds
fadd fp_conv_d ; db-2I cb-1I ty-1 csy ds
fxch st(2) ; ty-1 cb-1I db-2I csy ds
fmulp st(4),st ; cb-1I db-2I csy dsy
faddp st(2),st ; db-2I ca dsy
;stall
faddp st(2),st ; ca da
; each store writes 8 bytes starting at the named field
; NOTE(review): presumably the field after each address is scratch - confirm workspace layout
fstp qword ptr workspace.scanAddress
fstp qword ptr workspace.depthAddress
; xm_f and d_xm_f hold the low 16 bits of xm and d_xm moved to the top of the dword
mov eax,workspace.xm
shl eax,16
mov ebx,workspace.d_xm
shl ebx,16
mov workspace.xm_f,eax
mov workspace.d_xm_f,ebx
; flip == 0 draws left to right, anything else right to left
mov eax,workspace.flip
test eax,eax
jnz drawRL
DRAW_ZTI_I8_D16_256 x1,DRAW_LR,top
DRAW_ZTI_I8_D16_256 x2,DRAW_LR,bottom
ret
drawRL:
DRAW_ZTI_I8_D16_256 x1,DRAW_RL,top
DRAW_ZTI_I8_D16_256 x2,DRAW_RL,bottom
ret
TriangleRender_ZTI_I8_D16_256 endp
endif
;******************************
; LIT LINEAR FOGGED RASTERISER
;******************************
; majorScanAddress aliases the workspace field scratch0; the span loop keeps the
; colour address of the minor edge pixel there.
majorScanAddress equ scratch0
; DRAW_ZTIF_I8_D16_256 - Z-buffered, textured, lit and fogged span loop for one
; trapezoid half.
;   minorX    : name of the minor-edge x field in workspace (invoked with x1 or x2)
;   direction : DRAW_LR or DRAW_RL, passed through to the *_d helper macros
;   half      : top or bottom, selects the workspace line counter field
; The *_d helpers are not defined in this file; presumably they select the
; left-to-right or right-to-left form of the instruction - confirm against the
; include files.
; Per pixel: depth test on the upper 16 bits of c_z, texel fetch, shade_table
; lookup indexed by the intensity byte and the texel, then fog_table lookup
; indexed by the top byte of the new depth and the shaded colour. Texel value
; 0 is not drawn.
; Overwrites eax, ebx, ecx, edx, esi, edi and ebp.
DRAW_ZTIF_I8_D16_256 macro minorX,direction,half
local drawPixel,drawLine,done,lineDrawn,noPlot
; height test
; a negative line count skips the whole half
mov ebx,workspace.&half&Count
mov ebp,workspace.depthAddress
cmp ebx,0
jl done
; trapezoid setup
; copy the edge start values s_* into the running per-pixel values c_*
mov edx,workspace.s_z
mov eax,workspace.s_u
mov workspace.c_z,edx
mov workspace.c_u,eax
mov eax,workspace.s_i
mov ebx,workspace.s_v
mov workspace.c_v,ebx
mov workspace.c_i,eax
drawLine:
; per-line setup: upper 16 bits of both edge x values, row pointers moved to
; the minor edge, ecx = signed pixel offset from the minor to the major edge
mov edx,workspace.minorX
mov ecx,workspace.xm
shr edx,16
mov edi,workspace.scanAddress
shr ecx,16
add edi,edx
sub ecx,edx
mov workspace.majorScanAddress,edi
; mov and lea leave the flags of the sub intact for the branch below
lea ebp,[ebp+2*edx]
jg_d lineDrawn,direction
drawPixel:
;z-buffer
; eax = upper 16 bits of c_z, edx = stored 16-bit depth, both zero extended
xor edx,edx ;/and edx,0ffffh
mov eax,workspace.c_z
shr eax,16
mov dh,[ebp+2*ecx+1]
xor ebx,ebx
mov dl,[ebp+2*ecx]
cmp eax,edx
; the depth compare flags survive the mov instructions down to ja;
; meanwhile edx becomes the texel index (v byte in dh, u byte in dl)
; and bh the intensity byte
mov dl,byte ptr[workspace.c_u+2]
mov esi,work.texture.base
mov dh,byte ptr[workspace.c_v+2]
mov bh,byte ptr[workspace.c_i+2]
ja noPlot
mov edi,work.shade_table
mov bl,[esi+edx]
; dh = top byte of the new 16-bit depth, used as the fog row
mov dh,ah
test bl,bl
; shade_table lookup at offset (intensity byte * 256 + texel); mov keeps the test flags
mov dl,[edi+ebx]
mov edi,work.fog_table
mov esi,workspace.majorScanAddress
; texel 0 is skipped; flags come from the test above
jz noPlot
; writes
; fog_table lookup at offset (depth top byte * 256 + shaded colour)
mov bl,[edi+edx]
mov [ebp+2*ecx],al
mov [ebp+2*ecx+1],ah
mov [esi+ecx],bl
; add on deltas
noPlot:
;update z,a
mov eax,workspace.c_z
mov ebx,workspace.c_i
mov esi,workspace.d_z_x
mov edx,workspace.d_i_x
add_d eax,esi,direction
add_d ebx,edx,direction
mov workspace.c_z,eax
mov workspace.c_i,ebx
;update u,v
mov eax,workspace.c_u
mov ebx,workspace.c_v
mov esi,workspace.d_u_x
mov edx,workspace.d_v_x
add_d eax,esi,direction
add_d ebx,edx,direction
mov workspace.c_u,eax
mov workspace.c_v,ebx
inc_d ecx,direction
jle_d drawPixel,direction
lineDrawn:
;perform per line updates
; accumulate the fractional part of the major edge; esi becomes 0 or -1 from
; the carry and selects between each d_*_y_0 entry and the one 8 bytes below it
mov eax,workspace.xm_f
mov ebx,workspace.d_xm_f
add eax,ebx
sbb esi,esi
mov workspace.xm_f,eax
; step both edges
mov eax,workspace.xm
mov ecx,workspace.minorX
mov edx,workspace.d_&minorX
mov ebx,workspace.d_xm
add eax,ebx
add ecx,edx
mov workspace.xm,eax
mov workspace.minorX,ecx
;update u,v
mov eax,workspace.s_u
mov ebx,[workspace.d_u_y_0+esi*8]
mov ecx,workspace.s_v
mov edx,[workspace.d_v_y_0+esi*8]
add eax,ebx
add ecx,edx
; the new start values also seed the running values of the next line
mov workspace.c_u,eax
mov workspace.c_v,ecx
mov workspace.s_u,eax
mov workspace.s_v,ecx
;update z,a
mov eax,workspace.s_z
mov ebx,[workspace.d_z_y_0+esi*8]
mov ecx,workspace.s_i
mov edx,[workspace.d_i_y_0+esi*8]
add eax,ebx
add ecx,edx
mov workspace.c_z,eax
mov workspace.c_i,ecx
mov workspace.s_z,eax
mov workspace.s_i,ecx
; advance both row pointers by one stride
mov ebp,workspace.depthAddress
mov ebx,work.depth.stride_b
mov edi,workspace.scanAddress
mov eax,work.colour.stride_b
add ebp,ebx
add edi,eax
mov ecx,workspace.&half&Count
mov workspace.scanAddress,edi
; dec sets the flags tested by jge below; the mov instructions leave them alone
dec ecx
mov workspace.depthAddress,ebp
mov workspace.&half&Count,ecx
jge drawLine
done:
endm
if PARTS and PART_8Z_FOG
; Entry point that records the three vertex pointers in workspace, runs
; TriangleSetup_ZTI_FLAT and then continues in the shared fogged rasteriser
; at TriangleRasterise_ZTIF_I8_D16_256 (reached with jmp, so that code
; returns on behalf of this proc). At the call eax = v0, ecx = v1, edx = v2.
TriangleRender_ZTIF_I8_D16_256_FLAT proc dummy:dword, v0:ptr brp_vertex, v1:ptr brp_vertex, v2:ptr brp_vertex
        mov     eax,v0
        mov     workspace.v0,eax
        mov     ecx,v1
        mov     workspace.v1,ecx
        mov     edx,v2
        mov     workspace.v2,edx

        call    TriangleSetup_ZTI_FLAT
        jmp     TriangleRasterise_ZTIF_I8_D16_256
TriangleRender_ZTIF_I8_D16_256_FLAT endp
; TriangleRender_ZTIF_I8_D16_256 - Z-buffered, textured, lit and fogged
; triangle renderer. Stores v0, v1, v2 into workspace, calls TriangleSetup_ZTI,
; then falls into the rasteriser. TriangleRasterise_ZTIF_I8_D16_256 is a second
; entry point that starts at the address calculation; the _FLAT variant jumps
; to it after running its own setup routine.
; NOTE(review): the expanded draw macros overwrite ebx, esi, edi and ebp;
; register preservation and frame handling are not visible in this file -
; confirm against the prologue/epilogue settings in the includes.
TriangleRender_ZTIF_I8_D16_256 proc dummy:dword, v0:ptr brp_vertex, v1:ptr brp_vertex, v2:ptr brp_vertex
mov edx,v2
mov eax,v0
mov ecx,v1
mov workspace.v2,edx
mov workspace.v0,eax
mov workspace.v1,ecx
call TriangleSetup_ZTI
TriangleRasterise_ZTIF_I8_D16_256 label proc
; Integer address calculation:
;   scanAddress  = colour.base + colour.stride_b * (t_y - 1) - 1
;   depthAddress = depth.base  + depth.stride_b  * (t_y - 1) - 2
mov eax,work.colour.stride_b
mov ebx,workspace.t_y
dec ebx
mov edi,work.colour.base
mov ebp,work.depth.base
; one-operand imul: edx:eax = eax * ebx, only the low dword in eax is used
imul ebx
add edi,eax
dec edi
mov eax,work.depth.stride_b
imul ebx
sub eax,2
add ebp,eax
mov workspace.scanAddress,edi
mov workspace.depthAddress,ebp
; xm_f and d_xm_f hold the low 16 bits of xm and d_xm moved to the top of the dword
mov eax,workspace.xm
shl eax,16
mov ebx,workspace.d_xm
shl ebx,16
mov workspace.xm_f,eax
mov workspace.d_xm_f,ebx
; flip == 0 draws left to right, anything else right to left
mov eax,workspace.flip
test eax,eax
jnz drawRL
DRAW_ZTIF_I8_D16_256 x1,DRAW_LR,top
DRAW_ZTIF_I8_D16_256 x2,DRAW_LR,bottom
ret
drawRL:
DRAW_ZTIF_I8_D16_256 x1,DRAW_RL,top
DRAW_ZTIF_I8_D16_256 x2,DRAW_RL,bottom
ret
TriangleRender_ZTIF_I8_D16_256 endp
endif
if PARTS and PART_8Z
; Expand TriangleRender_Flat twice. Arguments are: generated proc name,
; rasterise entry label, setup routine to call. The two instances differ only
; in their names and in the setup routine.
TriangleRender_Flat TriangleRender_Z_I8_D16,TriangleRasterise_Z_I8_D16,TriangleSetup_Z
TriangleRender_Flat TriangleRender_Z_I8_D16_ShadeTable,TriangleRasterise_Z_I8_D16_ShadeTable,TriangleSetup_Z_ShadeTable
endif
.data
; Single-precision constants used as memory operands by the floating point
; address calculations (fsub fp_one, fsub fp_two, fadd fp_conv_d).
fp_one dword 1.0
fp_two dword 2.0
; Bit pattern of the single-precision value 1.5 * 2^52: exponent field 127+52
; with the top mantissa bit set. It is added to each address before the value
; is stored as a qword; presumably this is the usual bias trick that leaves the
; integer result in the low dword of the stored double - confirm.
fp_conv_d dword (127+52-0) shl 23 + (1 shl 22)
end