From 8a425599ef67428c82f9c0378f35d56db1d945cf Mon Sep 17 00:00:00 2001 From: sezero Date: Sat, 18 Aug 2018 13:00:24 +0300 Subject: [PATCH] glide2x, sst1: converted asm to nasm syntax, adjusted fxgasm.c for it. it builds for linux now. (not tested though..) --- glide2x/sst1/glide/src/fxgasm.c | 35 +--- glide2x/sst1/glide/src/xdraw.asm | 292 +++++++++++++------------- glide2x/sst1/glide/src/xdraw96.asm | 319 ++++++++++++++--------------- 3 files changed, 297 insertions(+), 349 deletions(-) diff --git a/glide2x/sst1/glide/src/fxgasm.c b/glide2x/sst1/glide/src/fxgasm.c index 8701a35..9b14cc5 100644 --- a/glide2x/sst1/glide/src/fxgasm.c +++ b/glide2x/sst1/glide/src/fxgasm.c @@ -32,7 +32,6 @@ * macros for creating assembler offset files *----------------------------------------------------------------------*/ -#ifndef __linux__ #define NEWLINE printf("\n") #define COMMENT printf(";----------------------------------------------------------------------\n") @@ -57,32 +56,10 @@ #define SIZEOF(p,pname) if (hex) \ printf("SIZEOF_%s\tequ %08lxh\n",pname,sizeof(p)); \ else printf("SIZEOF_%s\tequ %10ld\n",pname,sizeof(p)) -#else -#define NEWLINE printf("\n"); -#define COMMENT printf("#----------------------------------------------------------------------\n") -#define HEADER(str) NEWLINE; COMMENT; \ - printf("# Assembler offsets for %s struct\n",str);\ - COMMENT; NEWLINE - -#define OFFSET(p,o,pname) if (hex) \ - printf("%s\t .EQU %08x\n",pname,((int)&p.o)-(int)&p); \ - else printf("%s\t .EQU %10d\n",pname,((int)&p.o)-(int)&p) - -#if (GLIDE_PLATFORM & GLIDE_HW_SST96) -#define HWOFFSET(p, o, pname) if (hex) \ - printf("%s\t .EQU %08x\n",pname,(((int) &p.o)-(int)&p)>>2);\ - else printf("%s\t .EQU %10d\n",pname,(((int)&p.o)-(int)&p)) -#endif /* (GLIDE_PLATFORM & GLIDE_HW_SST96) */ - -#define OFFSET2(p,o,pname) if (hex) \ - printf("%s\t .EQU %08x\n",pname,((int)&o)-(int)&p); \ - else printf("%s\t .EQU %10d\n",pname,((int)&o)-(int)&p) - -#define SIZEOF(p,pname) if (hex) \ - printf("SIZEOF_%s\t .EQU %08x\n",pname,sizeof(p)); \ - else printf("SIZEOF_%s\t .EQU %10d\n",pname,sizeof(p)) -#endif +#define CONST(name) if (hex) \ + printf(#name " equ 0%xh\n", name); \ + else printf(#name " equ %d\n", name) int main (int argc, char **argv) @@ -114,11 +91,7 @@ main (int argc, char **argv) hex = 1; } -#ifndef __linux__ - printf("SST_CHIP_MASK equ 0%xh\n",SST_CHIP_MASK); -#else - printf("SST_CHIP_MASK .EQU 0x%x\n", SST_CHIP_MASK); -#endif + CONST (SST_CHIP_MASK); HEADER ("SSTREGS"); #if (GLIDE_PLATFORM & GLIDE_HW_SST1) diff --git a/glide2x/sst1/glide/src/xdraw.asm b/glide2x/sst1/glide/src/xdraw.asm index e303954..35c4638 100644 --- a/glide2x/sst1/glide/src/xdraw.asm +++ b/glide2x/sst1/glide/src/xdraw.asm @@ -36,86 +36,81 @@ ;; ;; -TITLE xdraw.asm -OPTION OLDSTRUCTS -.586P - -.model FLAT,C ; Flat memory, mangle publics with leading '_' +%include "xos.inc" -EXTRN _GlideRoot:DWORD -EXTRN _grSpinFifo:NEAR +extrn _GlideRoot +extrn _grSpinFifo ; some useful floating load and store macros -flds TEXTEQU -fsubs TEXTEQU -fmuls TEXTEQU +%define flds fld DWORD +%define fsubs fsub DWORD +%define fmuls fmul DWORD -_DATA SEGMENT - One DD 03f800000r +segment SEG_DATA + One DD 1.0 Area DD 0 dxAB DD 0 dxBC DD 0 dyAB DD 0 dyBC DD 0 culltest DD 0 - P6FenceVar DD 0 -_DATA ENDS + P6FenceVar DD 0 ; Ugly, but seems to workaround the problem with locally defined ; data segment globals not getting relocated properly when using ; djgpp. -zArea TEXTEQU -zdxAB TEXTEQU -zdxBC TEXTEQU -zdyAB TEXTEQU -zdyBC TEXTEQU -zculltest TEXTEQU +%define zArea One+04h +%define zdxAB One+08h +%define zdxBC One+0ch +%define zdyAB One+10h +%define zdyBC One+14h +%define zculltest One+18h ;;; Some useful SST-1 offsets -INCLUDE fxgasm.h +%include "fxgasm.h" ;; enables/disables trisProcessed and trisDrawn counters -STATS = 1 +%define STATS 1 ;-------------------------------------------------------------------------- ;;; Macro for P6 Fencing operation ;;; Note that this destroys eax, but if you do 2 fences eax will be restored -IFDEF GLIDE_DEBUG -EXTRN _grFence:NEAR +%ifdef GLIDE_DEBUG +extrn _grFence -P6Fence MACRO - pushd eax +%macro P6Fence 0 + push eax push ecx push edx call _grFence pop edx pop ecx pop eax - xchg eax, P6FenceVar -ENDM -ELSE -P6Fence MACRO - xchg eax, P6FenceVar -ENDM -ENDIF + xchg eax, [P6FenceVar] +%endmacro +%else +%macro P6Fence 0 + xchg eax, [P6FenceVar] +%endmacro +%endif -IFDEF GLIDE_HARDWARE -IFDEF GLIDE_DEBUG +%ifdef GLIDE_HARDWARE +%ifdef GLIDE_DEBUG -EXTRN _GR_SET:NEAR -EXTRN _GR_SETF:NEAR +extrn _GR_SET +extrn _GR_SETF -ENDIF +%endif -GR_SET MACRO hw,offset,value -IFDEF GLIDE_DEBUG +%macro GR_SET 3 +%ifdef GLIDE_DEBUG push eax push ecx push edx - push value - lea eax,[hw + offset] + push %3 + lea eax,[%1 + %2] push eax call _GR_SET add esp,8 @@ -123,19 +118,19 @@ IFDEF GLIDE_DEBUG pop edx pop ecx pop eax -ENDIF - mov DWORD PTR [hw + offset], value -ENDM +%endif + mov DWORD [%1 + %2], %3 +%endmacro -GR_SETF_P MACRO hw,foffset -IFDEF GLIDE_DEBUG +%macro GR_SETF_P 2 +%ifdef GLIDE_DEBUG push eax push ecx push edx - fst DWORD PTR [esp-4] + fst DWORD [esp-4] sub esp,4 - lea eax,[hw + foffset] + lea eax,[%1 + %2] push eax call _GR_SETF add esp,8 @@ -143,43 +138,49 @@ IFDEF GLIDE_DEBUG pop edx pop ecx pop eax -ENDIF - fstp DWORD PTR [hw + foffset] -ENDM +%endif + fstp DWORD [%1 + %2] +%endmacro -ENDIF +%endif ;-------------------------------------------------------------------------- ; Arguments (STKOFF = 16 from 4 pushes) -STKOFF = 16 -_va$ = 4 + STKOFF -_vb$ = 8 + STKOFF -_vc$ = 12 + STKOFF +STKOFF equ 16 +_va$ equ 4 + STKOFF +_vb$ equ 8 + STKOFF +_vc$ equ 12 + STKOFF -X = 0 -Y = 4 +X equ 0 +Y equ 4 -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller ; edx is used as index, loading from *src -gc TEXTEQU ; points to graphics context -dlp TEXTEQU ; points to dataList structure -hw TEXTEQU ; points to the hardware +%define gc esi ; points to graphics context +%define dlp esi ; points to dataList structure +%define hw edi ; points to the hardware -tmpx TEXTEQU ; temp X storage -i TEXTEQU ; i = dlp->i -tmpy TEXTEQU ; temp Y storage +%define tmpx edx ; temp X storage +%define i edx ; i = dlp->i +%define tmpy ebp ; temp Y storage -_TEXT SEGMENT +segment SEG_TEXT ;-------------------------------------------------------------------------- - align 4 - PUBLIC grDrawTriangle_asm -grDrawTriangle_asm PROC NEAR - .code -grDrawTriangle_asm ENDP + align 4 +proc grDrawTriangle, 12 +endp +%if XOS == XOS_WIN32 +%ifdef __MINGW32__ +; GNU LD fails with '_' prefix +export grDrawTriangle@12 +%else +export _grDrawTriangle@12 +%endif +%endif ; FALL THRU to _trisetup @@ -189,11 +190,9 @@ grDrawTriangle_asm ENDP ;; ;; USAGE: ;; -;; +;; align 4 - PUBLIC _trisetup_asm@12 -_trisetup_asm@12 PROC NEAR - .code +proc _trisetup_asm, 12 ; 28 ; save ebx, esi, edi, ebp push ebx @@ -232,8 +231,6 @@ _trisetup_asm@12 PROC NEAR ; with lowest y value on the stack, this will be used later for ; loading parameter values into the SST regs. ; -;;;;;;;;;;;;;; - ;-------------------------------------------------------------------------- mov fa, [esp + _va$] ; 1 @@ -255,7 +252,7 @@ a_positive: align 4 b_positive: mov fc, [fc + Y] ; 5 - mov gc, [_GlideRoot + curGC] + GET_GC cmp fc, 0 ; 6 jge c_positive xor fc, 7fffffffh @@ -341,19 +338,19 @@ Area_Computation: fsubs [fc + Y] ; | | dyBC flds [fa + Y] ; | | | ya fsubs [fb + Y] ; | | | dyAB - fld st(3) ; | | | | dxAB - fmul st, st(2) ; | | | | t0 t0=dxAB*dyBC - fld st(3) ; | | | | | dxBC - fmul st, st(2) ; | | | | | t1 t1=dxBC*dyAB + fld st3 ; | | | | dxAB + fmul st0, st2 ; | | | | t0 t0=dxAB*dyBC + fld st3 ; | | | | | dxBC + fmul st0, st2 ; | | | | | t1 t1=dxBC*dyAB mov hw, [gc + fifoFree] ; load gc->state.fifoFree -IFDEF STATS +%ifdef STATS inc tmpy ; _GlideRoot.stats.trisProcessed++; mov [_GlideRoot + trisProcessed], tmpy -ENDIF +%endif mov tmpy, [_GlideRoot + curTriSize] ; load _GlideRoot.curTriSize ; 69-12 ; dxAB dxBC dyBC dyAB t0 t1 - fsubp st(1),st ; | | | | area + fsubp st1,st0 ; | | | | area sub hw,tmpy ; fifoFree = gc->fifoFree - gc->curTriSize; jge nostall ; if (fifoFree < 0) push eax @@ -370,9 +367,9 @@ nostall: ; 52 with direct fall thru ; jmp ret_pop5f ;==============<<<<<<<================== mov [gc + fifoFree],hw ; gc->fifoFree = fifoFree; - fst zArea ; 2 clocks for fst + fst dword [zArea] ; 2 clocks for fst mov hw, [gc + reg_ptr ] ; hw = gc->reg_ptr - mov tmpy, zArea ; j = *(long *)&area + mov tmpy, [zArea] ; j = *(long *)&area add hw, 00200000H ; hw = SST_WRAP(hw,128) and tmpy, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) jz zero_area @@ -380,14 +377,14 @@ nostall: ; 57 with directy fall thru ;-------------------------------------------------------------------------- - fdivr One ; ooa = 1.0f / area; takes 20-38 clks + fdivr dword [One] ; ooa = 1.0f / area; takes 20-38 clks Cull_by_area_sign: sal tmpx, 31 ; culltest<<31 mov tmpy, [gc + cull_mode] ; load gc->state.cull_mode test tmpy, tmpy ; if (gc->state.cull_mode != GR_CULL_DISABLE) je nocull1 ; culling ENABLED - mov tmpy, zArea ; reload area + mov tmpy, [zArea] ; reload area xor tmpy,tmpx ; if (j ^ (culltest<<31)) jge backfaced @@ -420,16 +417,16 @@ nocull1: ; culling DISABLED ; dyBC *= ooa; merge1: ; Stack looks like ; dxAB dxBC dyBC dyAB ooa - fmul st(4), st ; DXAB | | | | - fmul st(3), st ; | DXBC | | | - fmul st(2), st ; | | DYBC | | - fmulp st(1), st ; | | | DYAB - fxch st(3) ; DYAB | | DXAB + fmul st4, st0 ; DXAB | | | | + fmul st3, st0 ; | DXBC | | | + fmul st2, st0 ; | | DYBC | | + fmulp st1, st0 ; | | | DYAB + fxch st3 ; DYAB | | DXAB ; 105-12 - fstp zdxAB ; | | DYBC - fstp zdyBC ; | DXBC - fstp zdxBC ; DYAB - fstp zdyAB ; + fstp dword [zdxAB] ; | | DYBC + fstp dword [zdyBC] ; | DXBC + fstp dword [zdxBC] ; DYAB + fstp dword [zdyAB] ; ; 112-3 ; jmp ret_pop0f @@ -451,7 +448,7 @@ merge1: ; Stack looks like next_parm: mov hw, [dlp + dl_addr] ; fp = dlp->addr mov tmpy, [fa + i] ; tmpy = fa[i] -IF 1 +%if 1 test i,1 ; if (i & 1) { je no_packer_fix test i,2 ; if (i & 2) @@ -473,26 +470,26 @@ IF 1 align 4 no_packer_fix: ; dpAB dpBC -ENDIF +%endif flds [fa + i] ; pa fsubs [fb + i] ; dpAB flds [fb + i] ; | pb fsubs [fc + i] ; dpAB dpBC - fld st(1) ; | | dpAB - fmuls zdyBC ; | | p0x - fld st(1) ; | | | dpBC - fmuls zdyAB ; | | | p1x - fxch st(3) ; p1x | | dpAB + fld st1 ; | | dpAB + fmuls [zdyBC] ; | | p0x + fld st1 ; | | | dpBC + fmuls [zdyAB] ; | | | p1x + fxch st3 ; p1x | | dpAB GR_SET hw,0,tmpy ; | | | | - fmuls zdxBC ; | | | p1y - fxch st(2) ; | p1y | dpBC - fmuls zdxAB ; | | | p0y - fxch st(3) ; p0y | | p1x - fsubp st(1),st ; | | dpdx - fxch st(2) ; dpdx | p0y - fsubrp st(1),st ; | dpdy - fxch st(1) ; dpdy dpdx + fmuls [zdxBC] ; | | | p1y + fxch st2 ; | p1y | dpBC + fmuls [zdxAB] ; | | | p0y + fxch st3 ; p0y | | p1x + fsubp st1,st0 ; | | dpdx + fxch st2 ; dpdx | p0y + fsubrp st1,st0 ; | dpdy + fxch st1 ; dpdy dpdx mov i, [dlp + SIZEOF_dataList + dl_i] ; i = dlp[1]->i add dlp, SIZEOF_dataList ; dlp++; GR_SETF_P hw,FDPDX_OFFSET ; | @@ -509,11 +506,11 @@ ENDIF ; write area, pop fp stack, pop stack, return dotri: mov hw, [dlp + SIZEOF_dataList + dl_addr] ; fp = dlp[1]->addr -IFDEF STATS +%ifdef STATS inc fa ; _GlideRoot.stats.trisDrawn++ mov [_GlideRoot + trisDrawn], fa -ENDIF - mov tmpx, zArea +%endif + mov tmpx, [zArea] test tmpy,2 ; if (i & 2) je no_p6_3 @@ -530,14 +527,14 @@ ENDIF pop esi pop ebx mov eax, 1h ; return 1 (triangle drawn) - ret 12 + ret ;----------------------------------------------------------------------------- ;; We get here if there is no interpolation to be done. Just set area to ;; draw the triangle. Unload fp stack, stack, return. align 4 no_interpolation: - mov tmpx, zArea ; reload area + mov tmpx, [zArea] ; reload area mov tmpy, [dlp + SIZEOF_dataList + dl_i] ; load up the CPU Type and tmpy, 2 ; Check for P6 je nofence1 @@ -551,24 +548,24 @@ nofence1: nop align 4 nofence2: - fstp st(0) - fstp st(0) - fstp st(0) - fstp st(0) - fstp st(0) -IFDEF STATS + fstp st0 + fstp st0 + fstp st0 + fstp st0 + fstp st0 +%ifdef STATS mov fa, [_GlideRoot + trisDrawn]; _GlideRoot.stats.trisDrawn++ -ENDIF +%endif pop ebp -IFDEF STATS +%ifdef STATS inc fa ; _GlideRoot.stats.trisDrawn++ mov [_GlideRoot + trisDrawn], fa -ENDIF +%endif pop edi pop esi pop ebx mov eax, 1h ; return 1 (triangles drawn) - ret 12 + ret ; These labels should only be jumped to when we are trying to rigorously ; measure times. Screwing up the floating point stack costs many clocks, @@ -577,20 +574,20 @@ ENDIF align 4 backfaced: - fstp st(0) - fstp st(0) - fstp st(0) - fstp st(0) - fstp st(0) + fstp st0 + fstp st0 + fstp st0 + fstp st0 + fstp st0 mov eax, 0ffffffffh ; return -1 (backface culled) pop ebp pop edi pop esi pop ebx - ret 12 + ret ret_pop6f: - fstp st(0) + fstp st0 align 4 zero_area: @@ -598,24 +595,24 @@ zero_area: ; but unload fp stack and then pop the stack, and return. ret_pop5f: ; dyBC dyAB dxBC dxAB area - fstp st(0) + fstp st0 ret_pop4f: - fstp st(0) + fstp st0 ret_pop3f: - fstp st(0) + fstp st0 ret_pop2f: - fstp st(0) + fstp st0 ret_pop1f: - fstp st(0) + fstp st0 ret_pop0f: mov eax, 0h ; return 0 (triangles drawn) pop ebp pop edi pop esi pop ebx - ret 12 + ret -_trisetup_asm@12 ENDP +endp ; [++++ from above] ; This comment was moved here to make the code in the loop more readable @@ -624,7 +621,7 @@ _trisetup_asm@12 ENDP ; we may not write to the PCI buffer without stalling. This causes ; the amount of clocks the workaround adds to the loop to vary in the ; following way++: -; +; ; CPU Bus/CPU Clock Total Bus Total Penalty ; Ratio* Clocks Since (add to later clocks) ;====================================================================== @@ -648,6 +645,3 @@ _trisetup_asm@12 ENDP ; P5-120: (2 * 4) + 9 = 17 clocks! ; P5-100/P5-90 (2 * 3) + 6 = 12 clocks ; - -_TEXT ENDS -END diff --git a/glide2x/sst1/glide/src/xdraw96.asm b/glide2x/sst1/glide/src/xdraw96.asm index 7f3a6e4..eb3150b 100644 --- a/glide2x/sst1/glide/src/xdraw96.asm +++ b/glide2x/sst1/glide/src/xdraw96.asm @@ -23,169 +23,165 @@ ; B4 Chip field fix. ;; -TITLE xdraw.asm -OPTION OLDSTRUCTS -.586P - -.model FLAT,C ; Flat memory, mangle publics with leading '_' +%include "xos.inc" -EXTRN _GlideRoot:DWORD -EXTRN _grSpinFifo:NEAR -EXTRN _grSst96FifoMakeRoom:NEAR +extrn _GlideRoot +extrn _grSpinFifo +extrn _grSst96FifoMakeRoom - ; some useful floating load and store macros -flds TEXTEQU -fsubs TEXTEQU -fmuls TEXTEQU +%define flds fld DWORD +%define fsubs fsub DWORD +%define fmuls fmul DWORD -_DATA SEGMENT - One DD 03f800000r +segment SEG_DATA + One DD 1.0 Area DD 0 dxAB DD 0 dxBC DD 0 dyAB DD 0 dyBC DD 0 culltest DD 0 - P6FenceVar DD 0 -_DATA ENDS + P6FenceVar DD 0 ; Ugly, but seems to workaround the problem with locally defined ; data segment globals not getting relocated properly when using ; djgpp. -zArea TEXTEQU -zdxAB TEXTEQU -zdxBC TEXTEQU -zdyAB TEXTEQU -zdyBC TEXTEQU -zculltest TEXTEQU +%define zArea One+04h +%define zdxAB One+08h +%define zdxBC One+0ch +%define zdyAB One+10h +%define zdyBC One+14h +%define zculltest One+18h ;;; Some useful SST-1 offsets -INCLUDE fxgasm.h +%include "fxgasm.h" ;; enables/disables trisProcessed and trisDrawn counters -STATS = 1 +%define STATS 1 ;-------------------------------------------------------------------------- ;;; Macro for P6 Fencing operation ;;; Note that this destroys eax, but if you do 2 fences eax will be restored -IFDEF GLIDE_DEBUG -EXTRN _GR_SET_GW_CMD:NEAR -EXTRN _GR_SET_GW_HEADER:NEAR -EXTRN _GR_SET_GW_ENTRY:NEAR -ENDIF +%ifdef GLIDE_DEBUG +extrn _GR_SET_GW_CMD +extrn _GR_SET_GW_HEADER +extrn _GR_SET_GW_ENTRY +%endif -GR_SET_GW_CMD MACRO addr, offset, data -IFDEF GLIDE_DEBUG +%macro GR_SET_GW_CMD 3 +%ifdef GLIDE_DEBUG push eax push ecx push edx - push data - lea eax, [addr+offset] + push %3 + lea eax, [%1 + %2] push eax call _GR_SET_GW_CMD add esp, 8 pop edx pop ecx pop eax -ELSE - mov [addr+offset], data -ENDIF -ENDM +%else + mov [%1 + %2], %3 +%endif +%endmacro -GR_SET_GW_HEADER MACRO addr, offset, data -IFDEF GLIDE_DEBUG +%macro GR_SET_GW_HEADER 3 +%ifdef GLIDE_DEBUG push eax push ecx push edx - push data - lea eax, [addr+offset] + push %3 + lea eax, [%1 + %2] push eax call _GR_SET_GW_HEADER add esp, 8 pop edx pop ecx pop eax -ELSE - mov [addr+offset], data -ENDIF -ENDM +%else + mov [%1 + %2], %3 +%endif +%endmacro -GR_SET_GW_ENTRY MACRO addr, offset, data -IFDEF GLIDE_DEBUG +%macro GR_SET_GW_ENTRY 3 +%ifdef GLIDE_DEBUG push eax push ecx push edx - push data - lea eax, [addr+offset] + push %3 + lea eax, [%1 + %2] push eax call _GR_SET_GW_ENTRY add esp, 8 pop edx pop ecx pop eax -ELSE - mov [addr+offset], data -ENDIF -ENDM +%else + mov [%1 + %2], %3 +%endif +%endmacro -GR_FSET_GW_ENTRY MACRO addr, offset -IFDEF GLIDE_DEBUG +%macro GR_FSET_GW_ENTRY 2 +%ifdef GLIDE_DEBUG push eax push ecx push edx sub esp, 4 - fstp DWORD PTR [esp] - lea eax, [addr+offset] + fstp DWORD [esp] + lea eax, [%1 + %2] push eax call _GR_SET_GW_ENTRY add esp, 8 pop edx pop ecx pop eax -ELSE - fstp DWORD PTR [addr+offset] -ENDIF -ENDM +%else + fstp DWORD [%1 + %2] +%endif +%endmacro ;-------------------------------------------------------------------------- ; Arguments (STKOFF = 16 from 4 pushes) -STKOFF = 16 +STKOFF equ 16 +_va$ equ 4 + STKOFF +_vb$ equ 8 + STKOFF +_vc$ equ 12 + STKOFF - - +X equ 0 +Y equ 4 -_va$ = 4 + STKOFF -_vb$ = 8 + STKOFF -_vc$ = 12 + STKOFF - -X = 0 -Y = 4 - -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller ; edx is used as index, loading from *src -gc TEXTEQU ; points to graphics context -dlp TEXTEQU ; points to dataList structure -hw TEXTEQU ; points to the hardware -fifo TEXTEQU ; points to next entry in fifo +%define gc esi ; points to graphics context +%define dlp esi ; points to dataList structure +%define hw edi ; points to the hardware +%define fifo edi ; points to next entry in fifo -tmpx TEXTEQU ; temp X storage -i TEXTEQU ; i = dlp->i -tmpy TEXTEQU ; temp Y storage +%define tmpx edx ; temp X storage +%define i edx ; i = dlp->i +%define tmpy ebp ; temp Y storage -_TEXT SEGMENT +segment SEG_TEXT ;-------------------------------------------------------------------------- - align 4 - PUBLIC grDrawTriangle_asm -grDrawTriangle_asm PROC NEAR - .code -grDrawTriangle_asm ENDP + align 4 +proc grDrawTriangle, 12 +endp +%if XOS == XOS_WIN32 +%ifdef __MINGW32__ +; GNU LD fails with '_' prefix +export grDrawTriangle@12 +%else +export _grDrawTriangle@12 +%endif +%endif ; FALL THRU to _trisetup @@ -195,11 +191,9 @@ grDrawTriangle_asm ENDP ;; ;; USAGE: ;; -;; +;; align 4 - PUBLIC _trisetup_asm@12 -_trisetup_asm@12 PROC NEAR - .code +proc _trisetup_asm, 12 ; 28 ; save ebx, esi, edi, ebp push ebx @@ -238,12 +232,9 @@ _trisetup_asm@12 PROC NEAR ; with lowest y value on the stack, this will be used later for ; loading parameter values into the SST regs. ; -;;;;;;;;;;;;;; - ;-------------------------------------------------------------------------- - mov fa, [esp + _va$] ; 1 mov fb, [esp + _vb$] mov fc, [esp + _vc$] @@ -263,7 +254,7 @@ a_positive: align 4 b_positive: mov fc, [fc + Y] ; 5 - mov gc, [_GlideRoot + curGC] + GET_GC cmp fc, 0 ; 6 jge c_positive xor fc, 7fffffffh @@ -349,16 +340,16 @@ Area_Computation: fsubs [fc + Y] ; | | dyBC flds [fa + Y] ; | | | ya fsubs [fb + Y] ; | | | dyAB - fld st(3) ; | | | | dxAB - fmul st, st(2) ; | | | | t0 t0=dxAB*dyBC - fld st(3) ; | | | | | dxBC - fmul st, st(2) ; | | | | | t1 t1=dxBC*dyAB - fsubp st(1),st ; | | | | area + fld st3 ; | | | | dxAB + fmul st0, st2 ; | | | | t0 t0=dxAB*dyBC + fld st3 ; | | | | | dxBC + fmul st0, st2 ; | | | | | t1 t1=dxBC*dyAB + fsubp st1,st0 ; | | | | area - fst zArea ; | | | | area + fst dword [zArea] ; | | | | area ; Zero Area Triangle Check - mov tmpy, zArea ; j = *(long *)&area + mov tmpy, [zArea] ; j = *(long *)&area and tmpy, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) jz zero_area @@ -368,13 +359,13 @@ Area_Computation: test tmpy, tmpy ; if (gc->state.cull_mode != GR_CULL_DISABLE) je nocull1 ; culling ENABLED - mov tmpy, zArea ; reload area + mov tmpy, [zArea] ; reload area xor tmpy,tmpx ; if (j ^ (culltest<<31)) jge backfaced nocull1: ; culling disabled ; OOA Calculation - fdivr One ; | | | | ooa + fdivr dword [One] ; | | | | ooa ; Fetch Fifo Ptr mov fifo, [gc + fifoPtr]; @@ -439,15 +430,15 @@ wrapDone: ; Setup for Parameter Calculator ; dxAB dxBC dyBC dyAB ooa - fmul st(4), st ; DXAB | | | | - fmul st(3), st ; | DXBC | | | - fmul st(2), st ; | | DYBC | | - fmulp st(1), st ; | | | DYAB - fxch st(3) ; DYAB | | DXAB - fstp zdxAB ; | | DYBC - fstp zdyBC ; | DXBC - fstp zdxBC ; DYAB - fstp zdyAB ; + fmul st4, st0 ; DXAB | | | | + fmul st3, st0 ; | DXBC | | | + fmul st2, st0 ; | | DYBC | | + fmulp st1, st0 ; | | | DYAB + fxch st3 ; DYAB | | DXAB + fstp dword [zdxAB] ; | | DYBC + fstp dword [zdyBC] ; | DXBC + fstp dword [zdxBC] ; DYAB + fstp dword [zdyAB] ; ; Parameter Calculator align 4 @@ -461,26 +452,23 @@ next_parm: flds [fb + i] ; | pb fsubs [fc + i] ; dpAB dpBC - fld st(1) ; | | dpAB - fmuls zdyBC ; | | p0x - fld st(1) ; | | | dpBC - fmuls zdyAB ; | | | p1x - fxch st(3) ; p1x | | dpAB - + fld st1 ; | | dpAB + fmuls [zdyBC] ; | | p0x + fld st1 ; | | | dpBC + fmuls [zdyAB] ; | | | p1x + fxch st3 ; p1x | | dpAB GR_SET_GW_ENTRY fifo, 0, tmpy ; | | | | - fmuls zdxBC ; | | | p1y - fxch st(2) ; | p1y | dpBC - fmuls zdxAB ; | | | p0y - fxch st(3) ; p0y | | p1x - fsubp st(1),st ; | | dpdx - fxch st(2) ; dpdx | p0y - fsubrp st(1),st ; | dpdy - fxch st(1) ; dpdy dpdx + fmuls [zdxBC] ; | | | p1y + fxch st2 ; | p1y | dpBC + fmuls [zdxAB] ; | | | p0y + fxch st3 ; p0y | | p1x + fsubp st1,st0 ; | | dpdx + fxch st2 ; dpdx | p0y + fsubrp st1,st0 ; | dpdy + fxch st1 ; dpdy dpdx mov i, [dlp + SIZEOF_dataList + dl_i] ; i = dlp[1]->i add dlp, SIZEOF_dataList ; dlp++; - GR_FSET_GW_ENTRY fifo, 4 ; | - GR_FSET_GW_ENTRY fifo, 8 ; empty add fifo, 12 test i,i ; while (i) @@ -489,7 +477,7 @@ next_parm: align 4 triangle_command: ; Write Triangle Command - mov tmpx, zArea + mov tmpx, [zArea] GR_SET_GW_ENTRY fifo, 0, tmpx test fifo, 7h jnz no_padding0 @@ -501,23 +489,22 @@ no_padding0: pop esi pop ebx mov eax, 1h ; return 1 (triangle drawn) - ret 12 + ret - align 4 zero_area: backfaced: - fstp st(0) ; 4 - fstp st(0) ; 3 - fstp st(0) ; 2 - fstp st(0) ; 1 - fstp st(0) ; 0 + fstp st0 ; 4 + fstp st0 ; 3 + fstp st0 ; 2 + fstp st0 ; 1 + fstp st0 ; 0 pop ebp pop edi pop esi pop ebx xor eax, eax ; return 0 (triangle drawn) - ret 12 + ret align 4 wrap: @@ -548,7 +535,7 @@ fence: align 4 dofence: push eax - xchg eax, P6FenceVar + xchg eax, [P6FenceVar] pop eax jmp fenceDone @@ -564,7 +551,7 @@ no_padding1: push gc mov tmpx, [dlp + dl_addr] - mov gc, [_GlideRoot + curGC] + GET_GC GR_SET_GW_CMD fifo, 0, tmpx mov tmpy, [gc + gwHeaders + 4] @@ -585,26 +572,23 @@ next_parm_1: flds [fb + i] ; | pb fsubs [fc + i] ; dpAB dpBC - fld st(1) ; | | dpAB - fmuls zdyBC ; | | p0x - fld st(1) ; | | | dpBC - fmuls zdyAB ; | | | p1x - fxch st(3) ; p1x | | dpAB - + fld st1 ; | | dpAB + fmuls [zdyBC] ; | | p0x + fld st1 ; | | | dpBC + fmuls [zdyAB] ; | | | p1x + fxch st3 ; p1x | | dpAB GR_SET_GW_ENTRY fifo, 0, tmpy ; | | | | - fmuls zdxBC ; | | | p1y - fxch st(2) ; | p1y | dpBC - fmuls zdxAB ; | | | p0y - fxch st(3) ; p0y | | p1x - fsubp st(1),st ; | | dpdx - fxch st(2) ; dpdx | p0y - fsubrp st(1),st ; | dpdy - fxch st(1) ; dpdy dpdx + fmuls [zdxBC] ; | | | p1y + fxch st2 ; | p1y | dpBC + fmuls [zdxAB] ; | | | p0y + fxch st3 ; p0y | | p1x + fsubp st1,st0 ; | | dpdx + fxch st2 ; dpdx | p0y + fsubrp st1,st0 ; | dpdy + fxch st1 ; dpdy dpdx mov i, [dlp + SIZEOF_dataList + dl_i] ; i = dlp[1]->i add dlp, SIZEOF_dataList ; dlp++; - GR_FSET_GW_ENTRY fifo, 4 ; | - GR_FSET_GW_ENTRY fifo, 8 ; empty add fifo, 12 test i,i ; while (i) @@ -616,11 +600,11 @@ next_parm_1: add fifo, 4 align 4 triangle_command_packet: - mov gc, [_GlideRoot + curGC] + GET_GC mov tmpy, 40000000h mov tmpx, [gc + gwCommand] - mov fa, zArea + mov fa, [zArea] GR_SET_GW_CMD fifo, 0, tmpx GR_SET_GW_HEADER fifo, 4, tmpy @@ -633,18 +617,18 @@ triangle_command_packet: pop esi pop ebx mov eax, 1h ; return 1 (triangle drawn) - ret 12 + ret align 4 no_interpolation: - fstp st(0) ; 4 - fstp st(0) ; 3 - fstp st(0) ; 2 - fstp st(0) ; 1 - fstp st(0) ; 0 + fstp st0 ; 4 + fstp st0 ; 3 + fstp st0 ; 2 + fstp st0 ; 1 + fstp st0 ; 0 jmp triangle_command -_trisetup_asm@12 ENDP +endp ; [++++ from above] ; This comment was moved here to make the code in the loop more readable @@ -653,7 +637,7 @@ _trisetup_asm@12 ENDP ; we may not write to the PCI buffer without stalling. This causes ; the amount of clocks the workaround adds to the loop to vary in the ; following way++: -; +; ; CPU Bus/CPU Clock Total Bus Total Penalty ; Ratio* Clocks Since (add to later clocks) ;====================================================================== @@ -677,6 +661,3 @@ _trisetup_asm@12 ENDP ; P5-120: (2 * 4) + 9 = 17 clocks! ; P5-100/P5-90 (2 * 3) + 6 = 12 clocks ; - -_TEXT ENDS -END