From 14b3357ab8f6fbaeb5bae2187889f2a7a14b7627 Mon Sep 17 00:00:00 2001 From: sezero Date: Sat, 18 Aug 2018 17:15:02 +0300 Subject: [PATCH] glide2x, h3: converted asm to nasm syntax, adjusted fxgasm.c for it. it builds for linux now. (not tested though..) --- glide2x/h3/glide/src/fxgasm.c | 36 +- glide2x/h3/glide/src/makefile.linux | 36 +- glide2x/h3/glide/src/xdraw2.asm | 220 +++++---- glide2x/h3/glide/src/xdraw2.inc | 676 ++++++++++++++-------------- glide2x/h3/glide/src/xtexdl.asm | 86 ++-- 5 files changed, 498 insertions(+), 556 deletions(-) diff --git a/glide2x/h3/glide/src/fxgasm.c b/glide2x/h3/glide/src/fxgasm.c index c283fa2..effb3bc 100644 --- a/glide2x/h3/glide/src/fxgasm.c +++ b/glide2x/h3/glide/src/fxgasm.c @@ -34,7 +34,6 @@ * macros for creating assembler offset files *----------------------------------------------------------------------*/ -#ifndef __linux__ #define NEWLINE printf("\n") #define COMMENT printf(";----------------------------------------------------------------------\n") @@ -43,35 +42,16 @@ COMMENT; NEWLINE #define OFFSET(p,o,pname) if (hex) \ - printf("%s\t= %08xh\n",pname,((int)&p.o)-(int)&p); \ - else printf("%s\t= %10d\n",pname,((int)&p.o)-(int)&p) + printf("%s\tequ %08xh\n",pname,((int)&p.o)-(int)&p); \ + else printf("%s\tequ %10d\n",pname,((int)&p.o)-(int)&p) #define OFFSET2(p,o,pname) if (hex) \ - printf("%s\t= %08xh\n",pname,((int)&o)-(int)&p); \ - else printf("%s\t= %10d\n",pname,((int)&o)-(int)&p) + printf("%s\tequ %08xh\n",pname,((int)&o)-(int)&p); \ + else printf("%s\tequ %10d\n",pname,((int)&o)-(int)&p) #define SIZEOF(p,pname) if (hex) \ - printf("SIZEOF_%s\t= %08xh\n",pname,sizeof(p)); \ - else printf("SIZEOF_%s\t= %10d\n",pname,sizeof(p)) -#else -#define NEWLINE printf("\n"); -#define COMMENT printf("#----------------------------------------------------------------------\n") - -#define HEADER(str) NEWLINE; COMMENT; \ - printf("# Assembler offsets for %s struct\n",str);\ - COMMENT; NEWLINE - -#define OFFSET(p,o,pname) if (hex) \ - printf("#define %s\t 0x%08x\n",pname,((int)&p.o)-(int)&p); \ - else printf("#define %s\t %10d\n",pname,((int)&p.o)-(int)&p) -#define OFFSET2(p,o,pname) if (hex) \ - printf("#define %s\t 0x%08x\n",pname,((int)&o)-(int)&p); \ - else printf("#define %s\t %10d\n",pname,((int)&o)-(int)&p) - -#define SIZEOF(p,pname) if (hex) \ - printf("#define SIZEOF_%s\t 0x%08x\n",pname,sizeof(p)); \ - else printf("#define SIZEOF_%s\t %10d\n",pname,sizeof(p)) -#endif + printf("SIZEOF_%s\tequ %08xh\n",pname,sizeof(p)); \ + else printf("SIZEOF_%s\tequ %10d\n",pname,sizeof(p)) int main (int argc, char **argv) @@ -100,7 +80,7 @@ main (int argc, char **argv) printf("#define kTriProcOffset 0x%XUL\n", offsetof(struct GrGC_s, archDispatchProcs.triSetupProc)); #endif /* GLIDE_DISPATCH_SETUP */ - + printf("/* The # of 2-byte entries in the hw fog table */\n"); printf("#define kInternalFogTableEntryCount 0x%XUL\n", sizeof(dummyRegs.fogTable) >> 1); @@ -167,7 +147,7 @@ main (int argc, char **argv) #if GLIDE_MULTIPLATFORM OFFSET(gc, gcFuncs, "gc_gcFuncs"); -#endif +#endif #if defined(GLIDE3) && defined(GLIDE3_ALPHA) OFFSET(gc, oemInit, "gc_oemInit"); diff --git a/glide2x/h3/glide/src/makefile.linux b/glide2x/h3/glide/src/makefile.linux index f3034ea..035620e 100644 --- a/glide2x/h3/glide/src/makefile.linux +++ b/glide2x/h3/glide/src/makefile.linux @@ -241,24 +241,24 @@ $(GLIDE_LIBDIR)/$(GLIDE_SHARED): $(GLIDE_OBJECTS:.o=.lo) # rules(2) ############################################################################### -## NASM SUPPORT IS NOT HERE YET... -## -#cpuid.o: cpudtect.asm -# $(AS) -o $@ $(ASFLAGS) $< -#xdraw2_def.o: xdraw2.asm -# $(AS) -o $@ $(ASFLAGS) $< -#xdraw2_3dnow.o: xdraw2.asm -# $(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $< -#xtexdl_3dnow.o: xtexdl.asm -# $(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $< -cpuid.o: cpudtect.S - $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) $< -xdraw2_def.o: xdraw2.S - $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) $< -xdraw2_3dnow.o: xdraw2.S - $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) -DGL_AMD3D=1 $< -xtexdl_3dnow.o: xtexdl.S - $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) -DGL_AMD3D=1 $< +#using nasm now +# +#cpuid.o: cpudtect.S +# $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) $< +#xdraw2_def.o: xdraw2.S +# $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) $< +#xdraw2_3dnow.o: xdraw2.S +# $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) -DGL_AMD3D=1 $< +#xtexdl_3dnow.o: xtexdl.S +# $(CC) -o $@ -xassembler-with-cpp -c $(CDEFS) -DGL_AMD3D=1 $< +cpuid.o: cpudtect.asm + $(AS) -o $@ $(ASFLAGS) $< +xdraw2_def.o: xdraw2.asm + $(AS) -o $@ $(ASFLAGS) $< +xdraw2_3dnow.o: xdraw2.asm + $(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $< +xtexdl_3dnow.o: xtexdl.asm + $(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $< cpuid.lo: cpuid.o $(CP) $< $@ diff --git a/glide2x/h3/glide/src/xdraw2.asm b/glide2x/h3/glide/src/xdraw2.asm index 4bd33df..7ee620a 100644 --- a/glide2x/h3/glide/src/xdraw2.asm +++ b/glide2x/h3/glide/src/xdraw2.asm @@ -52,161 +52,143 @@ ; B4 Chip field fix. ;; -TITLE xdraw2.asm -OPTION OLDSTRUCTS +%include "xos.inc" -.586P -.MMX -.K3D - -EXTRN __GlideRoot: DWORD -EXTRN __FifoMakeRoom: NEAR +extrn _GlideRoot +extrn _FifoMakeRoom -_DATA SEGMENT - One DD 03f800000r +segment SEG_DATA + One DD 1.0 Area DD 0 -IF GLIDE_PACKED_RGB +%if GLIDE_PACKED_RGB bias0 DD 0 bias1 DD 0 -ENDIF -_DATA ENDS +%endif ; Ugly, but seems to workaround the problem with locally defined ; data segment globals not getting relocated properly when using ; djgpp. -zArea TEXTEQU +%define zArea One+04h ;;; Definitions of cvg regs and glide root structures. -INCLUDE fxgasm.h +%include "fxgasm.h" ; Arguments (STKOFF = 16 from 4 pushes) -STKOFF = 16 -_va$ = 4 + STKOFF -_vb$ = 8 + STKOFF -_vc$ = 12 + STKOFF +STKOFF equ 16 +_va$ equ 4 + STKOFF +_vb$ equ 8 + STKOFF +_vc$ equ 12 + STKOFF + +X equ 0 +Y equ 4 + +%MACRO PROC_TYPE 1 + %IFDEF GL_AMD3D + proc _trisetup_3DNow_%1, 12 + %ELSE + proc _trisetup_Default_%1, 12 + %ENDIF +%ENDM -PROC_TYPE MACRO procType:= - IFDEF GL_AMD3D - EXITM <__trisetup_3DNow_&procType&@12> - ELSE - EXITM <__trisetup_Default_&procType&@12> - ENDIF - ENDM - ;-------------------------------------------------------------------------- -_TEXT SEGMENT PAGE PUBLIC USE32 'CODE' - ASSUME DS: FLAT, SS: FLAT +segment SEG_TEXT + + ALIGN 32 +PROC_TYPE cull + +%define GLIDE_CULLING 1 +%define GLIDE_PACK_RGB 0 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING + +endp ALIGN 32 - PUBLIC PROC_TYPE(cull) -PROC_TYPE(cull) PROC NEAR +%IF GLIDE_PACKED_RGB -GLIDE_CULLING textequ <1> -GLIDE_PACK_RGB textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> +PROC_TYPE cull_rgb -PROC_TYPE(cull) ENDP +%define GLIDE_CULLING 1 +%define GLIDE_PACK_RGB 1 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING + +endp ALIGN 32 +PROC_TYPE cull_argb -IF GLIDE_PACKED_RGB - PUBLIC PROC_TYPE(cull_rgb) -PROC_TYPE(cull_rgb) PROC NEAR +%define GLIDE_CULLING 1 +%define GLIDE_PACK_RGB 1 +%define GLIDE_PACK_ALPHA 1 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING -GLIDE_CULLING textequ <1> -GLIDE_PACK_RGB textequ <1> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> - -PROC_TPYE(cull_rgb) ENDP - - ALIGN 32 - - PUBLIC PROC_TPYE(cull_argb) -PROC_TPYE(cull_argb) PROC NEAR - -GLIDE_CULLING textequ <1> -GLIDE_PACK_RGB textequ <1> -GLIDE_PACK_ALPHA textequ <1> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> - -PROC_TPYE(cull_argb) ENDP -ENDIF ; GLIDE_PACKED_RGB +endp +%ENDIF ; GLIDE_PACKED_RGB ALIGN 32 +PROC_TYPE Default - PUBLIC PROC_TYPE() -PROC_TYPE() PROC NEAR +%define GLIDE_CULLING 0 +%define GLIDE_PACK_RGB 0 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING -GLIDE_CULLING textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> - -PROC_TYPE() ENDP +endp ALIGN 32 -IF GLIDE_PACKED_RGB - PUBLIC PROC_TYPE(rgb) -PROC_TPYE(rgb) PROC NEAR +%IF GLIDE_PACKED_RGB -GLIDE_CULLING textequ <0> -GLIDE_PACK_RGB textequ <1> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> +PROC_TYPE rgb -PROC_TPYE(rgb) ENDP +%define GLIDE_CULLING 0 +%define GLIDE_PACK_RGB 1 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING + +endp ALIGN 32 +PROC_TYPE argb - PUBLIC PROC_TPYE(argb) -PROC_TPYE(argb) PROC NEAR - -GLIDE_CULLING textequ <0> -GLIDE_PACK_RGB textequ <1> -GLIDE_PACK_ALPHA textequ <1> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> - -PROC_TPYE(argb) ENDP -ENDIF ; GLIDE_PACKED_RGB - - -_TEXT ENDS - -END +%define GLIDE_CULLING 0 +%define GLIDE_PACK_RGB 1 +%define GLIDE_PACK_ALPHA 1 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING +endp +%ENDIF ; GLIDE_PACKED_RGB diff --git a/glide2x/h3/glide/src/xdraw2.inc b/glide2x/h3/glide/src/xdraw2.inc index 6d00ceed..7abec64 100644 --- a/glide2x/h3/glide/src/xdraw2.inc +++ b/glide2x/h3/glide/src/xdraw2.inc @@ -30,33 +30,35 @@ ; B4 Chip field fix. ;; -TITLE xdraw2.inc +%ifnmacro GR_FIFO_WRITE +%MACRO GR_FIFO_WRITE 3 + mov [%1 + %2], %3 +%ENDM +%endif -ifdef GL_AMD3D +%ifdef GL_AMD3D -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE - - -WRITE_MM1_FIFO_ALIGNED MACRO +%ifnmacro WRITE_MM1_FIFO_ALIGNED +%MACRO WRITE_MM1_FIFO_ALIGNED 0 movq [fifo], mm1 ; store current param | previous param -ENDM ; WRITE_MM1_FIFO_ALIGNED - -WRITE_MM1LOW_FIFO MACRO +%ENDM +%endif +%ifnmacro WRITE_MM1LOW_FIFO +%MACRO WRITE_MM1LOW_FIFO 0 movd [fifo], mm1 ; store current param | previous param -ENDM ; WRITE_MM1LOW_FIFO +%ENDM +%endif -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to fifo entries -tempVal TEXTEQU +%define gc edi ; points to graphics context +%define fifo ebp ; points to fifo entries +%define tempVal esi -IF GLIDE_CULLING -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller -cull TEXTEQU ; cull mode -intArea TEXTEQU ; area temp storage +%IF GLIDE_CULLING +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller +%define cull edx ; cull mode +%define intArea ecx ; area temp storage ;; Prologue stuff @@ -64,7 +66,7 @@ intArea TEXTEQU ; area temp storage ;; don't need to check for GR_CULL_DISABLE push edi ; save caller's register variable - mov gc,[__GlideRoot+curGC]; GR_DCL_GC + mov gc,[_GlideRoot+curGC] ; GR_DCL_GC push esi ; save caller's register variable mov fc, [esp + _vc$ - 8] ; get base address of vertex C @@ -76,7 +78,7 @@ intArea TEXTEQU ; area temp storage mov cull, [gc + cull_mode]; get cull mode mov fa, [esp + _va$] ; get base address of vertex A - mov tempVal, [__GlideRoot + curTriSize] + mov tempVal, [_GlideRoot + curTriSize] femms ; will use AMD3D, clear FPU/MMX registers @@ -112,31 +114,31 @@ intArea TEXTEQU ; area temp storage ; Zero Area Triangle Check test intArea, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) - jz __cullFail ; area zero, triangle culled + jz .__cullFail ; area zero, triangle culled xor intArea, cull ; if (j ^ (culltest << 31)) - jge __cullFail ; triangle facing away from viewer, culled + jge .__cullFail ; triangle facing away from viewer, culled cmp ebx, tempVal ; fifo space required >= space available ? - jge __triBegin ; yup, push out triangle data to Voodoo + jge .__triBegin ; yup, push out triangle data to Voodoo - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push tempVal ; fifo space required - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack -ELSE ; !GLIDE_CULLING +%ELSE ; !GLIDE_CULLING ;; Prologue stuff push edi ; save caller's register variable - mov gc,[__GlideRoot+curGC]; GR_DCL_GC + mov gc,[_GlideRoot+curGC] ; GR_DCL_GC push esi ; save caller's register variable - mov eax, [__GlideRoot + curTriSize] - + mov eax, [_GlideRoot + curTriSize] + push ebx ; save caller's register variable mov ebx, [gc + fifoRoom] ; fifo space available @@ -147,25 +149,25 @@ ELSE ; !GLIDE_CULLING ;; the complete triangle packet. cmp ebx, eax ; space available >= space required ? - jge __triBegin ; yup, start drawing triangle + jge .__triBegin ; yup, start drawing triangle - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push eax ; space required in fifo - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack mov eax, eax ; filler -ENDIF +%ENDIF -dlp TEXTEQU ; points to dataList structure -dlpstrt TEXTEQU ; points to begin of dataList structure -vertex TEXTEQU ; the current vertex -packCol TEXTEQU +%define dlp ebx ; points to dataList structure +%define dlpstrt ecx ; points to begin of dataList structure +%define vertex edx ; the current vertex +%define packCol esi -__triBegin: +.__triBegin: mov eax, [gc+triPacketHdr]; Packet 3 header lea dlp,[gc + tsuDataList]; Reset the dataList @@ -175,7 +177,7 @@ __triBegin: mov dlpstrt, dlp ; save pointer to start of dataList test fifo, 4 ; is fifo pointer qword aligned ? - jz __fifo_aligned ; yes, it is qword aligned + jz .__fifo_aligned ; yes, it is qword aligned movq mm1, [vertex+x] ; y | x GR_FIFO_WRITE fifo, 0, eax ; write header to fifo; now qword aligned @@ -184,8 +186,8 @@ __triBegin: WRITE_MM1_FIFO_ALIGNED ; PCI write y | x add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -204,7 +206,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb por mm1, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -218,23 +220,23 @@ ELSE ; !GLIDE_PACK_ALPHA psrlq mm1, 24 ; 00000000 | 0000gg00 por mm1, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_ALPHA cmp eax, 0 ; end of list ? -ELSE +%ELSE test eax, eax ; end of list ? -ENDIF - jz __paramLoopDoneWBone1 ; yes, one DWORD in "write buffer" - -__paramLoop1a: +%ENDIF + jz .__paramLoopDoneWBone1 ; yes, one DWORD in "write buffer" + +.__paramLoop1a: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -243,39 +245,39 @@ __paramLoop1a: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) cmp eax, 0 ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBzero1; exit, "write buffer" empty + jz .__paramLoopDoneWBzero1; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ cmp eax, 0 ; at end of offset list (offset == 0) ? - jnz __paramLoop1a ; nope, copy next parameter + jnz .__paramLoop1a ; nope, copy next parameter - jmp __paramLoopDoneWBone1 ; merge back into common stream + jmp .__paramLoopDoneWBone1 ; merge back into common stream lea esp, [esp] ; filler -ELSE ; ! GLIDE_PACK_RGB +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty - mov eax, DWORD PTR [dlp] ; Get first offset from the data list + mov eax, DWORD [dlp] ; Get first offset from the data list test eax, eax ; at end of list ? lea dlp, [dlp+4] ; dlp++ - jz __paramLoopDoneWBzero1; yes, "write buffer" empty - -__paramLoop1a: + jz .__paramLoopDoneWBzero1; yes, "write buffer" empty + +.__paramLoop1a: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone1 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone1 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -284,14 +286,14 @@ __paramLoop1a: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop1a ; nope, copy next parameter + jnz .__paramLoop1a ; nope, copy next parameter mov esp, esp ; filler - jmp __paramLoopDoneWBzero1; write buffer empty - -ENDIF ; GLIDE_PACK_RGB + jmp .__paramLoopDoneWBzero1; write buffer empty -__fifo_aligned: +%ENDIF ; GLIDE_PACK_RGB + +.__fifo_aligned: movd mm2, [vertex+x] ; y | x of vertex A movd mm1, [gc+triPacketHdr]; Packet 3 header @@ -301,8 +303,8 @@ __fifo_aligned: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) movd mm1, [vertex+y] ; 0 | y of vertex A -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -322,7 +324,7 @@ IF GLIDE_PACK_ALPHA add dlp, 8 ; skip data list entry "a" por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -336,10 +338,10 @@ ELSE ; !GLIDE_PACK_ALPHA psrlq mm4, 24 ; 00000000 | 0000gg00 por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list WRITE_MM1_FIFO_ALIGNED ; PCI write y | RGB(A) add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -347,72 +349,72 @@ ENDIF ; !GLIDE_PACK_ALPHA add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBzero1; yes, "write buffer" is empty + jz .__paramLoopDoneWBzero1; yes, "write buffer" is empty nop ; filler -__paramLoop1b: +.__paramLoop1b: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone1 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone1 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_ALPHA add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -ELSE +%ELSE add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) nop -ENDIF +%ENDIF test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop1b ; nope, copy next parameter + jnz .__paramLoop1b ; nope, copy next parameter - jmp __paramLoopDoneWBzero1; write buffer empty + jmp .__paramLoopDoneWBzero1; write buffer empty -ELSE ; !GLIDE_PACK_RGB - mov eax, DWORD PTR [dlp] ; get first offset from the data list +%ELSE ; !GLIDE_PACK_RGB + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBone1 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone1 ; yes, "write buffer" has y data -__paramLoop1b: +.__paramLoop1b: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) punpckldq mm1, mm2 ; current param | previous param add dlp, 4 ; dlp++ - + WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_CULLING +%IF GLIDE_CULLING test eax, eax ; at end of offset list (offset == 0) ? -ELSE +%ELSE cmp eax, 0 ; at end of offset list (offset == 0) ? -ENDIF +%ENDIF - jz __paramLoopDoneWBzero1; exit, "write buffer" empty + jz .__paramLoopDoneWBzero1; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop1b ; nope, copy next parameter -ENDIF + jnz .__paramLoop1b ; nope, copy next parameter +%ENDIF -__paramLoopDoneWBone1: +.__paramLoopDoneWBone1: ;; here: "write buffer" has one DWORD left over from vertex A @@ -426,13 +428,13 @@ __paramLoopDoneWBone1: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) movd mm1, [vertex+y] ; 0 | y of vertex B -IF GLIDE_PACK_RGB -ELSE +%IF GLIDE_PACK_RGB +%ELSE mov esp, esp ; filler -ENDIF +%ENDIF -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -454,7 +456,7 @@ IF GLIDE_PACK_ALPHA por mm4, mm2 ; 00000000 | 00rrggbb por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -468,10 +470,10 @@ ELSE ; !GLIDE_PACK_ALPHA add dlp, 4 ; next data list entry por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list WRITE_MM1_FIFO_ALIGNED ; PCI write y | RGB(A) add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -479,20 +481,20 @@ ENDIF ; !GLIDE_PACK_ALPHA add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBzero2; yes, "write buffer" is empty + jz .__paramLoopDoneWBzero2; yes, "write buffer" is empty mov esp, esp ; filler -__paramLoop2b: +.__paramLoop2b: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? lea dlp, [dlp+4] ; dlp++ - jz __paramLoopDoneWBone2 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone2 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -501,46 +503,45 @@ __paramLoop2b: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop2b ; nope, copy next parameter + jnz .__paramLoop2b ; nope, copy next parameter - jmp __paramLoopDoneWBzero2; write buffer empty -ELSE ; !GLIDE_PACK_RGB - mov eax, DWORD PTR [dlp] ; get first offset from the data list + jmp .__paramLoopDoneWBzero2; write buffer empty +%ELSE ; !GLIDE_PACK_RGB + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBone2 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone2 ; yes, "write buffer" has y data -__paramLoop2b: +.__paramLoop2b: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param - + WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_CULLING +%IF GLIDE_CULLING test eax, eax ; at end of offset list (offset == 0) ? -ELSE +%ELSE cmp eax, 0 ; at end of offset list (offset == 0) ? -ENDIF - jz __paramLoopDoneWBzero2; exit, "write buffer" empty +%ENDIF + jz .__paramLoopDoneWBzero2; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop2b ; nope, copy next parameter + jnz .__paramLoop2b ; nope, copy next parameter - jmp __paramLoopDoneWBone2 ; write buffer contains one DWORD -ENDIF + jmp .__paramLoopDoneWBone2 ; write buffer contains one DWORD +%ENDIF - -__paramLoopDoneWBzero1: +.__paramLoopDoneWBzero1: mov vertex, [esp + _vb$] ; Current vertex = B mov dlp, dlpstrt ; Reset the dataList @@ -550,8 +551,8 @@ __paramLoopDoneWBzero1: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -570,7 +571,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm3 ; 00000000 | aarrggbb add dlp, 8 ; skip data list entry "a" -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -584,20 +585,20 @@ ELSE ; !GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb add dlp, 4 ; next data list entry -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ cmp eax, 0 ; end of list ? - jz __paramLoopDoneWBone2 ; yes, one DWORD in "write buffer" - -__paramLoop2a: + jz .__paramLoopDoneWBone2 ; yes, one DWORD in "write buffer" + +.__paramLoop2a: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) punpckldq mm1, mm2 ; current param | previous param add dlp, 4 ; dlp++ @@ -606,41 +607,41 @@ __paramLoop2a: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBzero2; exit, "write buffer" empty + jz .__paramLoopDoneWBzero2; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop2a ; nope, copy next parameter + jnz .__paramLoop2a ; nope, copy next parameter - jmp __paramLoopDoneWBone2 ; merge back into common stream -ELSE ; ! GLIDE_PACK_RGB + jmp .__paramLoopDoneWBone2 ; merge back into common stream +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty - mov eax, DWORD PTR [dlp] ; Get first offset from the data list + mov eax, DWORD [dlp] ; Get first offset from the data list add dlp, 4 ; dlp++ cmp eax, 0 ; at end of list ? - jz __paramLoopDoneWBzero2; yes, "write buffer" empty - -__paramLoop2a: + jz .__paramLoopDoneWBzero2; yes, "write buffer" empty + +.__paramLoop2a: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ -IF GLIDE_CULLING +%IF GLIDE_CULLING test eax, eax ; at end of offset list (offset == 0) ? -ELSE +%ELSE cmp eax, 0 ; at end of offset list (offset == 0) ? -ENDIF +%ENDIF - jz __paramLoopDoneWBone2 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone2 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -648,16 +649,15 @@ ENDIF WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_CULLING +%IF GLIDE_CULLING cmp eax, 0 ; at end of offset list (offset == 0) ? -ELSE +%ELSE test eax, eax ; at end of offset list (offset == 0) ? -ENDIF - jnz __paramLoop2a ; nope, copy next parameter -ENDIF ; GLIDE_PACK_RGB +%ENDIF + jnz .__paramLoop2a ; nope, copy next parameter +%ENDIF ; GLIDE_PACK_RGB - -__paramLoopDoneWBzero2: +.__paramLoopDoneWBzero2: mov vertex, [esp + _vc$] ; Current vertex = C mov dlp, dlpstrt ; Reset the dataList @@ -667,8 +667,8 @@ __paramLoopDoneWBzero2: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -687,7 +687,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm3 ; 00000000 | aarrggbb add dlp, 8 ; skip data list entry "a" -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -701,77 +701,77 @@ ELSE ; !GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb add dlp, 4 ; next data list entry -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_ALPHA cmp eax, 0 ; end of list ? -ELSE +%ELSE test eax, eax ; end of list ? -ENDIF - jz __paramLoopDoneWBone3 ; yes, one DWORD in "write buffer" - -__paramLoop3a: +%ENDIF + jz .__paramLoopDoneWBone3 ; yes, one DWORD in "write buffer" + +.__paramLoop3a: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) punpckldq mm1, mm2 ; current param | previous param add dlp, 4 ; dlp++ - + WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBzero3; exit, "write buffer" empty + jz .__paramLoopDoneWBzero3; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop3a ; nope, copy next parameter + jnz .__paramLoop3a ; nope, copy next parameter - jmp __paramLoopDoneWBone3 ; merge back into common stream + jmp .__paramLoopDoneWBone3 ; merge back into common stream -ELSE ; ! GLIDE_PACK_RGB +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty - mov eax, DWORD PTR [dlp] ; Get first offset from the data list + mov eax, DWORD [dlp] ; Get first offset from the data list add dlp, 4 ; dlp++ -IF GLIDE_CULLING +%IF GLIDE_CULLING test eax, eax ; at end of list ? -ELSE +%ELSE cmp eax, 0 ; at end of list ? -ENDIF - jz __paramLoopDoneWBzero3; yes, "write buffer" empty +%ENDIF + jz .__paramLoopDoneWBzero3; yes, "write buffer" empty -IF GLIDE_CULLING -ELSE +%IF GLIDE_CULLING +%ELSE mov esp, esp ; filler -ENDIF +%ENDIF -__paramLoop3a: +.__paramLoop3a: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ -IF GLIDE_CULLING +%IF GLIDE_CULLING cmp eax, 0 ; at end of offset list (offset == 0) ? -ELSE +%ELSE test eax, eax ; at end of offset list (offset == 0) ? -ENDIF +%ENDIF - jz __paramLoopDoneWBone3 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone3 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -780,13 +780,12 @@ ENDIF add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop3a ; nope, copy next parameter + jnz .__paramLoop3a ; nope, copy next parameter - jmp __paramLoopDoneWBzero3; write buffer empty -ENDIF ; GLIDE_PACK_RGB + jmp .__paramLoopDoneWBzero3; write buffer empty +%ENDIF ; GLIDE_PACK_RGB - -__paramLoopDoneWBone2: +.__paramLoopDoneWBone2: ;; here: "write buffer" has one DWORD left over from vertex B @@ -801,8 +800,8 @@ __paramLoopDoneWBone2: movd mm1, [vertex+y] ; 0 | y of vertex C -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -821,7 +820,7 @@ IF GLIDE_PACK_ALPHA por mm4, mm2 ; 00000000 | 00rrggbb por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -835,10 +834,10 @@ ELSE ; !GLIDE_PACK_ALPHA add dlp, 4 ; next data list entry por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, DWORD [dlp] ; get first offset from the data list WRITE_MM1_FIFO_ALIGNED ; PCI write y | RGB(A) add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -846,19 +845,19 @@ ENDIF ; !GLIDE_PACK_ALPHA add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBzero3; yes, "write buffer" is empty + jz .__paramLoopDoneWBzero3; yes, "write buffer" is empty -__paramLoop3b: +.__paramLoop3b: movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone3 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone3 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ punpckldq mm1, mm2 ; current param | previous param @@ -867,72 +866,72 @@ __paramLoop3b: add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop3b ; nope, copy next parameter + jnz .__paramLoop3b ; nope, copy next parameter -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_ALPHA nop ; filler - jmp __paramLoopDoneWBzero3; write buffer empty -ELSE - jmp __paramLoopDoneWBzero3; write buffer empty + jmp .__paramLoopDoneWBzero3; write buffer empty +%ELSE + jmp .__paramLoopDoneWBzero3; write buffer empty nop ; filler -ENDIF -ELSE ; !GLIDE_PACK_RGB - mov eax, DWORD PTR [dlp] ; get first offset from the data list +%ENDIF +%ELSE ; !GLIDE_PACK_RGB + mov eax, DWORD [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBone3 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone3 ; yes, "write buffer" has y data -__paramLoop3b: +.__paramLoop3b: movd mm2, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) punpckldq mm1, mm2 ; current param | previous param add dlp, 4 ; dlp++ - + WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBzero3; exit, "write buffer" empty + jz .__paramLoopDoneWBzero3; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop3b ; nope, copy next parameter -ENDIF + jnz .__paramLoop3b ; nope, copy next parameter +%ENDIF -__paramLoopDoneWBone3: +.__paramLoopDoneWBone3: ; "write buffer" contains one DWORD that needs to be flushed WRITE_MM1LOW_FIFO ; add fifo, 4 ; -__paramLoopDoneWBzero3: +.__paramLoopDoneWBzero3: ;; Update gc->fifoPtr and gc->fifoRoom - mov ecx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn + mov ecx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn mov eax, fifo ; new fifo pointer mov ebx, [gc + fifoPtr] ; old fifo pointer mov [gc + fifoPtr], fifo ; save new fifo pointer mov edx, [gc + fifoRoom] ; old fifo space available -IF GLIDE_PACK_RGB +%IF GLIDE_PACK_RGB add ecx, 1 ; _GlideRoot.stats.trisDrawn++ -ELSE +%ELSE inc ecx ; _GlideRoot.stats.trisDrawn++ -ENDIF +%ENDIF - mov esi, [__GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed + mov esi, [_GlideRoot + trisProcessed] ; _GlideRoot.stats.trisProcessed sub eax, ebx ; new fifo ptr - old fifo ptr = additional fifo space used - mov [__GlideRoot + trisDrawn], ecx ; + mov [_GlideRoot + trisDrawn], ecx ; sub edx, eax ; new fifo space available mov eax, 1h ; return value = triangle drawn @@ -943,24 +942,24 @@ ENDIF inc esi ; _GlideRoot.stats.trisProcessed++ pop ebp ; restore frame pointer -IF GLIDE_CULLING +%IF GLIDE_CULLING pop ebx ; restore caller's register variable - mov [__GlideRoot + trisProcessed], esi ; -ELSE - mov [__GlideRoot + trisProcessed], esi ; + mov [_GlideRoot + trisProcessed], esi ; +%ELSE + mov [_GlideRoot + trisProcessed], esi ; pop ebx ; restore caller's register variable -ENDIF +%ENDIF pop esi ; restore caller's register variable pop edi ; restore caller's register variable femms ; no more AMD3D code, clear FPU/MMX regs - ret 12 ; return to caller + ret ; return to caller -IF GLIDE_CULLING -__cullFail: - mov esi, [__GlideRoot + trisProcessed]; triangles processed so far +%IF GLIDE_CULLING +.__cullFail: + mov esi, [_GlideRoot + trisProcessed]; triangles processed so far xor eax, eax ; return value = triangle not drawn femms ; no more AMD3D code, clear FPU/MMX regs @@ -969,21 +968,21 @@ __cullFail: inc esi ; _GlideRoot.stats.trisProcessed++; pop ebp ; restore frame pointer - mov [__GlideRoot + trisProcessed], esi + mov [_GlideRoot + trisProcessed], esi pop ebx pop esi pop edi - ret 12 -ENDIF ; GLIDE_CULLING + ret +%ENDIF ; GLIDE_CULLING ;--------------------------------------------------------------------------- ; ; end AMD3D section ; ;--------------------------------------------------------------------------- -endif ; GL_AMD3D +%endif ; GL_AMD3D ;--------------------------------------------------------------------------- ; @@ -991,17 +990,14 @@ endif ; GL_AMD3D ; ;--------------------------------------------------------------------------- -ifndef GL_AMD3D +%ifndef GL_AMD3D ; some useful floating load and store macros -flds TEXTEQU -fsubs TEXTEQU -fmuls TEXTEQU - -X = 0 -Y = 4 +%define flds fld DWORD +%define fsubs fsub DWORD +%define fmuls fmul DWORD ; edx is used as index, loading from *src -gc TEXTEQU ; points to graphics context +%define gc esi ; points to graphics context ;; Prologue stuff push esi @@ -1010,16 +1006,16 @@ gc TEXTEQU ; points to graphics context push ebx push ebp - mov gc, [__GlideRoot + curGC] ;; GR_DCL_GC + mov gc, [_GlideRoot + curGC] ;; GR_DCL_GC align 4 -IF GLIDE_CULLING -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller +%IF GLIDE_CULLING +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller -cull TEXTEQU -intArea TEXTEQU ; temp Y storage +%define cull edx +%define intArea ebp ; temp Y storage ;; Pre-load the current culling mode before all of the ;; floating point area stuff. @@ -1030,8 +1026,8 @@ intArea TEXTEQU ; temp Y storage mov fc, [esp + _vc$] shl cull, 31 ; culltest << 31 - -Area_Computation: + +.Area_Computation: ; 47-3 ; jmp ret_pop0f flds [fa + X] ; xa @@ -1042,69 +1038,65 @@ Area_Computation: fsubs [fc + Y] ; | | dyBC flds [fa + Y] ; | | | ya fsubs [fb + Y] ; | | | dyAB - fld st(3) ; | | | | dxAB - fmul st, st(2) ; | | | | t0 t0=dxAB*dyBC - fld st(3) ; | | | | | dxBC - fmul st, st(2) ; | | | | | t1 t1=dxBC*dyAB - fsubp st(1),st ; | | | | area - fst zArea ; | | | | area + fld st3 ; | | | | dxAB + fmul st0, st2 ; | | | | t0 t0=dxAB*dyBC + fld st3 ; | | | | | dxBC + fmul st0, st2 ; | | | | | t1 t1=dxBC*dyAB + fsubp st1, st0 ; | | | | area + fst dword [zArea] ; | | | | area ;; Pop temp things from the sw culling off the fp stack - fstp st(0) ; 4 - fstp st(0) ; 3 - fstp st(0) ; 2 - fstp st(0) ; 1 - fstp st(0) ; 0 + fstp st0 ; 4 + fstp st0 ; 3 + fstp st0 ; 2 + fstp st0 ; 1 + fstp st0 ; 0 - mov intArea, zArea ; j = *(long *)&area + mov intArea, [zArea] ; j = *(long *)&area xor eax, eax ; Clear the return value (0 == culled) ; Zero Area Triangle Check and intArea, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) - jz __triDone + jz .__triDone ;; Triangle area check vs culling mode - mov intArea, zArea ; reload area just in case we're culling + mov intArea, [zArea] ; reload area just in case we're culling xor intArea, cull ; if (j ^ (culltest << 31)) - jge __triDone -ENDIF ; GLIDE_CULLING + jge .__triDone +%ENDIF ; GLIDE_CULLING align 4 ;; Check to make sure that we have enough room for ;; the complete triangle packet. - mov eax, [__GlideRoot + curTriSize] + mov eax, [_GlideRoot + curTriSize] mov ebx, [gc + fifoRoom] add eax, 4 cmp ebx, eax - jge __triBegin + jge .__triBegin - push @Line + push __LINE__ push 0h push eax - call __FifoMakeRoom + call _FifoMakeRoom add esp, 12 ;; Send triangle parameters -dlp TEXTEQU ; points to dataList structure -fifo TEXTEQU ; points to next entry in fifo -vertex TEXTEQU ; the current vertex -vOffset TEXTEQU ; Current vertex offset +%define dlp ebx ; points to dataList structure +%define fifo ebp ; points to next entry in fifo +%define vertex edx ; the current vertex +%define vOffset ecx ; Current vertex offset -packCol TEXTEQU -tempVal TEXTEQU - -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE +%define packCol edi +%define tempVal edi align 4 -__triBegin: +.__triBegin: mov fifo, [gc + fifoPtr] ; Fetch Fifo Ptr mov vOffset, 4 ; Starting vertex @@ -1115,44 +1107,44 @@ __triBegin: add fifo, 4 ; Advance fifo for hdr & x/y coordinate align 4 -__vertexStart: +.__vertexStart: mov vertex, [esp + STKOFF + vOffset] ; Current vertex add fifo, 8 nop ; Avoid p5 agi w/ load of vertex ptr nop - mov eax, DWORD PTR [vertex + x] ; X + mov eax, DWORD [vertex + x] ; X lea dlp, [gc + tsuDataList] ; Reset the dataList GR_FIFO_WRITE fifo, -8, eax ; PCI write X - mov eax, DWORD PTR [vertex + y] ; Y + mov eax, DWORD [vertex + y] ; Y xor packCol, packCol ; Clear packed color GR_FIFO_WRITE fifo, -4, eax ; PCI write Y -IF GLIDE_PACK_RGB - fld DWORD PTR [vertex + b] ; B - fadd DWORD PTR __GlideRoot + fBiasLo ; BC GC +%IF GLIDE_PACK_RGB + fld DWORD [vertex + b] ; B + fadd DWORD [_GlideRoot + fBiasLo]; BC GC - fld DWORD PTR [vertex + g] ; G B - fadd DWORD PTR __GlideRoot + fBiasHi ; GC B + fld DWORD [vertex + g] ; G B + fadd DWORD [_GlideRoot + fBiasHi]; GC B - fld DWORD PTR [vertex + r] ; R GC BC - fadd DWORD PTR __GlideRoot + fBiasHi ; RC GC BC + fld DWORD [vertex + r] ; R GC BC + fadd DWORD [_GlideRoot + fBiasHi]; RC GC BC - fxch st(2) ; BC GC RC - fstp DWORD PTR bias0 ; GC RC + fxch st2 ; BC GC RC + fstp DWORD [bias0] ; GC RC - fstp DWORD PTR bias1 ; RC - mov packCol, DWORD PTR bias0 ; B + bias + fstp DWORD [bias1] ; RC + mov packCol, DWORD [bias0] ; B + bias - fstp DWORD PTR bias0 - mov eax, DWORD PTR bias1 ; G + bias + fstp DWORD [bias0] + mov eax, DWORD [bias1] ; G + bias -IF GLIDE_PACK_ALPHA - fld DWORD PTR [vertex + a] - fadd DWORD PTR __GlideRoot + fBiasHi +%IF GLIDE_PACK_ALPHA + fld DWORD [vertex + a] + fadd DWORD [_GlideRoot + fBiasHi] and packCol, 00FFh ; B color component and eax, 0000FF00h ; G component << 8 @@ -1163,10 +1155,10 @@ IF GLIDE_PACK_ALPHA or packCol, eax ; 0000GGBB nop - fstp DWORD PTR bias1 - mov eax, DWORD PTR bias0 ; R + bias + fstp DWORD [bias1] + mov eax, DWORD [bias0] ; R + bias - mov esi, DWORD PTR bias1 ; A + bias + mov esi, DWORD [bias1] ; A + bias and eax, 0000FF00h ; R component << 8 and esi, 0FFFFFF00h ; A component << 8 @@ -1177,56 +1169,56 @@ IF GLIDE_PACK_ALPHA or packCol, esi ; AARRGGBB nop -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA and packCol, 00FFh ; B color component and eax, 0000FF00h ; G component << 8 add dlp, 4 ; Next dataList item or packCol, eax - mov eax, DWORD PTR bias0 ; R + bias + mov eax, DWORD [bias0] ; R + bias and eax, 0000FF00h ; R component << 8 shl eax, 8 ; R << 16 or packCol, eax ; 00RRGGBB -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA GR_FIFO_WRITE fifo, 0, packCol ; PCI write packed color value add fifo, 4 -ENDIF ; GLIDE_PACK_RGB +%ENDIF ; GLIDE_PACK_RGB -__doParams: - mov eax, DWORD PTR [dlp] ; Get first offset from the data list +.__doParams: + mov eax, DWORD [dlp] ; Get first offset from the data list add dlp, 4 ; dlp++ cmp eax, 0 ; Are we done? - je __nextVertex + je .__nextVertex ;; Not using align directive here because it sometimes ;; introduces an agi for the eax use below. nop nop - -__paramLoop: - mov tempVal, DWORD PTR [eax + vertex] ; Get the parameter from teh vertex + +.__paramLoop: + mov tempVal, DWORD [eax + vertex] ; Get the parameter from teh vertex add fifo, 4 ; fifoPtr += sizeof(FxU32) - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, DWORD [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ cmp eax, 0 ; Are we done? GR_FIFO_WRITE fifo, -4, tempVal ; *fifoPtr = data - jne SHORT __paramLoop + jne SHORT .__paramLoop - align 4 -__nextVertex: + align 4 +.__nextVertex: ;; On to the next vertex add vOffset, 4 - mov gc, [__GlideRoot + curGC] ; Reload gc incase we trashed it as a temp + mov gc, [_GlideRoot + curGC] ; Reload gc incase we trashed it as a temp cmp vOffset, 16 ; Offset of one past last vertex? - jne __vertexStart + jne .__vertexStart ;; Update gc->fifoPtr and gc->fifoRoom mov eax, fifo @@ -1235,28 +1227,28 @@ __nextVertex: mov [gc + fifoPtr], fifo sub eax, ebx - mov ebx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++; + mov ebx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++; sub [gc + fifoRoom], eax add ebx, 1 - mov [__GlideRoot + trisDrawn], ebx + mov [_GlideRoot + trisDrawn], ebx ;; return 1 (triangle drawn) mov eax, 1h -__triDone: +.__triDone: ;; Restore trashed registers - mov esi, [__GlideRoot + trisProcessed] + mov esi, [_GlideRoot + trisProcessed] pop ebp - + add esi, 1 ; _GlideRoot.stats.trisProcessed++; pop ebx pop edi - mov [__GlideRoot + trisProcessed], esi - + mov [_GlideRoot + trisProcessed], esi + pop esi - ret 12 + ret -endif ; !GL_AMD3D +%endif ; !GL_AMD3D diff --git a/glide2x/h3/glide/src/xtexdl.asm b/glide2x/h3/glide/src/xtexdl.asm index 3ca5323..68cb8d3 100644 --- a/glide2x/h3/glide/src/xtexdl.asm +++ b/glide2x/h3/glide/src/xtexdl.asm @@ -50,55 +50,46 @@ ;; mmx stuff for 3DNow!(tm) capable processors ;; -TITLE xtexdl.asm -OPTION OLDSTRUCTS - -.586P -.MMX -.K3D +%include "xos.inc" -ifdef USE_PACKET_FIFO -EXTRN __FifoMakeRoom: NEAR -endif - +%ifdef USE_PACKET_FIFO +extrn _FifoMakeRoom +%endif ;;; Definitions of cvg regs and glide root structures. -INCLUDE fxgasm.h +%INCLUDE "fxgasm.h" ; Arguments (STKOFF = 16 from 4 dword pushes) -STACKOFFSET = 16 -_gc$ = 4 + STACKOFFSET -_baseAddr$ = 8 + STACKOFFSET -_maxS$ = 12 + STACKOFFSET -_minT$ = 16 + STACKOFFSET -_maxT$ = 20 + STACKOFFSET -_texData$ = 24 + STACKOFFSET +STACKOFFSET equ 16 +_gc$ equ 4 + STACKOFFSET +_baseAddr$ equ 8 + STACKOFFSET +_maxS$ equ 12 + STACKOFFSET +_minT$ equ 16 + STACKOFFSET +_maxT$ equ 20 + STACKOFFSET +_texData$ equ 24 + STACKOFFSET ;; NB: The first set of registers (eax, ecx, and edx) are volatile across ;; function calls. The remaining registers are supposedly non-volatile ;; so they only store things that are non-volatile across the call. -fifo TEXTEQU ; fifo ptr in inner loop -gc TEXTEQU ; graphics context -dataPtr TEXTEQU ; pointer to exture data to be downloaded -curT TEXTEQU ; counter for texture scan lines (t-coordinate) -curS TEXTEQU ; texture s-coordinate -fRoom TEXTEQU ; room available in fifo (in bytes) +%define fifo ebp ; fifo ptr in inner loop +%define gc esi ; graphics context +%define dataPtr edi ; pointer to exture data to be downloaded +%define curT ebx ; counter for texture scan lines (t-coordinate) +%define curS ecx ; texture s-coordinate +%define fRoom edx ; room available in fifo (in bytes) -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE +%MACRO GR_FIFO_WRITE 3 + mov [%1 + _%2], %3 +%ENDM ; GR_FIFO_WRITE ;-------------------------------------------------------------------------- -_TEXT SEGMENT PAGE PUBLIC USE32 'CODE' - ASSUME DS: FLAT, SS: FLAT +segment SEG_TEXT ALIGN 32 - PUBLIC __grTexDownload_3DNow_MMX@24 - -__grTexDownload_3DNow_MMX@24 PROC NEAR +proc _grTexDownload_3DNow_MMX, 24 push ebx ; save caller's register variable mov curT, [esp + _maxT$ - 12] ; curT = maxT @@ -154,15 +145,15 @@ __grTexDownload_3DNow_MMX@24 PROC NEAR cmp fRoom, 4 ; enough room for NULL packet in fifo? jge __mmxAlignFifo ; yes, write NULL packet to align fifo -ifdef USE_PACKET_FIFO - push @Line ; Line # inside this function +%ifdef USE_PACKET_FIFO + push __LINE__ ; Line # inside this function push 0 ; NULL file name push 4 ; fifo space required (bytes) - call __FifoMakeRoom ; make fifo room + call _FifoMakeRoom ; make fifo room add esp, 12 ; pop 3 DWORD parameters to FifoMakeRoom -endif +%endif mov fifo, [gc + fifoPtr] ; fifoPtr modified by FifoMakeRoom, reload mov fRoom, [gc + fifoRoom] ; fifoRoom modified by FifoMakeRoom, reload @@ -173,7 +164,7 @@ endif __mmxAlignFifo: - mov DWORD PTR [fifo], 0 ; write NULL packet + mov DWORD [fifo], 0 ; write NULL packet sub fRoom, 4 ; fifoRoom -= 4 mov [gc + fifoRoom], fRoom ; store new fifoRoom @@ -189,7 +180,7 @@ __mmxAlignFifo: __loopT: -IFDEF GLIDE_DEBUG +%IFDEF GLIDE_DEBUG ;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned @@ -199,7 +190,7 @@ IFDEF GLIDE_DEBUG xor eax, eax ; create 0 mov [eax], eax ; move to DS:[0] forces GP __alignmentOK: -ENDIF ; GLIDE_DEBUG +%ENDIF ; GLIDE_DEBUG ;; Compute packet header words ;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0] @@ -245,19 +236,19 @@ __loopS: mov esp, esp ; filler __startDownload: lea eax, [curS+8] ; fifo space needed = scan line width + header size - + cmp fRoom, eax ; fifo space available >= fifo space required ? jge __loopT ; yup, write next scan line -ifdef USE_PACKET_FIFO - push @Line ; Line # inside this function +%ifdef USE_PACKET_FIFO + push __LINE__ ; Line # inside this function push 0h ; NULL file name push eax ; fifo space required - call __FifoMakeRoom ; make fifo room (if fifoPtr QWORD aligned before + call _FifoMakeRoom ; make fifo room (if fifoPtr QWORD aligned before add esp, 12 ; pop 3 DWORD parameters to FifoMakeRoom -endif +%endif mov fifo, [gc + fifoPtr] ; fifoPtr was modified by FifoMakeRoom, reload mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by FifoMakeRoom, reload @@ -274,10 +265,7 @@ __dlDone: pop esi ; restore caller's register variable pop ebx ; restore caller's register variable - ret 24 ; pop 6 DWORD parameters and return + ret ; pop 6 DWORD parameters and return -__grTexDownload_3DNow_MMX@24 ENDP +endp -_TEXT ENDS - -END