diff --git a/glide3x/cvg/glide3/src/cpudtect.asm b/glide3x/cvg/glide3/src/cpudtect.asm index 6eb0ce0..17051f8 100644 --- a/glide3x/cvg/glide3/src/cpudtect.asm +++ b/glide3x/cvg/glide3/src/cpudtect.asm @@ -19,6 +19,9 @@ ;; ;; $Header$ ;; $Log$ +;; Revision 1.1.1.1 1999/12/07 21:42:30 joseph +;; Initial checkin into SourceForge. +;; ;; ;; 1 10/08/98 11:30a Brent ;; @@ -35,10 +38,7 @@ ;; ;; -TITLE cpudtect.asm - -.586P -.model FLAT,C ; Flat memory, mangle publics with leading '_' +%include "xos.inc" ;; Data for data segment goes here ;_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'; @@ -46,14 +46,14 @@ TITLE cpudtect.asm ;;; Some useful constants ; CPU Type -CPUTypeUnknown = 0ffffffffh -CPUTypePrePent = 4h -CPUTypeP5 = 5h -CPUTypeP6 = 6h +CPUTypeUnknown equ 0ffffffffh +CPUTypePrePent equ 4h +CPUTypeP5 equ 5h +CPUTypeP6 equ 6h ;;; References to external data: -_TEXT SEGMENT +segment TEXT ;; ;; _cpu_detect_asm - detect the type of CPU ;; @@ -63,10 +63,8 @@ _TEXT SEGMENT ;; ;; returns 4 for non-pen -PUBLIC _cpu_detect_asm -_cpu_detect_asm PROC NEAR +proc _cpu_detect_asm P6Stuff: - .586 pushad ; save all regs. ; First, determine whether CPUID instruction is available. 
@@ -177,45 +175,38 @@ UnknownVendor: mov eax, 0ffffffffh ret -_cpu_detect_asm ENDP +endp ;------------------------------------------------------------------------------ ; this routine sets the precision to single ; which effects all adds, mults, and divs align 4 ; - PUBLIC single_precision_asm -single_precision_asm PROC NEAR -.586 +proc single_precision_asm push eax ; make room fnclex ; clear pending exceptions - fstcw WORD PTR [esp] - mov eax, DWORD PTR [esp] + fstcw WORD [esp] + mov eax, DWORD [esp] and eax, 0000fcffh ; clear bits 9:8 - mov DWORD PTR [esp], eax - fldcw WORD PTR [esp] + mov DWORD [esp], eax + fldcw WORD [esp] pop eax - ret 0 -single_precision_asm ENDP + ret +endp ;------------------------------------------------------------------------------ ; this routine sets the precision to double ; which effects all adds, mults, and divs align 4 ; - PUBLIC double_precision_asm -double_precision_asm PROC NEAR -.586 +proc double_precision_asm push eax ; make room fnclex ; clear pending exceptions - fstcw WORD PTR [esp] - mov eax, DWORD PTR [esp] + fstcw WORD [esp] + mov eax, DWORD [esp] and eax, 0000fcffh ; clear bits 9:8 or eax, 000002ffh ; set 9:8 to 10 - mov DWORD PTR [esp], eax - fldcw WORD PTR [esp] + mov DWORD [esp], eax + fldcw WORD [esp] pop eax - ret 0 -double_precision_asm ENDP - -_TEXT ENDS -END \ No newline at end of file + ret +endp diff --git a/glide3x/cvg/glide3/src/diget.c b/glide3x/cvg/glide3/src/diget.c index fb406ef..91574cb 100644 --- a/glide3x/cvg/glide3/src/diget.c +++ b/glide3x/cvg/glide3/src/diget.c @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ +** Revision 1.1.1.1 1999/12/07 21:42:30 joseph +** Initial checkin into SourceForge. 
+** ** ** 1 10/08/98 11:30a Brent ** @@ -476,10 +479,10 @@ GR_DIENTRY(grGet, FxU32, (FxU32 pname, FxU32 plength, FxI32 *params)) switch(hwc->SSTs[_GlideRoot.current_sst].type) { case GR_SSTTYPE_VOODOO: case GR_SSTTYPE_Voodoo2: - *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.fbRam; + *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.fbRam << 20; break; case GR_SSTTYPE_SST96: - *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.fbRam; + *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.fbRam << 20; break; default: *params = 0; /* XXX UMA architecture */ @@ -493,10 +496,10 @@ GR_DIENTRY(grGet, FxU32, (FxU32 pname, FxU32 plength, FxI32 *params)) switch(hwc->SSTs[_GlideRoot.current_sst].type) { case GR_SSTTYPE_VOODOO: case GR_SSTTYPE_Voodoo2: - *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.tmuConfig[0].tmuRam; + *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.tmuConfig[0].tmuRam << 20; break; case GR_SSTTYPE_SST96: - *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.tmuConfig.tmuRam; + *params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.tmuConfig.tmuRam << 20; break; default: *params = 0; /* XXX UMA architecture */ @@ -772,6 +775,32 @@ GR_DIENTRY(grGetString, const char *, (FxU32 pname)) #undef FN_NAME } /* grGetString */ +/*------------------------------------------------------------------- + Function: grGetRegistryOrEnvironmentStringExt + Date: 4/17/2000 + Implementor(s): atom + Description: + + This is here so the spooky code for finding the correct registry + tweak path in 9x/NT/2K does not have to be duplicated in 3dfxogl. + + Arguments: char* to the name of the setting to check for. + + Return: char* to the requested entry either from the registry + or the environment settings. NULL on error. 
+ -------------------------------------------------------------------*/ +GR_DIENTRY(grGetRegistryOrEnvironmentString, char*, (char* theEntry)) +{ +#define FN_NAME "grGetRegistryOrEnvironmentString" + char* retval ; + + retval = (theEntry != NULL) ? getenv(theEntry) : NULL ; /* getenv(NULL) is undefined; report a missing name as "not found" */ + + return retval ; + +#undef FN_NAME +} /* grGetRegistryOrEnvironmentString */ + /*------------------------------------------------------------------- Function: grReset Date: 16-Dec-97 @@ -881,6 +910,10 @@ GR_DIENTRY(grGetProcAddress, GrProc, (char *procName)) return (GrProc)_GlideRoot.deviceArchProcs.curLineProc; if (!strcmp(procName, "guQueryResolutionXYExt")) return (GrProc)guQueryResolutionXY; + if (!strcmp(procName, "grGetRegistryOrEnvironmentStringExt")) + return (GrProc)grGetRegistryOrEnvironmentString; + if (!strcmp(procName, "grTexDownloadTableExt")) + return (GrProc)grTexDownloadTableExt; } return NULL; diff --git a/glide3x/cvg/glide3/src/distate.c b/glide3x/cvg/glide3/src/distate.c index c625b8a..db39ff7 100644 --- a/glide3x/cvg/glide3/src/distate.c +++ b/glide3x/cvg/glide3/src/distate.c @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ + ** Revision 1.1.1.1.8.1 2003/06/29 18:45:55 guillemj + ** Fixed preprocessor invalid token errors. + ** ** Revision 1.1.1.1 1999/12/07 21:42:31 joseph ** Initial checkin into SourceForge. 
** @@ -507,6 +510,57 @@ GR_DIENTRY(grDepthBufferMode, void , (GrDepthBufferMode_t mode) ) #undef FN_NAME } /* grDepthBufferMode */ +/*------------------------------------------------------------------- + Function: grStipplePattern + Date: 23-Nov-2000 + Implementor(s): alanh + Description: + + Arguments: + + Return: + -------------------------------------------------------------------*/ +GR_DIENTRY(grStipplePattern, void , (GrStipplePattern_t stipple)) +{ + #define FN_NAME "grStipplePattern" + + /* [dBorca] TODO + * + GR_BEGIN_NOFIFOCHECK("grStipplePattern\n", 85); + + INVALIDATE(stipple); + + STOREARG(grStipplePattern, stipple); + */ + + #undef FN_NAME +} /* grStipplePattern */ + +/*------------------------------------------------------------------- + Function: grStippleMode + Date: 23-Nov-2000 + Implementor(s): alanh + Description: + + Arguments: + + Return: + -------------------------------------------------------------------*/ +GR_DIENTRY(grStippleMode, void , (GrStippleMode_t mode) ) +{ + #define FN_NAME "grStippleMode" + + /* [dBorca] TODO + * + GR_BEGIN_NOFIFOCHECK("grStippleMode\n", 85); + + INVALIDATE(fbzMode); + + STOREARG(grStippleMode, mode); + */ + + #undef FN_NAME +} /* grStippleMode */ /*------------------------------------------------------------------- Function: grDitherMode diff --git a/glide3x/cvg/glide3/src/fxgasm.c b/glide3x/cvg/glide3/src/fxgasm.c index eb78c91..09139de 100644 --- a/glide3x/cvg/glide3/src/fxgasm.c +++ b/glide3x/cvg/glide3/src/fxgasm.c @@ -37,7 +37,29 @@ * macros for creating assembler offset files *----------------------------------------------------------------------*/ -#ifndef __linux__ +#if 1 /* defined(NASM) - default */ +#define NEWLINE printf("\n") +#define COMMENT printf(";----------------------------------------------------------------------\n") + +#define HEADER(str) NEWLINE; COMMENT; \ + printf("; Assembler offsets for %s struct\n",str);\ + COMMENT; NEWLINE + +#define OFFSET(p,o,pname) if (hex) \ + printf("%s\tequ 
%08xh\n",pname,((int)&p.o)-(int)&p); \ + else printf("%s\tequ %10d\n",pname,((int)&p.o)-(int)&p) + +#define OFFSET2(p,o,pname) if (hex) \ + printf("%s\tequ %08xh\n",pname,((int)&o)-(int)&p); \ + else printf("%s\tequ %10d\n",pname,((int)&o)-(int)&p) + +#define SIZEOF(p,pname) if (hex) \ + printf("SIZEOF_%s\tequ %08lxh\n",pname,(unsigned long)sizeof(p)); \ + else printf("SIZEOF_%s\tequ %10ld\n",pname,(long)sizeof(p)) + +#else /* !NASM */ + +#if !defined(__linux__) && !defined(__DJGPP__) #define NEWLINE printf("\n") #define COMMENT printf(";----------------------------------------------------------------------\n") @@ -57,7 +79,7 @@ printf("SIZEOF_%s\t= %08xh\n",pname,sizeof(p)); \ else printf("SIZEOF_%s\t= %10d\n",pname,sizeof(p)) -#else +#else /* defined(__linux__) || defined(__DJGPP__) */ #define NEWLINE printf("\n"); #define COMMENT printf("/*----------------------------------------------------------------------*/\n") @@ -67,17 +89,19 @@ COMMENT; NEWLINE #define OFFSET(p,o,pname) if (hex) \ - printf("#define %s %08x\n",pname,((int)&p.o)-(int)&p); \ + printf("#define %s 0x%08x\n",pname,((int)&p.o)-(int)&p); \ else printf("#define %s %10d\n",pname,((int)&p.o)-(int)&p) #define OFFSET2(p,o,pname) if (hex) \ - printf("#define %s %08x\n",pname,((int)&o)-(int)&p); \ + printf("#define %s 0x%08x\n",pname,((int)&o)-(int)&p); \ else printf("#define %s %10d\n",pname,((int)&o)-(int)&p) #define SIZEOF(p,pname) if (hex) \ - printf("#define SIZEOF_%s %08x\n",pname,sizeof(p)); \ + printf("#define SIZEOF_%s 0x%08x\n",pname,sizeof(p)); \ else printf("#define SIZEOF_%s %10d\n",pname,sizeof(p)) -#endif +#endif /* defined(__linux__) || defined(__DJGPP__) */ + +#endif /* defined(NASM)*/ int main (int argc, char **argv) @@ -87,7 +111,7 @@ main (int argc, char **argv) static GrGC gc; #if !GLIDE_HW_TRI_SETUP - static Sstregs sst; + static SstRegs sst; static struct dataList_s dl; #endif /* !GLIDE_HW_TRI_SETUP */ @@ -99,18 +123,18 @@ main (int argc, char **argv) printf("#define __FX_INLINE_H__\n"); printf("\n"); 
- printf("#define kCurGCOffset 0x%XUL\n", + printf("#define kCurGCOffset 0x%lXUL\n", offsetof(struct _GlideRoot_s, curGC)); #if GLIDE_DISPATCH_SETUP - printf("#define kTriProcOffset 0x%XUL\n", + printf("#define kTriProcOffset 0x%lXUL\n", offsetof(struct GrGC_s, curArchProcs.triSetupProc)); - printf("#define kGCStateInvalid 0x%XUL\n", + printf("#define kGCStateInvalid 0x%lXUL\n", offsetof(struct GrGC_s, state.invalid)); #endif /* GLIDE_DISPATCH_SETUP */ printf("/* The # of 2-byte entries in the hw fog table */\n"); - printf("#define kInternalFogTableEntryCount 0x%XUL\n", + printf("#define kInternalFogTableEntryCount 0x%lXUL\n", sizeof(dummyRegs.fogTable) >> 1); printf("\n"); diff --git a/glide3x/cvg/glide3/src/fxglide.h b/glide3x/cvg/glide3/src/fxglide.h index 8f57366..85c3d12 100644 --- a/glide3x/cvg/glide3/src/fxglide.h +++ b/glide3x/cvg/glide3/src/fxglide.h @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ +** Revision 1.1.1.1.8.2 2003/07/25 07:13:41 dborca +** removed debris +** ** Revision 1.1.1.1.8.1 2003/06/29 18:45:55 guillemj ** Fixed preprocessor invalid token errors. ** @@ -2937,7 +2940,7 @@ do { \ #define REG_GROUP_SETF_CLAMP(__regBase, __regAddr, __val) \ do { \ const FxU32 fpClampVal = FP_FLOAT_CLAMP(__val); \ - REG_GROUP_ASSERT(__regAddr, fpClampVal, FXTRUE); \ + REG_GROUP_ASSERT(__regAddr, fpClampVal, FXTRUE); \ SET(((FxU32*)(__regBase))[offsetof(SstRegs, __regAddr) >> 2], fpClampVal); \ GR_INC_SIZE(sizeof(FxU32)); \ } while(0) @@ -3360,5 +3363,9 @@ extern void _grSliOriginClear(void); #endif /* (GLIDE_PLATFORM & GLIDE_HW_CVG) */ +GR_ENTRY(grTexDownloadTableExt, + void, + (GrChipID_t tmu, GrTexTable_t type, void *data)); + #endif /* __FXGLIDE_H__ */ diff --git a/glide3x/cvg/glide3/src/g3df.c b/glide3x/cvg/glide3/src/g3df.c index 20446ad..8c3ce3d 100644 --- a/glide3x/cvg/glide3/src/g3df.c +++ b/glide3x/cvg/glide3/src/g3df.c @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ +** Revision 1.1.1.1 1999/12/07 21:42:32 joseph +** Initial checkin into SourceForge. 
+** ** ** 1 10/08/98 11:30a Brent ** @@ -390,18 +393,13 @@ GR_DIENTRY(gu3dfGetInfo, FxBool, /* ** determine the color format of the input image */ -#ifdef __GNUC__ - /* This function is not found in libgcc.a */ { - char* tempStr = (char*)color_format; - while(*tempStr != '\0') *tempStr++ = toupper(*tempStr); + char *tempStr = (char*)color_format; + while (*tempStr != '\0') { + *tempStr = (char)toupper((unsigned char)*tempStr); /* toupper() is undefined for negative char values */ + tempStr++; + } } -#else - { - extern char* strupr(char*); - strupr(color_format); - } -#endif /* __GNUC__ */ i = 0; format_found = FXFALSE; diff --git a/glide3x/cvg/glide3/src/glide.h b/glide3x/cvg/glide3/src/glide.h index 4322353..da0f88b 100644 --- a/glide3x/cvg/glide3/src/glide.h +++ b/glide3x/cvg/glide3/src/glide.h @@ -52,6 +52,7 @@ extern "C" { typedef FxU32 GrColor_t; typedef FxU8 GrAlpha_t; typedef FxU32 GrMipMapId_t; +typedef FxU32 GrStipplePattern_t; typedef FxU8 GrFog_t; typedef FxU32 GrContext_t; typedef int (FX_CALL *GrProc)(); @@ -240,6 +241,11 @@ typedef FxI32 GrDitherMode_t; #define GR_DITHER_2x2 0x1 #define GR_DITHER_4x4 0x2 +typedef FxI32 GrStippleMode_t; +#define GR_STIPPLE_DISABLE 0x0 +#define GR_STIPPLE_PATTERN 0x1 +#define GR_STIPPLE_ROTATE 0x2 + typedef FxI32 GrFogMode_t; #define GR_FOG_DISABLE 0x0 #define GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT 0x1 diff --git a/glide3x/cvg/glide3/src/gsst.c b/glide3x/cvg/glide3/src/gsst.c index f63755e..d40f3d5 100644 --- a/glide3x/cvg/glide3/src/gsst.c +++ b/glide3x/cvg/glide3/src/gsst.c @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ +** Revision 1.1.1.1 1999/12/07 21:42:33 joseph +** Initial checkin into SourceForge. 
+** ** ** 1 10/08/98 11:30a Brent ** @@ -1684,6 +1687,28 @@ GR_ENTRY(grSstWinClose, FxBool, (GrContext_t context)) #undef FN_NAME } /* grSstWinClose */ +/*------------------------------------------------------------------- + Function: grSetNumPendingBuffers + Date: 13-Oct-2000 + Implementor(s): mmcclure + Description: + + Allow the application to supply the number of pending buffers + + Arguments: + + NumPendingBuffers - Sent to force number of pending buffers + + Return: + -------------------------------------------------------------------*/ +GR_DIENTRY(grSetNumPendingBuffers, void, (FxI32 NumPendingBuffers)) +{ + /* [dBorca] TODO + * + _GlideRoot.environment.swapPendingCount = NumPendingBuffers; + */ +} + /*------------------------------------------------------------------- Function: grSelectContext Date: 18-Jan-98 diff --git a/glide3x/cvg/glide3/src/gtexdl.c b/glide3x/cvg/glide3/src/gtexdl.c index 22b89f6..05434f4 100644 --- a/glide3x/cvg/glide3/src/gtexdl.c +++ b/glide3x/cvg/glide3/src/gtexdl.c @@ -19,6 +19,9 @@ ** ** $Header$ ** $Log$ +** Revision 1.1.1.1 1999/12/07 21:42:34 joseph +** Initial checkin into SourceForge. +** ** ** 1 10/08/98 11:30a Brent ** @@ -171,9 +174,297 @@ extern const int _grMipMapHostWH[G3_ASPECT_TRANSLATE(GR_ASPECT_1x8)+1][G3_LOD_TR extern const int _grMipMapHostWH[G3_ASPECT_TRANSLATE(GR_ASPECT_LOG2_1x8)+1][G3_LOD_TRANSLATE(GR_LOD_LOG2_1)+1][2]; #endif -#ifndef GLIDE3_ALPHA -#define GLIDE_POINTCAST_PALETTE +#define GLIDE_POINTCAST_PALETTE 1 + +/*--------------------------------------------------------------------------- +** _grTexDownloadNccTableExt +** +** Downloads an ncctable to the specified _physical_ TMU(s). This +** function is called internally by Glide and should not be executed +** by an application. 
+*/ +GR_DDFUNC(_grTexDownloadNccTableExt, + void, + (GrChipID_t tmu, FxU32 which, const GuNccTable *table, int start, int end)) +{ +#define FN_NAME "_grTexDownloadNccTableExt" + GR_BEGIN_NOFIFOCHECK(FN_NAME,89); + GDBG_INFO_MORE(gc->myLevel,"(%d,%d, 0x%x, %d,%d)\n",tmu,which,table,start,end); + GR_ASSERT(start==0); + GR_ASSERT(end==11); + + /* check for null pointer */ + if (table == NULL) return; + + _GlideRoot.stats.palDownloads++; + _GlideRoot.stats.palBytes += (end-start+1)<<2; + + if (gc->tmu_state[tmu].ncc_table[which] != table) { + SstRegs* texHW; + int i; +#ifdef GLIDE_POINTCAST_PALETTE + texHW = SST_TMU(hw,tmu); +#else + texHW = SST_CHIP(hw,0xE); #endif + + if (which == 0) { +#ifdef GLIDE_POINTCAST_PALETTE + REG_GROUP_BEGIN((0x02UL << tmu), nccTable0, 12, 0x0FFF); +#else + REG_GROUP_BEGIN(0x0EUL, nccTable0, 12, 0x0FFF); +#endif + for (i = 0; i < 12; i++) REG_GROUP_SET(texHW, nccTable0[i], table->packed_data[i]); + REG_GROUP_END(); + } else { +#ifdef GLIDE_POINTCAST_PALETTE + REG_GROUP_BEGIN((0x02UL << tmu), nccTable1, 12, 0x0FFF); +#else + REG_GROUP_BEGIN(0x0EUL, nccTable1, 12, 0x0FFF); +#endif + for (i = 0; i < 12; i++) REG_GROUP_SET(texHW, nccTable1[i], table->packed_data[i]); + REG_GROUP_END(); + } + + gc->tmu_state[tmu].ncc_table[which] = table; + } + + GR_END(); +#undef FN_NAME +} /* _grTexDownloadNccTableExt */ + +/*------------------------------------------------------------------- + Function: _grTexDownloadPaletteExt + Date: 6/9 + Implementor(s): jdt + Library: Glide + Description: + Private function to download a palette to the specified tmu + Arguments: + tmu - which tmu to download the palette to + pal - the pallete data + start - beginning index to download + end - ending index to download + Return: + none + -------------------------------------------------------------------*/ +GR_DDFUNC(_grTexDownloadPaletteExt, + void, + (GrChipID_t tmu, GrTexTable_t type, GuTexPalette *pal, int start, int end)) +{ +#define FN_NAME "_grTexDownloadPaletteExt" 
+ GR_BEGIN_NOFIFOCHECK(FN_NAME, 89); + GDBG_INFO_MORE(gc->myLevel,"(%d,0x%x, %d,%d)\n",tmu,pal,start,end); + + GR_CHECK_F(FN_NAME, pal == NULL, "pal invalid"); + GR_CHECK_F(FN_NAME, start < 0, "invalid start index"); + GR_CHECK_F(FN_NAME, end > 255, "invalid end index"); + + /* NOTE: + ** + ** This code broadcasts the palette because in the future, we will + ** only support one global texture palette no matter how many TMUs + ** there are. This is fallout from the fact that future hardware + ** has a unified memory architecture. + ** + ** Source licensees (meaning arcade or LBE vendors) that require the + ** one palette/tmu mode should define GLIDE_POINTCAST_PALETTE on + ** the command line for this file. Understand, however, that this + ** will not work on future hardware. + */ + +#ifdef GLIDE_POINTCAST_PALETTE + /* + ** FURTHER NOTE: + ** There is a subtlety (nice way of saying BUG) here. + ** If TMU0 is specified, then the palette will be broadcast to all + ** TMUS. So, if the user downloads TMU1's palette, then TMU0's + ** palette, TMU0's palette will be on *both* TMUs. This is a + ** pretty strong indicator that no one is using separate palettes + ** on different TMUs. + */ + hw = SST_TMU(hw,tmu); +#else + hw = SST_CHIP(hw,0xE); +#endif + + _GlideRoot.stats.palDownloads++; + _GlideRoot.stats.palBytes += ((end - start + 1) << 2); + + /* We divide the writes into 3 chunks trying to group things into + * complete 8 word grouped packets to fit the nccTable palette + * format: stuff before the 8 word alignment, aligned writes, and + * stuff after the 8 word alignment to the end. The slop regions + * are one packet apiece. 
+ */ + { +#ifdef GLIDE_POINTCAST_PALETTE + const FifoChipField chipId = (FifoChipField)(0x02UL << tmu); +#else + const FifoChipField chipId = (FifoChipField)0x0EUL; +#endif + const int endSlop = (end & ~0x07); + const int startSlop = MIN(((start + 8) & ~0x07) - 1, end); + int i = start; + + /* Is the start of the palette range unaligned or is the end of + * the range less than a completely aligned range? + */ + if (type == GR_TEXTABLE_PALETTE) { + if (((start & 0x07) != 0) || (end < ((start + 8) & ~0x07))) { + const FxI32 slopCount = startSlop - start + 1; + GR_ASSERT((slopCount > 0) && (slopCount <= 8)); + + REG_GROUP_BEGIN(chipId, nccTable0[4 + (start & 0x07)], + slopCount, (0xFF >> (8 - slopCount))); + while(i < start + slopCount) { + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF)); + i++; + } + REG_GROUP_END(); + } + + /* Do all of the aligned palette ranges. */ + while(i < endSlop) { + const int endIndex = i + 8; + + REG_GROUP_BEGIN(chipId, nccTable0[4], 8, 0xFF); + while(i < endIndex) { + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF)); + i++; + } + REG_GROUP_END(); + } + + /* Do we have any more slop at the end of the range? 
*/ + if (i <= end) { + const FxU32 slopCount = end - endSlop + 1; + + REG_GROUP_BEGIN(chipId, nccTable0[4], + slopCount, (0xFF >> (8 - slopCount))); + while(i <= end) { + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF)); + i++; + } + REG_GROUP_END(); + } + } + else { + if (((start & 0x07) != 0) || (end < ((start + 8) & ~0x07))) { + const FxI32 slopCount = startSlop - start + 1; + GR_ASSERT((slopCount > 0) && (slopCount <= 8)); + + REG_GROUP_BEGIN(chipId, nccTable0[4 + (start & 0x07)], + slopCount, (0xFF >> (8 - slopCount))); + while(i < start + slopCount) { + FxU32 p1, p2, p3, p4; + p1 = p2 = pal->data[i]; + p1 &= 0xfc000000; p2 &= 0x00fc0000; + p1 >>= 8; p2 >>= 6; + p3 = p4 = pal->data[i]; + p3 &= 0x0000fc00; p4 &= 0x000000fc; + p3 >>= 4; p4 >>= 2; + p1 |= p2; p3 |= p4; p1 |= p3; + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | p1)); + i++; + } + REG_GROUP_END(); + } + + /* Do all of the aligned palette ranges. */ + while(i < endSlop) { + const int endIndex = i + 8; + + REG_GROUP_BEGIN(chipId, nccTable0[4], 8, 0xFF); + while(i < endIndex) { + FxU32 p1, p2, p3, p4; + p1 = p2 = pal->data[i]; + p1 &= 0xfc000000; p2 &= 0x00fc0000; + p1 >>= 8; p2 >>= 6; + p3 = p4 = pal->data[i]; + p3 &= 0x0000fc00; p4 &= 0x000000fc; + p3 >>= 4; p4 >>= 2; + p1 |= p2; p3 |= p4; p1 |= p3; + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | p1)); + i++; + } + REG_GROUP_END(); + } + + /* Do we have any more slop at the end of the range? 
*/ + if (i <= end) { + const FxU32 slopCount = end - endSlop + 1; + + REG_GROUP_BEGIN(chipId, nccTable0[4], + slopCount, (0xFF >> (8 - slopCount))); + while(i <= end) { + FxU32 p1, p2, p3, p4; + p1 = p2 = pal->data[i]; + p1 &= 0xfc000000; p2 &= 0x00fc0000; + p1 >>= 8; p2 >>= 6; + p3 = p4 = pal->data[i]; + p3 &= 0x0000fc00; p4 &= 0x000000fc; + p3 >>= 4; p4 >>= 2; + p1 |= p2; p3 |= p4; p1 |= p3; + REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)], + (0x80000000 | ((i & 0xFE) << 23) | p1)); + i++; + } + REG_GROUP_END(); + } + } + } + + GR_END(); +#undef FN_NAME +} /* _grTexDownloadPaletteExt */ + +/*------------------------------------------------------------------- + Function: grTexDownloadTableExt + Date: 6/3 + Implementor(s): jdt, GaryMcT + Library: glide + Description: + download look up table data to a tmu + Arguments: + tmu - which tmu + type - what type of table to download + One of: + GR_TEXTABLE_NCC0 + GR_TEXTABLE_NCC1 + GR_TEXTABLE_PALETTE, GR_TEXTABLE_PALETTE_6666_EXT + void *data - pointer to table data + Return: + none + -------------------------------------------------------------------*/ +GR_ENTRY(grTexDownloadTableExt, + void, + (GrChipID_t tmu, GrTexTable_t type, void *data)) +{ + GR_BEGIN_NOFIFOCHECK("grTexDownloadTableExt",89); + GDBG_INFO_MORE(gc->myLevel,"(%d,%d,0x%x)\n",tmu,type,data); + GR_CHECK_TMU(myName,tmu); /* FN_NAME is not defined in this function; use myName like the checks below */ + GR_CHECK_F(myName, type > GR_TEXTABLE_PALETTE_6666_EXT, "invalid table specified"); + GR_CHECK_F(myName, !data, "invalid data pointer"); + + gc->state.tex_table = type; + + if ((type == GR_TEXTABLE_PALETTE) || (type == GR_TEXTABLE_PALETTE_6666_EXT)) /* Need Palette Download Code */ + _grTexDownloadPaletteExt(tmu, type, (GuTexPalette *)data, 0, 255); + else { /* Type is an ncc table */ + _grTexDownloadNccTableExt(tmu, type, (GuNccTable*)data, 0, 11); + /* _grTexDownloadNccTable(tmu, type, (GuNccTable*)data, 0, 11); */ + } + GR_END(); +} /* grTexDownloadTableExt */ + +#undef GLIDE_POINTCAST_PALETTE + 
/*--------------------------------------------------------------------------- ** _grTexDownloadNccTable ** @@ -440,7 +731,6 @@ GR_DDFUNC(_grTexDownloadPalette, Return: none -------------------------------------------------------------------*/ -#if defined(GLIDE3) && defined(GLIDE3_ALPHA) GR_ENTRY(grTexDownloadTable, void, (GrTexTable_t type, void *data)) @@ -460,27 +750,6 @@ GR_ENTRY(grTexDownloadTable, } GR_END(); } /* grTexDownloadTable */ -#else -GR_ENTRY(grTexDownloadTable, - void, - (GrChipID_t tmu, GrTexTable_t type, void *data)) -{ - GR_BEGIN_NOFIFOCHECK("grTexDownloadTable",89); - GDBG_INFO_MORE(gc->myLevel,"(%d,%d,0x%x)\n",tmu,type,data); - GR_CHECK_TMU(FN_NAME,tmu); - GR_CHECK_F(FN_NAME, type > GR_TEXTABLE_PALETTE, "invalid table specified"); - GR_CHECK_F(FN_NAME, !data, "invalid data pointer"); - - if ((type == GR_TEXTABLE_PALETTE) && (GR_TEXTABLE_PALETTE_6666)) /* Need Palette Download Code */ - _grTexDownloadPalette(tmu, type, (GuTexPalette *)data, 0, 255); - else { /* Type is an ncc table */ - _grTexDownloadNccTable(tmu, type, (GuNccTable*)data, 0, 11); - /* _grTexDownloadNccTable(tmu, type, (GuNccTable*)data, 0, 11); */ - } - GR_END(); -#undef FN_NAME -} /* grTexDownloadTable */ -#endif /*------------------------------------------------------------------- diff --git a/glide3x/cvg/glide3/src/xdraw2.asm b/glide3x/cvg/glide3/src/xdraw2.asm index b452706..43fa5b0 100644 --- a/glide3x/cvg/glide3/src/xdraw2.asm +++ b/glide3x/cvg/glide3/src/xdraw2.asm @@ -19,6 +19,9 @@ ;; $Header$ ;; $Revision$ ;; $Log$ +;; Revision 1.1.1.1 1999/12/07 21:42:35 joseph +;; Initial checkin into SourceForge. +;; ;; ;; 1 10/08/98 11:30a Brent ;; @@ -60,117 +63,127 @@ ; B4 Chip field fix. 
;; -TITLE xdraw2.asm -OPTION OLDSTRUCTS +%include "xos.inc" -.586P -.MMX -.K3D +extrn _GlideRoot +extrn _FifoMakeRoom + +%MACRO GR_FIFO_WRITE 3 + mov [%1 + %2], %3 +%ENDMACRO ; GR_FIFO_WRITE -EXTRN __GlideRoot : DWORD -EXTRN __FifoMakeRoom : NEAR +%MACRO WRITE_MM1_FIFO_ALIGNED 1 -_DATA SEGMENT - One DD 03f800000r +; 3DNow! +%ifdef GL_AMD3D + movq [fifo+%1], mm1 ; store current param | previous param +%endif + +%ENDMACRO ; WRITE_MM1_FIFO_ALIGNED + +%MACRO WRITE_MM1LOW_FIFO 0 + +; 3DNow +%ifdef GL_AMD3D + movd [fifo], mm1 ; store current param | previous param +%endif + +%ENDMACRO ; WRITE_MM1LOW_FIFO + +segment DATA + One DD 1.0 Area DD 0 -_DATA ENDS ;;; Definitions of cvg regs and glide root structures. -INCLUDE fxgasm.h +%INCLUDE "fxgasm.h" ;; enables/disables trisProcessed and trisDrawn counters -STATS = 1 +%define STATS 1 ; Arguments (STKOFF = 16 from 4 pushes) -STKOFF = 16 -_va$ = 4 + STKOFF -_vb$ = 8 + STKOFF -_vc$ = 12 + STKOFF +STKOFF equ 16 +_va$ equ 4 + STKOFF +_vb$ equ 8 + STKOFF +_vc$ equ 12 + STKOFF ;; coordinate offsets into vertex. ;; NB: These are constants and are not ;; user settable like the rest of the ;; parameter offset. Weird. 
-X = 0 -Y = 4 +X equ 0 +Y equ 4 -CONST SEGMENT -$T2003 DD 046400000r ; 12288 -$T2005 DD 03f800000r ; 1 -$T2006 DD 043800000r ; 256 -CONST ENDS +segment CONST +T2003 DD 12288.0 ; 12288 +T2005 DD 1.0 ; 1 +T2006 DD 256.0 ; 256 -PROC_TYPE MACRO procType:= - IFDEF GL_AMD3D - EXITM <__trisetup_3DNow_&procType&@12> - ELSE - EXITM <__trisetup_Default_&procType&@12> - ENDIF - ENDM +%MACRO PROC_TYPE 1 + %IFDEF GL_AMD3D + proc _trisetup_3DNow_%1, 12 + %ELSE + proc _trisetup_Default_%1, 12 + %ENDIF +%ENDM ;-------------------------------------------------------------------------- -_TEXT SEGMENT PAGE PUBLIC USE32 'CODE' - ASSUME DS: FLAT, SS: FLAT +segment TEXT ALIGN 32 - PUBLIC PROC_TYPE(cull) -PROC_TYPE(cull) PROC NEAR +PROC_TYPE cull -GLIDE_CULLING textequ <1> -GLIDE_PACK_RGB textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> +%define GLIDE_CULLING 1 +%define GLIDE_PACK_RGB 0 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING -PROC_TYPE(cull) ENDP +endp ALIGN 32 - PUBLIC PROC_TYPE() -PROC_TYPE() PROC NEAR +PROC_TYPE Default -GLIDE_CULLING textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_GENERIC_SETUP textequ <0> -INCLUDE xdraw2.inc -GLIDE_GENERIC_SETUP textequ <0> -GLIDE_PACK_ALPHA textequ <0> -GLIDE_PACK_RGB textequ <0> -GLIDE_CULLING textequ <0> +%define GLIDE_CULLING 0 +%define GLIDE_PACK_RGB 0 +%define GLIDE_PACK_ALPHA 0 +%define GLIDE_GENERIC_SETUP 0 +%INCLUDE "xdraw2.inc" +%undef GLIDE_GENERIC_SETUP +%undef GLIDE_PACK_ALPHA +%undef GLIDE_PACK_RGB +%undef GLIDE_CULLING -PROC_TYPE() ENDP +endp -IFNDEF GL_AMD3D +%IFNDEF GL_AMD3D ALIGN 32 - PUBLIC __trisetup_clip_coor_thunk@12 -__trisetup_clip_coor_thunk@12 PROC NEAR +proc _trisetup_clip_coor_thunk, 12 -gc 
TEXTEQU ; Current graphics context -procPtr TEXTEQU -vPtr TEXTEQU +%define gc eax ; Current graphics context +%define procPtr edx +%define vPtr ecx ;; Call through to the gc->curArchProcs.drawTrianglesProc w/o ;; adding extra stuff to the stack. I wish we could actually ;; do a direct return here w/o too much work. lea vPtr, [esp + _va$ - STKOFF] ; Get vertex pointer address - mov gc, [__GlideRoot + curGC]; GR_DCL_GC + mov gc, [_GlideRoot + curGC]; GR_DCL_GC ;; If debugging make sure that we're in clip coordinates -IFDEF GLIDE_DEBUG - mov ebx, [gc + CoordinateSpace] - test ebx, 1 +%IFDEF GLIDE_DEBUG + test dword [gc + CoordinateSpace], 1 jnz __clipSpace xor eax, eax mov [eax], eax __clipSpace: -ENDIF ; GLIDE_DEBUG +%ENDIF ; GLIDE_DEBUG mov procPtr, [gc + drawTrianglesProc]; Prefetch drawTriangles proc addr push vPtr ; vertex array address @@ -180,10 +193,7 @@ ENDIF ; GLIDE_DEBUG call procPtr ; (*gc->curArchProcs.drawTrianglesProc)(grDrawVertexArray, 3, vPtr) - ret 12 ; pop 3 dwords (vertex addrs) and return -__trisetup_clip_coor_thunk@12 ENDP + ret ; pop 3 dwords (vertex addrs) and return +endp -ENDIF ; !GL_AMD3D - -_TEXT ENDS - END +%ENDIF ; !GL_AMD3D diff --git a/glide3x/cvg/glide3/src/xdraw2.inc b/glide3x/cvg/glide3/src/xdraw2.inc index b8802c1..9dc2f89 100644 --- a/glide3x/cvg/glide3/src/xdraw2.inc +++ b/glide3x/cvg/glide3/src/xdraw2.inc @@ -20,6 +20,9 @@ ;; $Header$ ;; $Revision$ ;; $Log$ +;; Revision 1.1.1.1 1999/12/07 21:42:35 joseph +;; Initial checkin into SourceForge. 
+;; ; ; 2 10/30/97 6:53p Peter ; first real cut at tri asm @@ -38,42 +41,26 @@ ;; AMD3D version ;;-------------------------------------------------------------------------- -ifdef GL_AMD3D +%ifdef GL_AMD3D -TITLE xdraw2.inc - -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE - - -WRITE_MM1_FIFO_ALIGNED MACRO __offset - movq [fifo+__offset], mm1 ; store current param | previous param -ENDM ; WRITE_MM1_FIFO_ALIGNED - - -WRITE_MM1LOW_FIFO MACRO - movd [fifo], mm1 ; store current param | previous param -ENDM ; WRITE_MM1LOW_FIFO - -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to fifo entries -tempVal TEXTEQU +%define gc edi ; points to graphics context +%define fifo ebp ; points to fifo entries +%define tempVal esi ;; Prologue stuff push edi ; save caller's register variable - mov gc,[__GlideRoot+curGC]; GR_DCL_GC + mov gc,[_GlideRoot+curGC]; GR_DCL_GC push ebp ; save frame pointer push ebx ; save caller's register variable -IF GLIDE_CULLING -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller +%IF GLIDE_CULLING +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller -cull TEXTEQU ; cull mode -intArea TEXTEQU ; area temp storage +%define cull edx ; cull mode +%define intArea ecx ; area temp storage mov fb, [esp + _vb$ - 4] ; get base address of vertex B push esi ; save caller's register variable @@ -84,10 +71,10 @@ intArea TEXTEQU ; area temp storage femms ; will use AMD3D, clear FPU/MMX registers cmp cull, 0 ; culling enabled ? 
- mov tempVal, [__GlideRoot + curTriSize] + mov tempVal, [_GlideRoot + curTriSize] ;; Cull Check - jz nocull ; nope, no culling + jz .nocull ; nope, no culling mov fa, [esp + _va$] ; get base address of vertex A movq mm2, [fc + X] ; yc | xc @@ -126,20 +113,20 @@ intArea TEXTEQU ; area temp storage jge __cullFail ; triangle facing away from viewer, culled cmp ebx, tempVal ; fifo space required >= space available ? - jge __triBegin ; yup, push out triangle data to Voodoo + jge .__triBegin ; yup, push out triangle data to Voodoo - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push tempVal ; fifo space required - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack - jmp __triBegin ; merge back with short path + jmp .__triBegin ; merge back with short path ;; culling disabled -nocull: +.nocull: ;; Check to make sure that we have enough room for ;; the complete triangle packet. @@ -147,48 +134,48 @@ nocull: mov ebx, [gc + fifoRoom] ; fifo space available cmp ebx, tempVal ; fifo spce available >= space needed ? 
- jge __triBegin ; yup, ready to draw triangle + jge .__triBegin ; yup, ready to draw triangle - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push tempVal ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler -ELSE ; !GLIDE_CULLING +%ELSE ; !GLIDE_CULLING lea eax, [esp+ _va$] ; pointer to vertex pointers push esi ; save caller's register variable - mov tempVal, [__GlideRoot + curTriSize] ; data for whole triangle in bytes + mov tempVal, [_GlideRoot + curTriSize] ; data for whole triangle in bytes mov ebx, [gc + fifoRoom] ; fifo space available add tempVal, 4 ; fifo space needed (include 4-byte header) femms ; will use AMD3D, clear FPU/MMX registers cmp ebx, tempVal ; fifo spce available >= space needed ? - jge __triBegin ; yup, ready to draw triangle + jge .__triBegin ; yup, ready to draw triangle - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push tempVal ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler -ENDIF ; GLIDE_CULLING +%ENDIF ; GLIDE_CULLING -dlp TEXTEQU ; points to dataList structure -dlpstrt TEXTEQU ; points to begin of dataList structure -vertex TEXTEQU ; the current vertex -packCol TEXTEQU +%define dlp ebx ; points to dataList structure +%define dlpstrt ecx ; points to begin of dataList structure +%define vertex edx ; the current vertex +%define packCol esi -__triBegin: +.__triBegin: mov eax, [gc+triPacketHdr]; Packet 3 header lea dlp,[gc + tsuDataList]; Reset the dataList @@ -198,7 +185,7 @@ __triBegin: mov dlpstrt, dlp ; save pointer to start of dataList test fifo, 4 ; is fifo pointer qword aligned ? 
- jz __fifo_aligned ; yes, it is qword aligned + jz .__fifo_aligned ; yes, it is qword aligned movq mm1, [vertex+X] ; y | x GR_FIFO_WRITE fifo, 0, eax ; write header to fifo; now qword aligned @@ -207,8 +194,8 @@ __triBegin: WRITE_MM1_FIFO_ALIGNED -8 ; PCI write y | x nop ; filler -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -227,7 +214,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb por mm1, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -241,7 +228,7 @@ ELSE ; !GLIDE_PACK_ALPHA psrlq mm1, 24 ; 00000000 | 0000gg00 por mm1, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) @@ -273,7 +260,7 @@ __paramLoop1a: nop ; filler jmp __paramLoopDoneWBone1 ; merge back into common stream -ELSE ; ! GLIDE_PACK_RGB +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty @@ -281,14 +268,14 @@ ELSE ; ! GLIDE_PACK_RGB test eax, eax ; at end of list ? lea dlp, [dlp+4] ; dlp++ - jz __paramLoopDoneWBzero1; yes, "write buffer" empty + jz .__paramLoopDoneWBzero1; yes, "write buffer" empty -__paramLoop1a: +.__paramLoop1a: movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone1 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone1 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter add dlp, 8 ; dlp += 2 @@ -300,14 +287,14 @@ __paramLoop1a: test eax, eax ; at end of offset list (offset == 0) ? 
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jnz __paramLoop1a ; nope, copy next parameter + jnz .__paramLoop1a ; nope, copy next parameter nop ; filler - jmp __paramLoopDoneWBzero1; write buffer empty + jmp .__paramLoopDoneWBzero1; write buffer empty -ENDIF ; GLIDE_PACK_RGB +%ENDIF ; GLIDE_PACK_RGB -__fifo_aligned: +.__fifo_aligned: movd mm2, [vertex+X] ; y | x of vertex A add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -317,8 +304,8 @@ __fifo_aligned: WRITE_MM1_FIFO_ALIGNED -8 ; PCI write x | header movd mm1, [vertex+Y] ; 0 | y of vertex A -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -337,7 +324,7 @@ IF GLIDE_PACK_ALPHA por mm4, mm2 ; 00000000 | 00rrggbb por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -351,7 +338,7 @@ ELSE ; !GLIDE_PACK_ALPHA psrlq mm4, 24 ; 00000000 | 0000gg00 por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y mov eax, [dlp] ; get first offset from the data list @@ -387,14 +374,14 @@ __paramLoop1b: nop ; filler jmp __paramLoopDoneWBzero1; write buffer empty -ELSE ; !GLIDE_PACK_RGB +%ELSE ; !GLIDE_PACK_RGB mov eax, [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ cmp eax, 0 ; end of list ? - jz __paramLoopDoneWBone1 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone1 ; yes, "write buffer" has y data -__paramLoop1b: +.__paramLoop1b: movd mm2, [eax+vertex] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -405,16 +392,16 @@ __paramLoop1b: test eax, eax ; at end of offset list (offset == 0) ? 
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jz __paramLoopDoneWBzero1; exit, "write buffer" empty + jz .__paramLoopDoneWBzero1; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp-4] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop1b ; nope, copy next parameter -ENDIF + jnz .__paramLoop1b ; nope, copy next parameter +%ENDIF -__paramLoopDoneWBone1: +.__paramLoopDoneWBone1: ;; here: "write buffer" has one DWORD left over from vertex A @@ -430,8 +417,8 @@ __paramLoopDoneWBone1: movd mm1, [vertex+Y] ; 0 | y of vertex B nop ; filler -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -450,7 +437,7 @@ IF GLIDE_PACK_ALPHA por mm4, mm2 ; 00000000 | 00rrggbb por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -464,7 +451,7 @@ ELSE ; !GLIDE_PACK_ALPHA add dlp, 4 ; next data list entry por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y mov eax, [dlp] ; get first offset from the data list @@ -500,14 +487,14 @@ __paramLoop2b: nop ; filler jmp __paramLoopDoneWBzero2; write buffer empty -ELSE ; !GLIDE_PACK_RGB +%ELSE ; !GLIDE_PACK_RGB mov eax, [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, eax ; end of list ? - jz __paramLoopDoneWBone2 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone2 ; yes, "write buffer" has y data -__paramLoop2b: +.__paramLoop2b: movd mm2, [eax+vertex] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -518,20 +505,20 @@ __paramLoop2b: test eax, eax ; at end of offset list (offset == 0) ? 
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jz __paramLoopDoneWBzero2; exit, "write buffer" empty + jz .__paramLoopDoneWBzero2; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp-4] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop2b ; nope, copy next parameter + jnz .__paramLoop2b ; nope, copy next parameter nop ; filler - jmp __paramLoopDoneWBone2 ; write buffer contains one DWORD -ENDIF + jmp .__paramLoopDoneWBone2 ; write buffer contains one DWORD +%ENDIF -__paramLoopDoneWBzero1: +.__paramLoopDoneWBzero1: mov vertex, [esp + _vb$] ; Current vertex = B mov dlp, dlpstrt ; Reset the dataList @@ -542,8 +529,8 @@ __paramLoopDoneWBzero1: WRITE_MM1_FIFO_ALIGNED -8 ; PCI write y | x of vertex B nop ; filler -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -562,7 +549,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm3 ; 00000000 | aarrggbb add dlp, 8 ; skip data list entry "a" -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -576,11 +563,11 @@ ELSE ; !GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb add dlp, 4 ; next data list entry -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) - mov eax, DWORD PTR [dlp] ; get first offset from the data list + mov eax, dword [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, eax ; end of list ? @@ -608,7 +595,7 @@ __paramLoop2a: nop ; filler jmp __paramLoopDoneWBone2 ; merge back into common stream -ELSE ; ! GLIDE_PACK_RGB +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty @@ -616,14 +603,14 @@ ELSE ; ! GLIDE_PACK_RGB add dlp, 4 ; dlp++ cmp eax, 0 ; at end of list ? 
- jz __paramLoopDoneWBzero2; yes, "write buffer" empty + jz .__paramLoopDoneWBzero2; yes, "write buffer" empty -__paramLoop2a: +.__paramLoop2a: movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone2 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone2 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter add dlp, 8 ; dlp += 2 @@ -635,12 +622,12 @@ __paramLoop2a: test eax, eax ; at end of offset list (offset == 0) ? WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jnz __paramLoop2a ; nope, copy next parameter + jnz .__paramLoop2a ; nope, copy next parameter -ENDIF ; GLIDE_PACK_RGB +%ENDIF ; GLIDE_PACK_RGB -__paramLoopDoneWBzero2: +.__paramLoopDoneWBzero2: mov vertex, [esp + _vc$] ; Current vertex = C mov dlp, dlpstrt ; Reset the dataList @@ -652,8 +639,8 @@ __paramLoopDoneWBzero2: nop ; filler -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm1, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -672,7 +659,7 @@ IF GLIDE_PACK_ALPHA por mm1, mm3 ; 00000000 | aarrggbb add dlp, 8 ; skip data list entry "a" -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm1, [vertex+r] ; g | r @@ -686,7 +673,7 @@ ELSE ; !GLIDE_PACK_ALPHA por mm1, mm2 ; 00000000 | 00rrggbb add dlp, 4 ; next data list entry -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA ;; here: one DWORD in "write buffer", RGB(A) @@ -718,7 +705,7 @@ __paramLoop3a: nop ; filler jmp __paramLoopDoneWBone3 ; merge back into common stream -ELSE ; ! GLIDE_PACK_RGB +%ELSE ; ! GLIDE_PACK_RGB ;; here: "write buffer" empty @@ -726,14 +713,14 @@ ELSE ; ! GLIDE_PACK_RGB add dlp, 4 ; dlp++ test eax, eax ; at end of list ? 
- jz __paramLoopDoneWBzero3; yes, "write buffer" empty + jz .__paramLoopDoneWBzero3; yes, "write buffer" empty -__paramLoop3a: +.__paramLoop3a: movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jz __paramLoopDoneWBone3 ; exit, write buffer contains one DWORD + jz .__paramLoopDoneWBone3 ; exit, write buffer contains one DWORD movd mm2, [eax+vertex] ; get next parameter add dlp, 8 ; dlp += 2 @@ -745,15 +732,15 @@ __paramLoop3a: test eax, eax ; at end of offset list (offset == 0) ? WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jnz __paramLoop3a ; nope, copy next parameter + jnz .__paramLoop3a ; nope, copy next parameter mov esp, esp ; filler - jmp __paramLoopDoneWBzero3; write buffer empty + jmp .__paramLoopDoneWBzero3; write buffer empty -ENDIF ; GLIDE_PACK_RGB +%ENDIF ; GLIDE_PACK_RGB -__paramLoopDoneWBone2: +.__paramLoopDoneWBone2: ;; here: "write buffer" has one DWORD left over from vertex B @@ -769,8 +756,8 @@ __paramLoopDoneWBone2: movd mm1, [vertex+Y] ; 0 | y of vertex C mov esp, esp ; filler -IF GLIDE_PACK_RGB -IF GLIDE_PACK_ALPHA +%IF GLIDE_PACK_RGB +%IF GLIDE_PACK_ALPHA ;; assumes color and alpha values < 256.0 movq mm4, [vertex+r] ; g | r movd mm2, [vertex+b] ; 0 | b @@ -789,7 +776,7 @@ IF GLIDE_PACK_ALPHA por mm4, mm2 ; 00000000 | 00rrggbb por mm4, mm3 ; 00000000 | aarrggbb -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA ;; assumes color values < 256.0 movq mm4, [vertex+r] ; g | r @@ -803,7 +790,7 @@ ELSE ; !GLIDE_PACK_ALPHA add dlp, 4 ; next data list entry por mm4, mm2 ; 00000000 | 00rrggbb -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA punpckldq mm1, mm4 ; RGB(A) | y add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -839,15 +826,15 @@ __paramLoop3b: nop ; filler jmp __paramLoopDoneWBzero3; write buffer empty -ELSE ; !GLIDE_PACK_RGB +%ELSE ; !GLIDE_PACK_RGB mov eax, [dlp] ; get first offset from the data list add dlp, 4 ; dlp++ test eax, 
eax ; end of list ? - jz __paramLoopDoneWBone3 ; yes, "write buffer" has y data + jz .__paramLoopDoneWBone3 ; yes, "write buffer" has y data -__paramLoop3b: +.__paramLoop3b: movd mm2, [eax+vertex] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -858,27 +845,27 @@ __paramLoop3b: cmp eax, 0 ; at end of offset list (offset == 0) ? WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param - jz __paramLoopDoneWBzero3; exit, "write buffer" empty + jz .__paramLoopDoneWBzero3; exit, "write buffer" empty movd mm1, [eax+vertex] ; get next parameter mov eax, [dlp-4] ; offset = *(dlp + 1) test eax, eax ; at end of offset list (offset == 0) ? - jnz __paramLoop3b ; nope, copy next parameter -ENDIF + jnz .__paramLoop3b ; nope, copy next parameter +%ENDIF -__paramLoopDoneWBone3: +.__paramLoopDoneWBone3: ; "write buffer" contains one DWORD that needs to be flushed WRITE_MM1LOW_FIFO ; add fifo, 4 ; -__paramLoopDoneWBzero3: +.__paramLoopDoneWBzero3: ;; Update gc->fifoPtr and gc->fifoRoom - mov ecx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn + mov ecx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn mov eax, fifo ; new fifo pointer mov ebx, [gc + fifoPtr] ; old fifo pointer @@ -887,10 +874,10 @@ __paramLoopDoneWBzero3: mov edx, [gc + fifoRoom] ; old fifo space available inc ecx ; _GlideRoot.stats.trisDrawn++ - mov ebp, [__GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed + mov ebp, [_GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed sub eax, ebx ; new fifo ptr - old fifo ptr = additional fifo space used - mov [__GlideRoot + trisDrawn], ecx ; + mov [_GlideRoot + trisDrawn], ecx ; sub edx, eax ; new fifo space available mov eax, 1h ; return value = triangle drawn @@ -901,7 +888,7 @@ __paramLoopDoneWBzero3: inc ebp ; _GlideRoot.stats.trisProcessed++ pop esi ; restore caller's register variable - mov [__GlideRoot + trisProcessed], ebp ; + mov [_GlideRoot + trisProcessed], ebp ; pop ebx ; restore caller's register variable pop 
ebp ; restore frame pointer @@ -909,12 +896,12 @@ __paramLoopDoneWBzero3: femms ; no more AMD3D code, clear FPU/MMX regs - ret 12 ; return to caller + ret ; return to caller -IF GLIDE_CULLING +%IF GLIDE_CULLING __cullFail: - mov ebp, [__GlideRoot + trisProcessed]; triangles processed so far + mov ebp, [_GlideRoot + trisProcessed]; triangles processed so far xor eax, eax ; return value = triangle not drawn femms ; no more AMD3D code, clear FPU/MMX regs @@ -923,14 +910,14 @@ __cullFail: inc ebp ; _GlideRoot.stats.trisProcessed++; pop esi - mov [__GlideRoot + trisProcessed], ebp + mov [_GlideRoot + trisProcessed], ebp pop ebx pop ebp ; restore frame pointer pop edi - ret 12 -ENDIF ; GLIDE_CULLING + ret +%ENDIF ; GLIDE_CULLING ;--------------------------------------------------------------------------- ; @@ -938,26 +925,24 @@ ENDIF ; GLIDE_CULLING ; ;--------------------------------------------------------------------------- -endif ; !defined GL_AMD3D +%endif ; !defined GL_AMD3D ;;-------------------------------------------------------------------------- ;; start original code ;;-------------------------------------------------------------------------- -ifndef GL_AMD3D +%ifndef GL_AMD3D -TITLE xdraw2.inc - ; Ugly, but seems to workaround the problem with locally defined ; data segment globals not getting relocated properly when using ; djgpp. 
-zArea TEXTEQU -gc TEXTEQU ; points to graphics context +%define zArea One + 04h +%define gc esi ; points to graphics context ;; Prologue stuff push esi - mov gc, [__GlideRoot + curGC] ;; GR_DCL_GC + mov gc, [_GlideRoot + curGC] ;; GR_DCL_GC push edi push ebx @@ -966,18 +951,18 @@ gc TEXTEQU ; points to graphics context nop align 4 -IF GLIDE_CULLING -fa TEXTEQU ; vtx a from caller -fb TEXTEQU ; vtx b from caller -fc TEXTEQU ; vtx c from caller +%IF GLIDE_CULLING +%define fa eax ; vtx a from caller +%define fb ebx ; vtx b from caller +%define fc ecx ; vtx c from caller -cull TEXTEQU -intArea TEXTEQU ; temp Y storage +%define cull edx +%define intArea ebp ; temp Y storage ; some useful floating load and store macros -flds TEXTEQU -fsubs TEXTEQU -fmuls TEXTEQU +%define flds fld DWORD +%define fsubs fsub DWORD +%define fmuls fmul DWORD ;; Pre-load the current culling mode before all of the ;; floating point area stuff. @@ -988,11 +973,11 @@ fmuls TEXTEQU mov fc, [esp + _vc$] test cull, cull - jz nocull + jz .nocull shl cull, 31 ; culltest << 31 -Area_Computation: +;Area_Computation: ; 47-3 ; jmp ret_pop0f flds [fa + X] ; xa @@ -1003,70 +988,66 @@ Area_Computation: fsubs [fc + Y] ; | | dyBC flds [fa + Y] ; | | | ya fsubs [fb + Y] ; | | | dyAB - fld st(3) ; | | | | dxAB - fmul st, st(2) ; | | | | t0 t0=dxAB*dyBC - fld st(3) ; | | | | | dxBC - fmul st, st(2) ; | | | | | t1 t1=dxBC*dyAB - fsubp st(1),st ; | | | | area - fst zArea ; | | | | area + fld st3 ; | | | | dxAB + fmul st0, st2 ; | | | | t0 t0=dxAB*dyBC + fld st3 ; | | | | | dxBC + fmul st0, st2 ; | | | | | t1 t1=dxBC*dyAB + fsubp st1,st0 ; | | | | area + fst dword [zArea] ; | | | | area ;; Pop temp things from the sw culling off the fp stack - fstp st(0) ; 4 - fstp st(0) ; 3 - fstp st(0) ; 2 - fstp st(0) ; 1 - fstp st(0) ; 0 + fstp st0 ; 4 + fstp st0 ; 3 + fstp st0 ; 2 + fstp st0 ; 1 + fstp st0 ; 0 - mov intArea, zArea ; j = *(long *)&area + mov intArea, [zArea] ; j = *(long *)&area xor eax, eax ; Clear the return 
value (0 == culled) ; Zero Area Triangle Check and intArea, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) - jz __triDone + jz .__triDone ;; Triangle area check vs culling mode - mov intArea, zArea ; reload area just in case we're culling + mov intArea, [zArea] ; reload area just in case we're culling xor intArea, cull ; if (j ^ (culltest << 31)) - jge __triDone -nocull: -ENDIF ; GLIDE_CULLING + jge .__triDone +.nocull: +%ENDIF ; GLIDE_CULLING align 4 ;; Check to make sure that we have enough room for ;; the complete triangle packet. - mov eax, [__GlideRoot + curTriSize] + mov eax, [_GlideRoot + curTriSize] mov ebx, [gc + fifoRoom] add eax, 4 cmp ebx, eax - jge __triBegin + jge .__triBegin - push @Line + push __LINE__ push 0h push eax - call __FifoMakeRoom + call _FifoMakeRoom add esp, 12 ;; Send triangle parameters -dlp TEXTEQU ; points to dataList structure -fifo TEXTEQU ; points to next entry in fifo -vertex TEXTEQU ; the current vertex -vOffset TEXTEQU ; Current vertex offset +%define dlp ebx ; points to dataList structure +%define fifo ebp ; points to next entry in fifo +%define vertex edx ; the current vertex +%define vOffset ecx ; Current vertex offset -packCol TEXTEQU -tempVal TEXTEQU - -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE +%define packCol edi +%define tempVal edi align 4 -__triBegin: +.__triBegin: mov fifo, [gc + fifoPtr] ; Fetch Fifo Ptr mov vOffset, 4 ; Starting vertex @@ -1077,44 +1058,44 @@ __triBegin: add fifo, 4 ; Advance fifo for hdr & x/y coordinate align 4 -__vertexStart: +.__vertexStart: mov vertex, [esp + STKOFF + vOffset] ; Current vertex add fifo, 8 nop ; Avoid p5 agi w/ load of vertex ptr nop - mov eax, DWORD PTR [vertex] ; X + mov eax, dword [vertex] ; X lea dlp, [gc + tsuDataList] ; Reset the dataList GR_FIFO_WRITE fifo, -8, eax ; PCI write X - mov eax, DWORD PTR [vertex + 4] ; Y + mov eax, dword [vertex + 4] ; Y xor packCol, packCol ; Clear packed color GR_FIFO_WRITE fifo, -4, eax ; 
PCI write Y -IF GLIDE_PACK_RGB - fld DWORD PTR [vertex + b] ; B - fadd DWORD PTR __GlideRoot + fBiasLo ; BC GC +%IF GLIDE_PACK_RGB + fld dword [vertex + b] ; B + fadd dword [_GlideRoot + fBiasLo] ; BC GC - fld DWORD PTR [vertex + g] ; G B - fadd DWORD PTR __GlideRoot + fBiasHi ; GC B + fld dword [vertex + g] ; G B + fadd dword [_GlideRoot + fBiasHi] ; GC B - fld DWORD PTR [vertex + r] ; R GC BC - fadd DWORD PTR __GlideRoot + fBiasHi ; RC GC BC + fld dword [vertex + r] ; R GC BC + fadd dword [_GlideRoot + fBiasHi] ; RC GC BC - fxch st(2) ; BC GC RC - fstp DWORD PTR bias0 ; GC RC + fxch st2 ; BC GC RC + fstp dword [bias0] ; GC RC - fstp DWORD PTR bias1 ; RC - mov packCol, DWORD PTR bias0 ; B + bias + fstp dword [bias1] ; RC + mov packCol, dword [bias0] ; B + bias - fstp DWORD PTR bias0 - mov eax, DWORD PTR bias1 ; G + bias + fstp dword [bias0] + mov eax, dword [bias1] ; G + bias -IF GLIDE_PACK_ALPHA - fld DWORD PTR [vertex + a] - fadd DWORD PTR __GlideRoot + fBiasHi +%IF GLIDE_PACK_ALPHA + fld dword [vertex + a] + fadd dword [_GlideRoot + fBiasHi] and packCol, 00FFh ; B color component and eax, 0000FF00h ; G component << 8 @@ -1125,10 +1106,10 @@ IF GLIDE_PACK_ALPHA or packCol, eax ; 0000GGBB nop - fstp DWORD PTR bias1 - mov eax, DWORD PTR bias0 ; R + bias + fstp dword [bias1] + mov eax, dword [bias0] ; R + bias - mov esi, DWORD PTR bias1 ; A + bias + mov esi, dword [bias1] ; A + bias and eax, 0000FF00h ; R component << 8 and esi, 0FFFFFF00h ; A component << 8 @@ -1139,56 +1120,56 @@ IF GLIDE_PACK_ALPHA or packCol, esi ; AARRGGBB nop -ELSE ; !GLIDE_PACK_ALPHA +%ELSE ; !GLIDE_PACK_ALPHA and packCol, 00FFh ; B color component and eax, 0000FF00h ; G component << 8 add dlp, 4 ; Next dataList item or packCol, eax - mov eax, DWORD PTR bias0 ; R + bias + mov eax, dword [bias0] ; R + bias and eax, 0000FF00h ; R component << 8 shl eax, 8 ; R << 16 or packCol, eax ; 00RRGGBB -ENDIF ; !GLIDE_PACK_ALPHA +%ENDIF ; !GLIDE_PACK_ALPHA GR_FIFO_WRITE fifo, 0, packCol ; PCI write 
packed color value add fifo, 4 -ENDIF ; GLIDE_PACK_RGB +%ENDIF ; GLIDE_PACK_RGB -__doParams: - mov eax, DWORD PTR [dlp] ; Get first offset from the data list +.__doParams: + mov eax, dword [dlp] ; Get first offset from the data list add dlp, 4 ; dlp++ cmp eax, 0 ; Are we done? - je __nextVertex + je .__nextVertex ;; Not using align directive here because it sometimes ;; introduces an agi for the eax use below. nop nop -__paramLoop: - mov tempVal, DWORD PTR [eax + vertex] ; Get the parameter from teh vertex +.__paramLoop: + mov tempVal, dword [eax + vertex] ; Get the parameter from teh vertex add fifo, 4 ; fifoPtr += sizeof(FxU32) - mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1) + mov eax, dword [dlp] ; offset = *(dlp + 1) add dlp, 4 ; dlp++ cmp eax, 0 ; Are we done? GR_FIFO_WRITE fifo, -4, tempVal ; *fifoPtr = data - jne SHORT __paramLoop + jne .__paramLoop align 4 -__nextVertex: +.__nextVertex: ;; On to the next vertex add vOffset, 4 - mov gc, [__GlideRoot + curGC] ; Reload gc incase we trashed it as a temp + mov gc, [_GlideRoot + curGC] ; Reload gc incase we trashed it as a temp cmp vOffset, 16 ; Offset of one past last vertex? 
- jne __vertexStart + jne .__vertexStart ;; Update gc->fifoPtr and gc->fifoRoom mov eax, fifo @@ -1197,27 +1178,27 @@ __nextVertex: mov [gc + fifoPtr], fifo sub eax, ebx - mov ebx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++; + mov ebx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++; sub [gc + fifoRoom], eax add ebx, 1 - mov [__GlideRoot + trisDrawn], ebx + mov [_GlideRoot + trisDrawn], ebx ;; return 1 (triangle drawn) mov eax, 1h -__triDone: +.__triDone: ;; Restore trashed registers - mov esi, [__GlideRoot + trisProcessed] + mov esi, [_GlideRoot + trisProcessed] pop ebp add esi, 1 ; _GlideRoot.stats.trisProcessed++; pop ebx pop edi - mov [__GlideRoot + trisProcessed], esi + mov [_GlideRoot + trisProcessed], esi pop esi - ret 12 + ret -endif ; !GL_AMD3D +%endif ; !GL_AMD3D diff --git a/glide3x/cvg/glide3/src/xdraw3.asm b/glide3x/cvg/glide3/src/xdraw3.asm index eb19950..3dd66e5 100644 --- a/glide3x/cvg/glide3/src/xdraw3.asm +++ b/glide3x/cvg/glide3/src/xdraw3.asm @@ -16,65 +16,58 @@ ;; ;; COPYRIGHT 3DFX INTERACTIVE, INC. 
1999, ALL RIGHTS RESERVED +%include "xos.inc" + ;;-------------------------------------------------------------------------- ;; start AMD3D version ;;-------------------------------------------------------------------------- -ifdef GL_AMD3D - -TITLE xdraw3.asm -.586P -.MMX -.K3D +%ifdef GL_AMD3D ;;; include listing.inc -INCLUDE fxgasm.h +%INCLUDE "fxgasm.h" -EXTRN __GlideRoot:DWORD -EXTRN __FifoMakeRoom:NEAR +extrn _GlideRoot +extrn _FifoMakeRoom -CONST SEGMENT +segment CONST ALIGN 8 -_F256_F256 DQ 04380000043800000h ; 256 | 256 -CONST ENDS +_F256_F256 DD 043800000h, 43800000h ; 256 | 256 -_DATA SEGMENT +segment DATA ALIGN 8 -btab DD 8 DUP(0) -atab DD 8 DUP(0) +btab DD 0, 0, 0, 0, 0, 0, 0, 0 +atab DD 0, 0, 0, 0, 0, 0, 0, 0 vSize DD 0 strideinbytes DD 0 vertices DD 0 -_DATA ENDS -_TEXT SEGMENT PAGE PUBLIC USE32 'CODE' - ASSUME DS: FLAT, SS: FLAT +segment TEXT -_pktype = 20 -_type = 24 -_mode = 28 -_count = 32 -_pointers = 36 +_pktype equ 20 +_type equ 24 +_mode equ 28 +%define _count 32 +%define _pointers 36 -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to next entry in fifo -dlp TEXTEQU ; points to dataList structure -vertexCount TEXTEQU ; Current vertex counter in the packet -vertexPtr TEXTEQU ; Current vertex pointer (in deref mode) -vertex TEXTEQU ; Current vertex (in non-deref mode) -dlpStart TEXTEQU ; Pointer to start of offset list +%define gc edi ; points to graphics context +%define fifo ecx ; points to next entry in fifo +%define dlp ebp ; points to dataList structure +%define vertexCount esi ; Current vertex counter in the packet +%define vertexPtr ebx ; Current vertex pointer (in deref mode) +%define vertex ebx ; Current vertex (in non-deref mode) +%define dlpStart edx ; Pointer to start of offset list -X TEXTEQU <0> -Y TEXTEQU <4> +%define X 0 +%define Y 4 ALIGN 32 - PUBLIC __grDrawVertexList_3DNow_Window@20 -__grDrawVertexList_3DNow_Window@20 PROC NEAR +proc _grDrawVertexList_3DNow_Window, 20 ; 132 : { push edi ; save caller's 
register variable - mov gc, [__GlideRoot + curGC] ; get current graphics context + mov gc, [_GlideRoot + curGC] ; get current graphics context push esi ; save caller's register variable mov vertexCount, [esp+_count-8]; number of vertices in strip/fan @@ -86,7 +79,7 @@ __grDrawVertexList_3DNow_Window@20 PROC NEAR ; get current vertex (non-deref mode) test vertexCount, vertexCount ; number of vertices <= 0 ? - jle strip_done ; yup, the strip/fan is done + jle .strip_done ; yup, the strip/fan is done ;;; vSize = gc->state.vData.vSize ;;; if (stride == 0) @@ -110,7 +103,7 @@ __grDrawVertexList_3DNow_Window@20 PROC NEAR test edx, edx ; mode 0 (array of vertices) ? mov edx, [gc + vertexStride] ; get stride in DWORDs - jnz deref_mode ; nope, it's mode 1 (array of pointers to vertices) + jnz .deref_mode ; nope, it's mode 1 (array of pointers to vertices) femms ; we'll use MMX; clear MMX/3DX state @@ -129,7 +122,7 @@ __grDrawVertexList_3DNow_Window@20 PROC NEAR ;;; TRI_STRIP_BEGIN(type, vcount, vSize, pktype); -win_coords_loop_ND: +.win_coords_loop_ND: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -145,18 +138,18 @@ win_coords_loop_ND: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? - jge win_strip_begin_ND ; yup, start writing strip data + jge .win_strip_begin_ND ; yup, start writing strip data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr lea esp, [esp+12] ; remove 3 DWORD arguments from stack -win_strip_begin_ND: +.win_strip_begin_ND: ;;; Setup packet header ;;; @@ -179,7 +172,7 @@ win_strip_begin_ND: lea dlpStart, [gc+tsuDataList] ; pointer to start of offset list test fifo, ebp ; fifoPtr QWORD aligned ? 
- jz fifo_aligned_ND ; yup + jz .fifo_aligned_ND ; yup mov [fifo], eax ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned @@ -197,7 +190,7 @@ win_strip_begin_ND: ;;; TRI_SETF(FARRAY(vPtr, 4)); ;;; i = gc->tsuDataList[dataElem]; -win_vertex_loop_ND_WB0: ; nothing in "write buffer" +.win_vertex_loop_ND_WB0: ; nothing in "write buffer" mov eax, [dlpStart] ; get first offset from offset list lea dlp, [dlpStart+4] ; point to start of offset list @@ -209,7 +202,7 @@ win_vertex_loop_ND_WB0: ; nothing in "write buffer" test eax, eax ; if offset == 0, end of list movq [fifo-8], mm1 ; PCI write x, y - jz win_datalist_end_ND_WB0 ; no more vertex data, nothing in "write buffer" + jz .win_datalist_end_ND_WB0 ; no more vertex data, nothing in "write buffer" ;;; while (i != GR_DLIST_END) { ;;; TRI_SETF(FARRAY(vPtr, i)); @@ -217,13 +210,13 @@ win_vertex_loop_ND_WB0: ; nothing in "write buffer" ;;; i = gc->tsuDataList[dataElem]; ;;; } -win_datalist_loop_ND_WB0: ; nothing in "write buffer" +.win_datalist_loop_ND_WB0: ; nothing in "write buffer" movd mm1, [vertex + eax] ; get next parameter mov eax, [dlp] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jz win_datalist_end_ND_WB1 ; exit, write buffer contains one DWORD + jz .win_datalist_end_ND_WB1 ; exit, write buffer contains one DWORD movd mm2, [vertex + eax] ; get next parameter add dlp, 8 ; dlp++ @@ -235,17 +228,17 @@ win_datalist_loop_ND_WB0: ; nothing in "write buffer" punpckldq mm1, mm2 ; current param | previous param movq [fifo-8], mm1 ; PCI write current param | previous param - jnz win_datalist_loop_ND_WB0 ; nope, copy next parameter + jnz .win_datalist_loop_ND_WB0 ; nope, copy next parameter -win_datalist_end_ND_WB0: +.win_datalist_end_ND_WB0: mov eax, [strideinbytes] ; get offset to next vertex sub vertexCount, 1 ; another vertex done. Any left? 
lea vertex, [vertex + eax] ; points to next vertex - jnz win_vertex_loop_ND_WB0 ; yup, output next vertex + jnz .win_vertex_loop_ND_WB0 ; yup, output next vertex -win_vertex_end_ND_WB0: +.win_vertex_end_ND_WB0: ;;; TRI_END; ;;; Prepare for the next packet (if the strip size is longer than 15) @@ -273,7 +266,7 @@ win_vertex_end_ND_WB0: test vertexCount, vertexCount ; any vertices left to process ? nop ; filler - jg win_coords_loop_ND ; loop if number of vertices to process >= 0 + jg .win_coords_loop_ND ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -286,11 +279,11 @@ win_vertex_end_ND_WB0: ret 20 ; return, pop 5 DWORD parameters off stack -fifo_aligned_ND: +.fifo_aligned_ND: movd mm1, eax ; move header into "write buffer" -win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" +.win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" movd mm2, [vertex + X] ; 0 | x of vertex add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -305,7 +298,7 @@ win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" movd mm1, [vertex + Y] ; 0 | y of vertex cmp eax, 0 ; offset == 0 (list empty) ? - jz win_datalist_end_ND_WB1 ; yup, no more vertex data, one DWORD in "write buffer" + jz .win_datalist_end_ND_WB1 ; yup, no more vertex data, one DWORD in "write buffer" ;;; while (i != GR_DLIST_END) { ;;; TRI_SETF(FARRAY(vPtr, i)); @@ -313,7 +306,7 @@ win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" ;;; i = gc->tsuDataList[dataElem]; ;;; } -win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" +.win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" movd mm2, [vertex + eax] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -325,23 +318,23 @@ win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" cmp eax, 0 ; at end of offset list (offset == 0) ? 
movq [fifo-8], mm1 ; PCI write current param | previous param - jz win_datalist_end_ND_WB0 ; yes, exit, "write buffer" empty + jz .win_datalist_end_ND_WB0 ; yes, exit, "write buffer" empty movd mm1, [vertex+eax] ; get next parameter mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jnz win_datalist_loop_ND_WB1 ; nope, copy next parameter + jnz .win_datalist_loop_ND_WB1 ; nope, copy next parameter -win_datalist_end_ND_WB1: +.win_datalist_end_ND_WB1: mov eax, [strideinbytes] ; get offset to next vertex sub vertexCount, 1 ; another vertex done. Any left? lea vertex, [vertex + eax] ; points to next vertex - jnz win_vertex_loop_ND_WB1 ; yup, output next vertex + jnz .win_vertex_loop_ND_WB1 ; yup, output next vertex -win_vertex_end_ND_WB1: +.win_vertex_end_ND_WB1: movd [fifo], mm1 ; flush "write buffer" add fifo, 4 ; fifoPtr += sizeof(FxU32) @@ -372,7 +365,7 @@ win_vertex_end_ND_WB1: test vertexCount, vertexCount ; any vertices left to process ? nop ; filler - jg win_coords_loop_ND ; loop if number of vertices to process >= 0 + jg .win_coords_loop_ND ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -386,13 +379,13 @@ win_vertex_end_ND_WB1: db 08dh,064h,024h,000h ; filler (not reachable) -deref_mode: +.deref_mode: femms ; we'll use MMX; clear FPU/MMX state prefetch [vertexPtr] ; pre-load first group of pointers -win_coords_loop_D: +.win_coords_loop_D: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -408,18 +401,18 @@ win_coords_loop_D: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? 
- jge win_strip_begin_D ; yup, start writing strip data + jge .win_strip_begin_D ; yup, start writing strip data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler -win_strip_begin_D: +.win_strip_begin_D: ;;; Setup packet header ;;; @@ -442,7 +435,7 @@ win_strip_begin_D: lea dlpStart, [gc+tsuDataList] ; pointer to start of offset list test fifo, ebp ; fifoPtr QWORD aligned ? - jz fifo_aligned_D ; yup + jz .fifo_aligned_D ; yup mov [fifo], eax ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned @@ -461,7 +454,7 @@ win_strip_begin_D: ;;; i = gc->tsuDataList[dataElem]; -win_vertex_loop_D_WB0: ; nothing in "write buffer" +.win_vertex_loop_D_WB0: ; nothing in "write buffer" mov edx, [vertexPtr] ; dereference pointer, edx points to vertex add vertexPtr, 4 ; next pointer @@ -476,7 +469,7 @@ win_vertex_loop_D_WB0: ; nothing in "write buffer" movq [fifo-8], mm1 ; PCI write x, y cmp eax, 0 ; if offset == 0, end of offset list - je win_datalist_end_D_WB0 ; no more vertex data, nothing in "write buffer" + je .win_datalist_end_D_WB0 ; no more vertex data, nothing in "write buffer" ;;; while (i != GR_DLIST_END) { ;;; TRI_SETF(FARRAY(vPtr, i)); @@ -484,13 +477,13 @@ win_vertex_loop_D_WB0: ; nothing in "write buffer" ;;; i = gc->tsuDataList[dataElem]; ;;; } -win_datalist_loop_D_WB0: ; nothing in "write buffer" +.win_datalist_loop_D_WB0: ; nothing in "write buffer" movd mm1, [edx + eax] ; get next parameter mov eax, [dlp] ; get next offset from offset list cmp eax, 0 ; at end of offset list (offset == 0) ? 
- jz win_datalist_end_D_WB1 ; exit, write buffer contains one DWORD + jz .win_datalist_end_D_WB1 ; exit, write buffer contains one DWORD add dlp, 8 ; dlp++ movd mm2, [edx + eax] ; get next parameter @@ -502,14 +495,14 @@ win_datalist_loop_D_WB0: ; nothing in "write buffer" test eax, eax ; at end of offset list (offset == 0) ? movq [fifo-8], mm1 ; PCI write current param | previous param - jnz win_datalist_loop_D_WB0 ; nope, copy next parameter + jnz .win_datalist_loop_D_WB0 ; nope, copy next parameter -win_datalist_end_D_WB0: +.win_datalist_end_D_WB0: dec vertexCount ; another vertex done. Any left? - jnz win_vertex_loop_D_WB0 ; yup, output next vertex + jnz .win_vertex_loop_D_WB0 ; yup, output next vertex -win_vertex_end_D_WB0: +.win_vertex_end_D_WB0: ;;; TRI_END; ;;; Prepare for the next packet (if the strip size is longer than 15) @@ -537,7 +530,7 @@ win_vertex_end_D_WB0: test vertexCount, vertexCount ; any vertices left to process ? nop ; filler - jg win_coords_loop_D ; loop if number of vertices to process >= 0 + jg .win_coords_loop_D ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -550,11 +543,11 @@ win_vertex_end_D_WB0: ret 20 ; return, pop 5 DWORD parameters off stack mov esp, esp ; filler (unreachable) -fifo_aligned_D: +.fifo_aligned_D: movd mm1, eax ; move header into "write buffer" -win_vertex_loop_D_WB1: ; one DWORD in "write buffer" +.win_vertex_loop_D_WB1: ; one DWORD in "write buffer" mov edx, [vertexPtr] ; dereference pointer, edx points to vertex add vertexPtr, 4 ; next pointer @@ -572,7 +565,7 @@ win_vertex_loop_D_WB1: ; one DWORD in "write buffer" movd mm1, [edx + Y] ; 0 | y of vertex cmp eax, 0 ; offset == 0 (list empty) ? 
- je win_datalist_end_D_WB1 ; yup, no more vertex data, one DWORD in "write buffer" + je .win_datalist_end_D_WB1 ; yup, no more vertex data, one DWORD in "write buffer" ;;; while (i != GR_DLIST_END) { ;;; TRI_SETF(FARRAY(vPtr, i)); @@ -580,7 +573,7 @@ win_vertex_loop_D_WB1: ; one DWORD in "write buffer" ;;; i = gc->tsuDataList[dataElem]; ;;; } -win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 +.win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 movd mm2, [edx + eax] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -592,20 +585,20 @@ win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 test eax, eax ; at end of offset list (offset == 0) ? movq [fifo-8], mm1 ; PCI write current param | previous param - jz win_datalist_end_D_WB0 ; yes, exit, "write buffer" empty + jz .win_datalist_end_D_WB0 ; yes, exit, "write buffer" empty movd mm1, [edx + eax] ; get next parameter mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jnz win_datalist_loop_D_WB1 ; nope, copy next parameter + jnz .win_datalist_loop_D_WB1 ; nope, copy next parameter -win_datalist_end_D_WB1: +.win_datalist_end_D_WB1: dec vertexCount ; another vertex done. Any left? - jnz win_vertex_loop_D_WB1 ; yup, output next vertex + jnz .win_vertex_loop_D_WB1 ; yup, output next vertex -win_vertex_end_D_WB1: +.win_vertex_end_D_WB1: movd [fifo], mm1 ; flush "write buffer" add fifo, 4 ; fifoPtr++ @@ -636,11 +629,11 @@ win_vertex_end_D_WB1: cmp vertexCount, 0 ; any vertices left to process ? 
mov [esp + _count], vertexCount; remaining number of vertices to process - jg win_coords_loop_D ; loop if number of vertices to process >= 0 + jg .win_coords_loop_D ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state -strip_done: +.strip_done: pop ebp ; restore frame pointer pop ebx ; restore caller's register variable @@ -649,14 +642,13 @@ strip_done: ret 20 ; return, pop 5 DWORD parameters off stack -__grDrawVertexList_3DNow_Window@20 ENDP +endp - PUBLIC __grDrawVertexList_3DNow_Clip@20 -__grDrawVertexList_3DNow_Clip@20 PROC NEAR +proc _grDrawVertexList_3DNow_Clip, 20 ; 132 : { push edi ; save caller's register variable - mov gc, [__GlideRoot + curGC] ; get current graphics context + mov gc, [_GlideRoot + curGC] ; get current graphics context push esi ; save caller's register variable mov vertexCount, [esp+_count-8]; number of vertices in strip/fan @@ -668,7 +660,7 @@ __grDrawVertexList_3DNow_Clip@20 PROC NEAR ; get current vertex (non-deref mode) test vertexCount, vertexCount ; number of vertices <= 0 ? - jle strip_done ; yup, the strip/fan is done + jle .strip_done ; yup, the strip/fan is done ;;; vSize = gc->state.vData.vSize ;;; if (stride == 0) @@ -692,20 +684,20 @@ __grDrawVertexList_3DNow_Clip@20 PROC NEAR test edx, edx ; mode 0 (array of vertices) ? 
mov edx, [gc + vertexStride] ; get stride in DWORDs - movd mm6, [__GlideRoot+pool_f255]; GlideRoot.pool.f255 - mov [strideinbytes], 4 ; array of pointers + movd mm6, [_GlideRoot+pool_f255]; GlideRoot.pool.f255 + mov dword [strideinbytes], 4 ; array of pointers - jnz clip_coords_begin ; nope, it's mode 1 + jnz .clip_coords_begin ; nope, it's mode 1 -clip_coordinates_ND: +.clip_coordinates_ND: shl edx, 2 ; stride in bytes mov [strideinbytes], edx ; save off stride (in bytes) align 32 -clip_coords_begin: +.clip_coords_begin: -dataElem textequ ; number of vertex components processed +%define dataElem ebp ; number of vertex components processed ;;; { ;;; float oow; @@ -727,17 +719,17 @@ dataElem textequ ; number of vertex components processed nop ; filler cmp eax, ecx ; fifo space avail >= packet size ? - jge clip_strip_begin ; yup, start writing strip data + jge .clip_strip_begin ; yup, start writing strip data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack -clip_strip_begin: +.clip_strip_begin: ;;; TRI_STRIP_BEGIN(type, vcount, vSize, pktype) @@ -763,7 +755,7 @@ clip_strip_begin: ;;; float *vPtr ;;; vPtr = pointers -clip_for_begin: +.clip_for_begin: ;;; if (mode) ;;; vPtr = *(float **)vPtr @@ -775,12 +767,12 @@ clip_for_begin: test eax, eax ; deref mode ? 
mov eax, [gc+wInfo_offset] ; get offset of W into vertex struct - jz clip_noderef ; yup, no-deref mode + jz .clip_noderef ; yup, no-deref mode mov edx, [vertexPtr] ; vertex = *vertexPtr lea esp, [esp] ; filler -clip_noderef: +.clip_noderef: ;;; oow = 1.0f / FARRAY(vPtr, gc->state.vData.wInfo.offset) @@ -821,13 +813,13 @@ clip_noderef: ;;; TRI_VP_SETFS(vPtr, oow); movq [fifo-8], mm2 ; PCI write transformed x, y - jz clip_setup_ooz ; nope, no color at all needed + jz .clip_setup_ooz ; nope, no color at all needed - cmp DWORD PTR [gc+colorType], 0; gc->state.vData.colorType == GR_FLOAT ? - jne clip_setup_pargb ; nope, packed ARGB format + cmp dword [gc+colorType], 0; gc->state.vData.colorType == GR_FLOAT ? + jne .clip_setup_pargb ; nope, packed ARGB format test esi, 1 ; STATE_REQUIRES_IT_DRGB ? - jz clip_setup_a ; no, but definitely A + jz .clip_setup_a ; no, but definitely A movd mm2, [edx + eax] ; 0 | r mov eax, [gc+tsuDataList+4] ; offset of g part of vertex data @@ -851,9 +843,9 @@ clip_noderef: lea fifo, [fifo+12] ; fifoPtr += 3*sizeof(FxFloat) movd [fifo-4], mm2 ; PCI write b*255 - jz clip_setup_ooz ; nope, no alpha, proceeed with ooz + jz .clip_setup_ooz ; nope, no alpha, proceeed with ooz -clip_setup_a: +.clip_setup_a: movd mm2, [eax+edx] ; 0 | a add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -864,11 +856,11 @@ clip_setup_a: mov eax, [gc+dataElem+tsuDataList]; offset of next part of vertex data movd [fifo-4], mm2 ; PCI write a*255 - jmp clip_setup_ooz ; check whether we need to push out z + jmp .clip_setup_ooz ; check whether we need to push out z ALIGN 32 -clip_setup_pargb: +.clip_setup_pargb: movd mm2, [eax+edx] ; get packed ARGB data add fifo, 4 ; fifoPtr += sizeof(FxU32) @@ -877,10 +869,10 @@ clip_setup_pargb: movd [fifo-4], mm2 ; PCI write packed ARGB -clip_setup_ooz: +.clip_setup_ooz: test esi, 4 ; STATE_REQUIRES_OOZ ? 
- jz clip_setup_qow ; nope + jz .clip_setup_qow ; nope movd mm2, [eax+edx] ; 0 | z component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -897,12 +889,12 @@ clip_setup_ooz: pfadd mm2, mm4 ; 0 | TRI_SETF(FARRAY(_s, i)*_oow*gc->state.Viewport.hdepth+gc->state.Viewport.oz movd [fifo-4], mm2 ; PCI write transformed Z -clip_setup_qow: +.clip_setup_qow: test esi, 8 ; STATE_REQUIRES_OOW_FBI ? - jz clip_setup_qow0 ; nope + jz .clip_setup_qow0 ; nope - cmp DWORD PTR [gc+qInfo_mode],0; does vertex have Q component ? - je clip_setup_oow ; nope, not Q but W + cmp dword [gc+qInfo_mode],0; does vertex have Q component ? + je .clip_setup_oow ; nope, not Q but W add fifo, 4 ; fifoPtr += sizeof(FxFloat) mov eax, [gc+qInfo_offset] ; offset of Q component of vertex @@ -914,23 +906,23 @@ clip_setup_qow: pfmul mm2, mm0 ; q*oow movd [fifo-4], mm2 ; PCI write transformed Q - jmp clip_setup_qow0 ; continue with q0 + jmp .clip_setup_qow0 ; continue with q0 ALIGN 32 -clip_setup_oow: +.clip_setup_oow: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -clip_setup_qow0: +.clip_setup_qow0: test esi, 16 ; STATE_REQUIRES_W_TMU0 ? - jz clip_setup_stow0 ; nope + jz .clip_setup_stow0 ; nope - cmp DWORD PTR [gc+q0Info_mode],0; does vertex have Q component ? - je clip_setup_oow0 ; nope, not Q but W + cmp dword [gc+q0Info_mode],0; does vertex have Q component ? 
+ je .clip_setup_oow0 ; nope, not Q but W mov eax, [gc+q0Info_offset] ; offset of Q component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -942,21 +934,21 @@ clip_setup_qow0: pfmul mm2, mm0 ; q0*oow movd [fifo-4], mm2 ; PCI write transformed q0 - jmp clip_setup_stow0 ; continue with stow0 + jmp .clip_setup_stow0 ; continue with stow0 ALIGN 32 -clip_setup_oow0: +.clip_setup_oow0: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -clip_setup_stow0: +.clip_setup_stow0: test esi, 32 ; STATE_REQUIRES_ST_TMU0 ? - jz clip_setup_qow1 ; nope + jz .clip_setup_qow1 ; nope movq mm7, [gc + tmu0_s_scale] ; state.tmu_config[0].t_scale | state.tmu_config[0].s_scale add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -975,12 +967,12 @@ clip_setup_stow0: movq [fifo-8], mm2 ; PCI write param2*oow*tmu0_t_scale | param1*oow*tmu0_s_scale mov eax, [gc+dataElem+tsuDataList]; pointer to next vertex component -clip_setup_qow1: +.clip_setup_qow1: test esi, 64 ; STATE_REQUIRES_W_TMU1 ? - jz clip_setup_stow1 ; nope + jz .clip_setup_stow1 ; nope - cmp DWORD PTR [gc+q1Info_mode],0; does vertex have Q component ? - je clip_setup_oow1 ; nope, not Q but W + cmp dword [gc+q1Info_mode],0; does vertex have Q component ? + je .clip_setup_oow1 ; nope, not Q but W mov eax, [gc+q1Info_offset] ; offset of Q component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -992,24 +984,24 @@ clip_setup_qow1: pfmul mm2, mm0 ; q1*oow movd [fifo-4], mm2 ; PCI write transformed q1 - jmp clip_setup_stow1 ; continue with stow1 + jmp .clip_setup_stow1 ; continue with stow1 ALIGN 32 -clip_setup_oow1: +.clip_setup_oow1: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -clip_setup_stow1: +.clip_setup_stow1: test esi, 128 ; STATE_REQUIRES_ST_TMU1 ? 
mov vertexCount, [vertices] ; get number of vertices movq mm7, [gc + tmu1_s_scale] ; state.tmu_config[1].t_scale | state.tmu_config[1].s_scale - jz clip_setup_end ; nope + jz .clip_setup_end ; nope movd mm2, [edx+eax] ; param1 add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1023,13 +1015,13 @@ clip_setup_stow1: pfmul mm2, mm7 ; param2*oow*state.tmu_config[1].t_scale | param1*oow*state.tmu_config[1].s_scale movq [fifo-8], mm2 ; PCI write param2*oow*state.tmu_config[1].t_scale | param1*oow*state.tmu_config[1].s_scale -clip_setup_end: +.clip_setup_end: ; 206 : for (k = 0; k < vcount; k++) { dec vertexCount ; vcount-- - jnz clip_for_begin ; until -clip_for_end: + jnz .clip_for_begin ; until +.clip_for_end: ; 221 : } ; 222 : TRI_END; @@ -1052,12 +1044,12 @@ clip_for_end: mov [esp + _count], vertexCount; remaining number of vertices to process cmp vertexCount, 0 ; any vertices left to process ? - mov DWORD PTR [esp+_pktype], 16; pktype = SSTCP_PKT3_DDDDDD (strip continuation) - jg clip_coords_begin ; loop if number of vertices to process >= 0 + mov dword [esp+_pktype], 16; pktype = SSTCP_PKT3_DDDDDD (strip continuation) + jg .clip_coords_begin ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state -strip_done: +.strip_done: ;;; } ;;; #undef FN_NAME ;;; } /* _grDrawVertexList */ @@ -1070,26 +1062,25 @@ strip_done: ret 20 ; return, pop 5 DWORD parameters off stack -__grDrawVertexList_3DNow_Clip@20 ENDP +endp ALIGN 32 -_a$ TEXTEQU <20> -_b$ TEXTEQU <24> +%define _a$ 20 +%define _b$ 24 -gc TEXTEQU -vb TEXTEQU -va TEXTEQU -i TEXTEQU -j TEXTEQU -dlp TEXTEQU -fifo TEXTEQU -dlpStart TEXTEQU -ADX TEXTEQU <__GlideRoot+pool_fTemp1> -ADY TEXTEQU <__GlideRoot+pool_fTemp2> +%define gc esi +%define vb edi +%define va ebx +%define i ebp +%define j edx +%define dlp edx +%define fifo ecx +%define dlpStart ebp +%define ADX _GlideRoot+pool_fTemp1 +%define ADY _GlideRoot+pool_fTemp2 - PUBLIC __grDrawTextureLine_3DNow@8 -__grDrawTextureLine_3DNow@8 PROC NEAR 
+proc _grDrawTextureLine_3DNow, 8 ; 227 : { ; 228 : #define FN_NAME "grDrawTextureLine" @@ -1102,16 +1093,16 @@ __grDrawTextureLine_3DNow@8 PROC NEAR ; 235 : GR_FLUSH_STATE(); push esi ; save caller's register variable - mov gc, [__GlideRoot + curGC] ; get current graphics context + mov gc, [_GlideRoot + curGC] ; get current graphics context push edi ; save caller's register variable - mov vb, _b$[esp-8] ; b + mov vb, [_b$ + esp-8] ; b push ebx ; save caller's register variable push ebp ; save caller's frame pointer femms ; we'll use MMX; empty FPU/MMX state - mov va, _a$[esp] ; a + mov va, [_a$ + esp] ; a ; 236 : ; 237 : { @@ -1150,7 +1141,7 @@ __grDrawTextureLine_3DNow@8 PROC NEAR mov i, [ADY] ; i = *(long *)&ADY test i, i ; i < 0 ? - jge $dont_swap_ij ; nope, no need to swap i and j + jge .dont_swap_ij ; nope, no need to swap i and j xor va, vb ; va ^ vb xor vb, va ; vb ^ (va ^ vb) = va @@ -1158,7 +1149,7 @@ __grDrawTextureLine_3DNow@8 PROC NEAR xor va, vb ; (va ^ vb) ^ va = vb xor i, 80000000h ; i ^= 0x80000000 -$dont_swap_ij: +.dont_swap_ij: ; 253 : ; 254 : DX = FARRAY(b, 0) - FARRAY(a, 0); @@ -1176,32 +1167,32 @@ $dont_swap_ij: and j, 7fffffffh ; j = abs(j) cmp j, i ; j < i ? - jl $j_lt_i ; yup + jl .j_lt_i ; yup test j, j ; j == 0 ? - jz $line_all_done ; yup, nothing to draw + jz .line_all_done ; yup, nothing to draw -$j_lt_i: +.j_lt_i: ; 264 : vSize = gc->state.vData.vSize + 8; ; 265 : GR_SET_EXPECTED_SIZE((vSize<< 2), 1); - lea eax, DWORD PTR [eax*4+36] ; we have vertices + 4 bytes for header + lea eax, [eax*4+36] ; we have vertices + 4 bytes for header cmp ecx, eax ; fifo room avail >= fifo room required ? 
- jge $enough_fifo_room ; yup, sufficient fifo room + jge .enough_fifo_room ; yup, sufficient fifo room push j ; preserve j - push @Line ; line number in source file + push __LINE__ ; line number in source file push 0 ; pointer to filename = NULL push eax ; fifo space required - call __FifoMakeRoom ; allocate new fifo space (modified fifoPtr) + call _FifoMakeRoom ; allocate new fifo space (modified fifoPtr) add esp, 12 ; pop 3 DWORD parameters off stack pop j ; restore j -$enough_fifo_room: +.enough_fifo_room: ; 266 : TRI_STRIP_BEGIN(kSetupCullDisable | kSetupStrip, 4, vSize, ; 267 : SSTCP_PKT3_BDDDDD | (1<<15)); @@ -1211,11 +1202,11 @@ $enough_fifo_room: mov fifo, [gc+fifoPtr] ; gc->cmdTransportInfo.fifoPtr pxor mm3, mm3 ; 0 | 0 - movd mm2, [__GlideRoot+pool_fHalf] ; 0 | _GlideRoot.pool.fHalf + movd mm2, [_GlideRoot+pool_fHalf] ; 0 | _GlideRoot.pool.fHalf movq mm4, [_F256_F256] ; 256.0f | 256.0f test fifo, 4 ; fifo QWORD aligned ? - jz $drawline_fifo_aligned ; yup + jz .drawline_fifo_aligned ; yup add fifo, 4 ; fifoPtr += sizeof(FxU32) mov eax, [gc+cullStripHdr] ; gc->cmdTransportInfo.cullStripHdr @@ -1227,7 +1218,7 @@ $enough_fifo_room: cmp j, i ; j < i ? mov [fifo-4], eax ; PCI write header (fifo now aligned) - jl $j_lt_i2_WB0 ; yup, j < i + jl .j_lt_i2_WB0 ; yup, j < i ; 270 : TRI_SETF(FARRAY(b, 0)); ; 271 : dataElem = 0; @@ -1252,14 +1243,14 @@ $enough_fifo_room: movq [fifo-8], mm6 ; PCI write FARRAY(b,4)-_GlideRoot.pool.fHalf | FARRAY(b,0) test eax, eax ; i != GR_DLIST_END ? - je $vertex_loop1_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + je .vertex_loop1_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop1_WB0: +.vertex_loop1_WB0: movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? 
- jz $vertex_loop1_done_WB1 ; nope, output next vertex component + jz .vertex_loop1_done_WB1 ; nope, output next vertex component movd mm6, [vb + eax] ; FARRAY(b,i) add dlp, 8 ; point to next entry in offset list @@ -1271,9 +1262,9 @@ $vertex_loop1_WB0: test eax, eax ; offset == 0 (end of offset list) ? movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop1_WB0 ; offset != 0, process next component + jnz .vertex_loop1_WB0 ; offset != 0, process next component -$vertex_loop1_done_WB0: +.vertex_loop1_done_WB0: ; 279 : TRI_SETF(0.f); ; 280 : TRI_SETF(0.f); @@ -1301,14 +1292,14 @@ $vertex_loop1_done_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(a,4)-_GlideRoot.pool.fHalf | FARRAY(a,0) cmp eax, 0 ; i == GR_DLIST_END ? - je $vertex_loop2_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + je .vertex_loop2_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop2_WB0: +.vertex_loop2_WB0: movd mm7, [va + eax] ; FARRAY(a,i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? - jz $vertex_loop2_done_WB1 ; nope, output next vertex component + jz .vertex_loop2_done_WB1 ; nope, output next vertex component movd mm6, [va + eax] ; FARRAY(a,i) add dlp, 8 ; point to next entry in offset list @@ -1320,9 +1311,9 @@ $vertex_loop2_WB0: test eax, eax ; offset == 0 (end of offset list) ? movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop2_WB0 ; offset != 0, process next component + jnz .vertex_loop2_WB0 ; offset != 0, process next component -$vertex_loop2_done_WB0: +.vertex_loop2_done_WB0: ; 291 : TRI_SETF(0.f); ; 292 : TRI_SETF(0.f); @@ -1350,14 +1341,14 @@ $vertex_loop2_done_WB0: test eax, eax ; i != GR_DLIST_END ? 
movq [fifo-8], mm6 ; PCI write FARRAY(b,4)+_GlideRoot.pool.fHalf | FARRAY(b,0) - je $vertex_loop3_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + je .vertex_loop3_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop3_WB0: +.vertex_loop3_WB0: movd mm7, [vb + eax] ; FARRAY(b, i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? - jz $vertex_loop3_done_WB1 ; nope, output next vertex component + jz .vertex_loop3_done_WB1 ; nope, output next vertex component movd mm6, [vb + eax] ; FARRAY(b, i) add dlp, 8 ; point to next entry in offset list @@ -1369,9 +1360,9 @@ $vertex_loop3_WB0: cmp eax, 0 ; offset == 0 (end of offset list) ? movq [fifo-8], mm7 ; PCI write current component | previous component - jne $vertex_loop3_WB0 ; offset != 0, process next component + jne .vertex_loop3_WB0 ; offset != 0, process next component -$vertex_loop3_done_WB0: +.vertex_loop3_done_WB0: ; 303 : TRI_SETF(256.f); ; 304 : TRI_SETF(0.f); @@ -1401,14 +1392,14 @@ $vertex_loop3_done_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(a,4)+_GlideRoot.pool.fHalf | FARRAY(a,0) test eax, eax ; i != GR_DLIST_END ? - jz $vertex_loop4_done_WB0 ; i == GR_DLIST_END, done with all four vertices + jz .vertex_loop4_done_WB0 ; i == GR_DLIST_END, done with all four vertices -$vertex_loop4_WB0: +.vertex_loop4_WB0: movd mm7, [va + eax] ; FARRAY(a, i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? - jz $vertex_loop4_done_WB1 ; nope, output next vertex component + jz .vertex_loop4_done_WB1 ; nope, output next vertex component movd mm6, [va + eax] ; FARRAY(a, i) add dlp, 8 ; point to next entry in offset list @@ -1420,16 +1411,16 @@ $vertex_loop4_WB0: test eax, eax ; offset == 0 (end of offset list) ? 
movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop4_WB0 ; offset != 0, process next component + jnz .vertex_loop4_WB0 ; offset != 0, process next component -$vertex_loop4_done_WB0: +.vertex_loop4_done_WB0: add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) lea esp, [esp] ; filler movq [fifo-8], mm4 ; PCI write TRI_SETF(0.f) | TRI_SETF(256.f) - jmp $line_done_WB0 ; last vertex done + jmp .line_done_WB0 ; last vertex done -$j_lt_i2_WB0: +.j_lt_i2_WB0: ; 317 : } else { /* y major */ ; 318 : TRI_SETF(FARRAY(b, 0) - _GlideRoot.pool.fHalf); @@ -1455,14 +1446,14 @@ $j_lt_i2_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(b,4)-_GlideRoot.pool.fHalf | FARRAY(b,0) cmp eax, 0 ; i != GR_DLIST_END ? - je $vertex_loop5_done_WB0 ; i == GR_DLIST_END, no more components in vertex + je .vertex_loop5_done_WB0 ; i == GR_DLIST_END, no more components in vertex -$vertex_loop5_WB0: +.vertex_loop5_WB0: movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? - jz $vertex_loop5_done_WB1 ; nope, output next vertex component + jz .vertex_loop5_done_WB1 ; nope, output next vertex component movd mm6, [vb + eax] ; FARRAY(b,i) add dlp, 8 ; point to next entry in offset list @@ -1474,9 +1465,9 @@ $vertex_loop5_WB0: test eax, eax ; offset == 0 (end of offset list) ? movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop5_WB0 ; offset != 0, process next component + jnz .vertex_loop5_WB0 ; offset != 0, process next component -$vertex_loop5_done_WB0: +.vertex_loop5_done_WB0: ; 327 : TRI_SETF(0.f); ; 328 : TRI_SETF(0.f); @@ -1504,14 +1495,14 @@ $vertex_loop5_done_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(a,4) | FARRAY(a,0)-_GlideRoot.pool.fHalf test eax, eax ; i != GR_DLIST_END ? 
- jz $vertex_loop6_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop6_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop6_WB0: +.vertex_loop6_WB0: movd mm7, [va + eax] ; get next vertex component mov eax, [dlp] ; get next offset from offset list cmp eax, 0 ; offset == 0 (end of offset list) ? - je $vertex_loop6_done_WB1 ; nope, output next vertex component + je .vertex_loop6_done_WB1 ; nope, output next vertex component movd mm6, [va + eax] ; get next vertex component add dlp, 8 ; point to next entry in offset list @@ -1523,9 +1514,9 @@ $vertex_loop6_WB0: test eax, eax ; offset == 0 (end of offset list) ? movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop6_WB0 ; offset != 0, process next component + jnz .vertex_loop6_WB0 ; offset != 0, process next component -$vertex_loop6_done_WB0: +.vertex_loop6_done_WB0: ; 339 : TRI_SETF(0.f); ; 340 : TRI_SETF(0.f); @@ -1553,15 +1544,15 @@ $vertex_loop6_done_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(b,4) | FARRAY(b,0)+_GlideRoot.pool.fHalf test eax, eax ; i != GR_DLIST_END ? - jz $vertex_loop7_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop7_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop7_WB0: +.vertex_loop7_WB0: movd mm7, [vb + eax] ; FARRAY(b, i) mov eax, [dlp] ; get next offset from offset list test eax, eax ; offset == 0 (end of offset list) ? - jz $vertex_loop7_done_WB1 ; nope, output next vertex component + jz .vertex_loop7_done_WB1 ; nope, output next vertex component movd mm6, [vb + eax] ; FARRAY(b, i) add dlp, 8 ; point to next entry in offset list @@ -1573,9 +1564,9 @@ $vertex_loop7_WB0: test eax, eax ; offset == 0 (end of offset list) ? 
movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $vertex_loop7_WB0 ; offset != 0, process next component + jnz .vertex_loop7_WB0 ; offset != 0, process next component -$vertex_loop7_done_WB0: +.vertex_loop7_done_WB0: ; 351 : TRI_SETF(256.f); ; 352 : TRI_SETF(0.f); @@ -1605,14 +1596,14 @@ $vertex_loop7_done_WB0: movq [fifo-8], mm6 ; PCI write FARRAY(a,4)+_GlideRoot.pool.fHalf | FARRAY(a,0) test eax, eax ; i != GR_DLIST_END ? - jz $vertex_loop8_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop8_done_WB0 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop8_WB0: +.vertex_loop8_WB0: movd mm7, [va + eax] ; FARRAY(a, i) mov eax, [dlp] ; get next offset from offset list cmp eax, 0 ; offset == 0 (end of offset list) ? - je $vertex_loop8_done_WB1 ; nope, output next vertex component + je .vertex_loop8_done_WB1 ; nope, output next vertex component movd mm6, [va + eax] ; FARRAY(a, i) add dlp, 8 ; point to next entry in offset list @@ -1624,17 +1615,17 @@ $vertex_loop8_WB0: movq [fifo-8], mm7 ; PCI write current component | previous component test eax, eax ; offset == 0 (end of offset list) ? - jnz $vertex_loop8_WB0 ; offset != 0, process next component + jnz .vertex_loop8_WB0 ; offset != 0, process next component -$vertex_loop8_done_WB0: +.vertex_loop8_done_WB0: movq [fifo], mm4 ; PCI write TRI_SETF(0.f) | TRI_SETF(256.f) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) nop ; filler - jmp $line_done_WB0 ; done with line + jmp .line_done_WB0 ; done with line -$drawline_fifo_aligned: +.drawline_fifo_aligned: mov eax, [gc+cullStripHdr] ; gc->cmdTransportInfo.cullStripHdr psllq mm2, 32 ; _GlideRoot.pool.fHalf | 0 @@ -1645,7 +1636,7 @@ $drawline_fifo_aligned: movd mm7, eax ; move header to "write buffer" = MM7 cmp j, i ; j < i ? 
- jl $j_lt_i2_WB1 ; yup, j < i + jl .j_lt_i2_WB1 ; yup, j < i ; 270 : TRI_SETF(FARRAY(b, 0)); ; 271 : dataElem = 0; @@ -1673,9 +1664,9 @@ $drawline_fifo_aligned: movq mm7, mm6 ; FARRAY(b,4)-_GlideRoot.pool.fHalf | FARRAY(b,0) punpckhdq mm7, mm7 ; write buffer = FARRAY(b,4)-_GlideRoot.pool.fHalf - jz $vertex_loop1_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop1_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop1_WB1: +.vertex_loop1_WB1: movd mm6, [vb + eax] ; FARRAY(b,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1686,15 +1677,15 @@ $vertex_loop1_WB1: test eax, eax ; i == GR_DLIST_END ? movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop1_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop1_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp-4] ; get next offset from offset list cmp eax, 0 ; i == GR_DLIST_END ? - jne $vertex_loop1_WB1 ; nope, more components to handle + jne .vertex_loop1_WB1 ; nope, more components to handle -$vertex_loop1_done_WB1: +.vertex_loop1_done_WB1: ; 279 : TRI_SETF(0.f); ; 280 : TRI_SETF(0.f); @@ -1728,9 +1719,9 @@ $vertex_loop1_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list test eax, eax ; i == GR_DLIST_END ? - je $vertex_loop2_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out + je .vertex_loop2_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop2_WB1: +.vertex_loop2_WB1: movd mm6, [va + eax] ; FARRAY(a,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1741,15 +1732,15 @@ $vertex_loop2_WB1: test eax, eax ; i == GR_DLIST_END ? 
movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop2_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop2_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [va + eax] ; FARRAY(a,i) mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; i == GR_DLIST_END ? - jnz $vertex_loop2_WB1 ; nope, more components to handle + jnz .vertex_loop2_WB1 ; nope, more components to handle -$vertex_loop2_done_WB1: +.vertex_loop2_done_WB1: ; 291 : TRI_SETF(0.f); ; 292 : TRI_SETF(0.f); @@ -1783,9 +1774,9 @@ $vertex_loop2_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list test eax, eax ; i == GR_DLIST_END ? - jz $vertex_loop3_done_WB1 ; yup, i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop3_done_WB1 ; yup, i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop3_WB1: +.vertex_loop3_WB1: movd mm6, [vb + eax] ; FARRAY(b,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1796,15 +1787,15 @@ $vertex_loop3_WB1: test eax, eax ; i == GR_DLIST_END ? movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop3_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop3_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp-4] ; get next offset from offset list cmp eax, 0 ; i == GR_DLIST_END ? - jnz $vertex_loop3_WB1 ; nope, more components to handle + jnz .vertex_loop3_WB1 ; nope, more components to handle -$vertex_loop3_done_WB1: +.vertex_loop3_done_WB1: ; 303 : TRI_SETF(256.f); ; 304 : TRI_SETF(0.f); @@ -1840,9 +1831,9 @@ $vertex_loop3_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list test eax, eax ; i == GR_DLIST_END ? 
- jz $vertex_loop4_WB1 ; i == GR_DLIST_END, done with all four vertices + jz .vertex_loop4_WB1 ; i == GR_DLIST_END, done with all four vertices -$vertex_loop4_WB1: +.vertex_loop4_WB1: movd mm6, [va + eax] ; FARRAY(a,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1853,15 +1844,15 @@ $vertex_loop4_WB1: test eax, eax ; i == GR_DLIST_END ? movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop4_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop4_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [va + eax] ; FARRAY(a,i) mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; i == GR_DLIST_END ? - jnz $vertex_loop4_WB1 ; nope, more components to handle + jnz .vertex_loop4_WB1 ; nope, more components to handle -$vertex_loop4_done_WB1: +.vertex_loop4_done_WB1: add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) punpckldq mm7, mm4 ; previous component | TRI_SETF(256.f) @@ -1869,9 +1860,9 @@ $vertex_loop4_done_WB1: movq mm7, mm4 ; TRI_SETF(0.f) | TRI_SETF(256.f) punpckhdq mm7, mm7 ; write buffer = TRI_SETF(0.f) - jmp $line_done_WB1 ; last vertex done + jmp .line_done_WB1 ; last vertex done -$j_lt_i2_WB1: +.j_lt_i2_WB1: ; 317 : } else { /* y major */ ; 318 : TRI_SETF(FARRAY(b, 0) - _GlideRoot.pool.fHalf); @@ -1903,9 +1894,9 @@ $j_lt_i2_WB1: cmp eax, 0 ; i != GR_DLIST_END ? nop ; filler - je $vertex_loop5_done_WB1 ; i == GR_DLIST_END, no more components in vertex + je .vertex_loop5_done_WB1 ; i == GR_DLIST_END, no more components in vertex -$vertex_loop5_WB1: +.vertex_loop5_WB1: movd mm6, [vb + eax] ; FARRAY(b,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1916,15 +1907,15 @@ $vertex_loop5_WB1: test eax, eax ; i == GR_DLIST_END ? movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop5_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop5_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; i == GR_DLIST_END ? 
- jnz $vertex_loop5_WB1 ; nope, more components to handle + jnz .vertex_loop5_WB1 ; nope, more components to handle -$vertex_loop5_done_WB1: +.vertex_loop5_done_WB1: ; 327 : TRI_SETF(0.f); ; 328 : TRI_SETF(0.f); @@ -1958,9 +1949,9 @@ $vertex_loop5_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list test eax, eax ; i == GR_DLIST_END ? - jz $vertex_loop6_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop6_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop6_WB1: +.vertex_loop6_WB1: movd mm6, [va + eax] ; FARRAY(a,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -1971,15 +1962,15 @@ $vertex_loop6_WB1: movq [fifo-8], mm7 ; PCI write current component | previous component test eax, eax ; i == GR_DLIST_END ? - jz $vertex_loop6_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop6_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [va + eax] ; FARRAY(a,i) mov eax, [dlp-4] ; get next offset from offset list cmp eax, 0 ; i == GR_DLIST_END ? - jne $vertex_loop6_WB1 ; nope, more components to handle + jne .vertex_loop6_WB1 ; nope, more components to handle -$vertex_loop6_done_WB1: +.vertex_loop6_done_WB1: ; 339 : TRI_SETF(0.f); ; 340 : TRI_SETF(0.f); @@ -2013,9 +2004,9 @@ $vertex_loop6_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list cmp eax, 0 ; i == GR_DLIST_END ? - je $vertex_loop7_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out + je .vertex_loop7_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop7_WB1: +.vertex_loop7_WB1: movd mm6, [vb + eax] ; FARRAY(b,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -2026,15 +2017,15 @@ $vertex_loop7_WB1: cmp eax, 0 ; i == GR_DLIST_END ? 
movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop7_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop7_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [vb + eax] ; FARRAY(b,i) mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; i == GR_DLIST_END ? - jnz $vertex_loop7_WB1 ; nope, more components to handle + jnz .vertex_loop7_WB1 ; nope, more components to handle -$vertex_loop7_done_WB1: +.vertex_loop7_done_WB1: ; 351 : TRI_SETF(256.f); ; 352 : TRI_SETF(0.f); @@ -2070,9 +2061,9 @@ $vertex_loop7_done_WB1: lea dlp, [dlpStart+4] ; point to next entry in offset list test eax, eax ; i == GR_DLIST_END ? - jz $vertex_loop8_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out + jz .vertex_loop8_done_WB1 ; i == GR_DLIST_END, no further components need to be pushed out -$vertex_loop8_WB1: +.vertex_loop8_WB1: movd mm6, [va + eax] ; FARRAY(a,i) add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -2083,15 +2074,15 @@ $vertex_loop8_WB1: punpckldq mm7, mm6 ; current component | previous component movq [fifo-8], mm7 ; PCI write current component | previous component - jz $vertex_loop8_done_WB0 ; yup, i == GR_DLIST_END + jz .vertex_loop8_done_WB0 ; yup, i == GR_DLIST_END movd mm7, [va + eax] ; FARRAY(a,i) mov eax, [dlp-4] ; get next offset from offset list cmp eax, 0 ; i == GR_DLIST_END ? 
- jnz $vertex_loop8_WB1 ; nope, more components to handle + jnz .vertex_loop8_WB1 ; nope, more components to handle -$vertex_loop8_done_WB1: +.vertex_loop8_done_WB1: punpckldq mm7, mm4 ; 256.0f | previous component add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -2102,12 +2093,12 @@ $vertex_loop8_done_WB1: punpckhdq mm7, mm7 ; write buffer = 256.0f nop ; filler -$line_done_WB1: +.line_done_WB1: movd [fifo], mm7 ; flush "write buffer" add fifo, 4 ; fifoPtr += sizeof(FxFloat) -$line_done_WB0: +.line_done_WB0: ; 365 : } ; 366 : TRI_END; @@ -2126,20 +2117,20 @@ $line_done_WB0: nop ; filler sub eax, fifo ; fifo space used up - mov ebx, [__GlideRoot+stats_linesDrawn] ; _GlideRoot.stats.linesDrawn + mov ebx, [_GlideRoot+stats_linesDrawn] ; _GlideRoot.stats.linesDrawn add edx, eax ; fifo space available now - mov ebp, [__GlideRoot+stats_otherTrisDrawn] ; _GlideRoot.stats.othertrisDrawn + mov ebp, [_GlideRoot+stats_otherTrisDrawn] ; _GlideRoot.stats.othertrisDrawn mov [gc + fifoRoom], edx ; save available fifo space inc ebx ; _GlideRoot.stats.linesDrawn++ - mov [__GlideRoot+stats_linesDrawn], ebx ; save _GlideRoot.stats.linesDrawn + mov [_GlideRoot+stats_linesDrawn], ebx ; save _GlideRoot.stats.linesDrawn add ebp, 2 ; _GlideRoot.stats.othertrisDrawn+=2 - mov [__GlideRoot+stats_otherTrisDrawn], ebp ; _GlideRoot.stats.othertrisDrawn+=2 + mov [_GlideRoot+stats_otherTrisDrawn], ebp ; _GlideRoot.stats.othertrisDrawn+=2 nop ; filler -$line_all_done: +.line_all_done: femms ; done with MMX; empty FPU/MMX state @@ -2150,14 +2141,13 @@ $line_all_done: pop esi ; restore caller's register variable ret 8 ; return and pop 2 DWORD parameters -__grDrawTextureLine_3DNow@8 ENDP +endp - PUBLIC __grDrawTriangles_3DNow@12 -__grDrawTriangles_3DNow@12 PROC NEAR +proc _grDrawTriangles_3DNow, 12 -_mode = 20 -_count = 24 -_pointers = 28 +%define _mode 20 +%define _count 24 +%define _pointers 28 ; 930 : { ; 931 : #define FN_NAME "_grDrawTriangles_3DNow" @@ -2177,16 +2167,16 @@ _pointers = 28 ; 945 : ; 
946 : GR_FLUSH_STATE(); -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to next entry in fifo -dlp TEXTEQU ; points to dataList structure -vertexCount TEXTEQU ; Current vertex counter in the packet -vertexPtr TEXTEQU ; Current vertex pointer (in deref mode) -vertex TEXTEQU ; Current vertex (in non-deref mode) -dlpStart TEXTEQU ; Pointer to start of offset list +%define gc edi ; points to graphics context +%define fifo ecx ; points to next entry in fifo +%define dlp ebp ; points to dataList structure +%define vertexCount esi ; Current vertex counter in the packet +%define vertexPtr ebx ; Current vertex pointer (in deref mode) +%define vertex ebx ; Current vertex (in non-deref mode) +%define dlpStart edx ; Pointer to start of offset list push edi ; save caller's register variable - mov gc, [__GlideRoot + curGC] ; get current graphics context + mov gc, [_GlideRoot + curGC] ; get current graphics context push esi ; save caller's register variable mov vertexCount, [esp+_count-8]; number of vertices in triangles @@ -2197,7 +2187,7 @@ dlpStart TEXTEQU ; Pointer to start of offset list mov vertexPtr, [esp+_pointers] ; get current vertex pointer (deref mode) test vertexCount, vertexCount ; number of vertices <= 0 ? - jle $tris_done ; yup, triangles are done + jle .tris_done ; yup, triangles are done ; 947 : ; 948 : #ifdef GLIDE_DEBUG @@ -2233,17 +2223,17 @@ dlpStart TEXTEQU ; Pointer to start of offset list mul ebp ; edx:eax = 1/3*2*2^32*count; edx = 1/3*2*count - mov eax, [__GlideRoot+trisProcessed] ; trisProcessed + mov eax, [_GlideRoot+trisProcessed] ; trisProcessed shr edx, 1 ; count/3 add eax, edx ; trisProcessed += count/3 mov edx, [esp + _mode] ; get mode (0 or 1) mov ecx, [gc + CoordinateSpace]; coordinates space (window/clip) - mov [__GlideRoot+trisProcessed], eax ; trisProcessed + mov [_GlideRoot+trisProcessed], eax ; trisProcessed test edx, edx ; mode 0 (array of vertices) ? 
- jnz $deref_mode ; nope, it's mode 1 (array of pointers to vertices) + jnz .deref_mode ; nope, it's mode 1 (array of pointers to vertices) mov edx, [gc + vertexStride] ; get stride in DWORDs nop ; filler @@ -2252,7 +2242,7 @@ dlpStart TEXTEQU ; Pointer to start of offset list cmp ecx, 0 ; coordinate space == 0 (window) ? mov [strideinbytes], edx ; save off stride (in bytes) - jnz $clip_coordinates_ND ; nope, coordinate space != window + jnz .clip_coordinates_ND ; nope, coordinate space != window ; 961 : while (count > 0) { ; 962 : FxI32 vcount = count >=15 ? 15 : count; @@ -2260,7 +2250,7 @@ dlpStart TEXTEQU ; Pointer to start of offset list ; 964 : TRI_STRIP_BEGIN(kSetupStrip, vcount, gc->state.vData.vSize, SSTCP_PKT3_BDDBDD); ; 965 : -$win_coords_loop_ND: +.win_coords_loop_ND: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -2276,17 +2266,17 @@ $win_coords_loop_ND: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? - jge $win_tri_begin_ND ; yup, start writing triangle data + jge .win_tri_begin_ND ; yup, start writing triangle data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack -$win_tri_begin_ND: +.win_tri_begin_ND: mov eax, vertexCount ; number of vertices in triangles mov fifo, [gc + fifoPtr] ; get fifoPtr @@ -2300,7 +2290,7 @@ $win_tri_begin_ND: lea dlpStart, [gc+tsuDataList] ; pointer to start of offset list test fifo, ebp ; fifoPtr QWORD aligned ? 
- jz $fifo_aligned_ND ; yup + jz .fifo_aligned_ND ; yup mov [fifo], eax ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned @@ -2329,7 +2319,7 @@ $win_tri_begin_ND: ; 987 : count -= 15; ; 988 : } -$win_vertex_loop_ND_WB0: ; nothing in "write buffer" +.win_vertex_loop_ND_WB0: ; nothing in "write buffer" mov eax, [dlpStart] ; get first offset from offset list mov dlp, dlpStart ; point to start of offset list @@ -2341,15 +2331,15 @@ $win_vertex_loop_ND_WB0: ; nothing in "write buffer" test eax, eax ; if offset == 0, end of list movq [fifo-8], mm1 ; PCI write x, y - jz $win_datalist_end_ND_WB0 ; no more vertex data, nothing in "write buffer" + jz .win_datalist_end_ND_WB0 ; no more vertex data, nothing in "write buffer" -$win_datalist_loop_ND_WB0: ; nothing in "write buffer" +.win_datalist_loop_ND_WB0: ; nothing in "write buffer" movd mm1, [vertex + eax] ; get next parameter mov eax, [dlp] ; get next offset from offset list cmp eax, 0 ; at end of offset list (offset == 0) ? - jz $win_datalist_end_ND_WB1 ; exit, write buffer contains one DWORD + jz .win_datalist_end_ND_WB1 ; exit, write buffer contains one DWORD movd mm2, [vertex + eax] ; get next parameter add dlp, 8 ; dlp++ @@ -2361,17 +2351,17 @@ $win_datalist_loop_ND_WB0: ; nothing in "write buffer" punpckldq mm1, mm2 ; current param | previous param movq [fifo-8], mm1 ; PCI write current param | previous param - jnz $win_datalist_loop_ND_WB0 ; nope, copy next parameter + jnz .win_datalist_loop_ND_WB0 ; nope, copy next parameter -$win_datalist_end_ND_WB0: +.win_datalist_end_ND_WB0: mov eax, [strideinbytes] ; get offset to next vertex dec vertexCount ; another vertex done. Any left? 
lea vertex, [vertex + eax] ; points to next vertex - jnz $win_vertex_loop_ND_WB0 ; yup, output next vertex + jnz .win_vertex_loop_ND_WB0 ; yup, output next vertex -$win_vertex_end_ND_WB0: +.win_vertex_end_ND_WB0: mov eax, [gc + fifoPtr] ; old fifoPtr mov ebp, [gc + fifoRoom] ; old number of bytes available in fifo @@ -2389,7 +2379,7 @@ $win_vertex_end_ND_WB0: test vertexCount, vertexCount ; any vertices left to process ? nop ; filler - jg $win_coords_loop_ND ; loop if number of vertices to process >= 0 + jg .win_coords_loop_ND ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -2401,11 +2391,11 @@ $win_vertex_end_ND_WB0: ret 12 ; return, pop 3 DWORD parameters off stack -$fifo_aligned_ND: +.fifo_aligned_ND: movd mm1, eax ; move header into "write buffer" -$win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" +.win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" movd mm2, [vertex] ; 0 | x of vertex add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -2420,9 +2410,9 @@ $win_vertex_loop_ND_WB1: ; one DWORD in "write buffer" movd mm1, [vertex+4] ; 0 | y of vertex test eax, eax ; offset == 0 (list empty) ? - jz $win_datalist_end_ND_WB1 ; yup, no more vertex data, one DWORD in "write buffer" + jz .win_datalist_end_ND_WB1 ; yup, no more vertex data, one DWORD in "write buffer" -$win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" +.win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" movd mm2, [vertex + eax] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -2434,23 +2424,23 @@ $win_datalist_loop_ND_WB1: ; one DWORD in "write buffer" cmp eax, 0 ; at end of offset list (offset == 0) ? 
movq [fifo-8], mm1 ; PCI write current param | previous param - jz $win_datalist_end_ND_WB0 ; yes, exit, "write buffer" empty + jz .win_datalist_end_ND_WB0 ; yes, exit, "write buffer" empty movd mm1, [vertex + eax] ; get next parameter mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jnz $win_datalist_loop_ND_WB1 ; nope, copy next parameter + jnz .win_datalist_loop_ND_WB1 ; nope, copy next parameter -$win_datalist_end_ND_WB1: +.win_datalist_end_ND_WB1: mov eax, [strideinbytes] ; get offset to next vertex dec vertexCount ; another vertex done. Any left? lea vertex, [vertex + eax] ; points to next vertex - jnz $win_vertex_loop_ND_WB1 ; yup, output next vertex + jnz .win_vertex_loop_ND_WB1 ; yup, output next vertex -$win_vertex_end_ND_WB1: +.win_vertex_end_ND_WB1: movd [fifo], mm1 ; flush "write buffer" mov eax, [gc + fifoPtr] ; old fifoPtr @@ -2471,7 +2461,7 @@ $win_vertex_end_ND_WB1: test vertexCount, vertexCount ; any vertices left to process ? nop ; filler - jg $win_coords_loop_ND ; loop if number of vertices to process >= 0 + jg .win_coords_loop_ND ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -2483,14 +2473,14 @@ $win_vertex_end_ND_WB1: ret 12 ; return, pop 3 DWORD parameters off stack -$deref_mode: +.deref_mode: prefetch [vertexPtr] ; pre-load first group of pointers test ecx, ecx ; coordinate space == 0 (window) ? - jnz $clip_coordinates_D ; nope, coordinate space != window + jnz .clip_coordinates_D ; nope, coordinate space != window -$win_coords_loop_D: +.win_coords_loop_D: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -2506,18 +2496,18 @@ $win_coords_loop_D: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? 
- jge $win_tri_begin_D ; yup, start writing triangle data + jge .win_tri_begin_D ; yup, start writing triangle data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler -$win_tri_begin_D: +.win_tri_begin_D: mov eax, vertexCount ; number of vertices in triangles mov fifo, [gc + fifoPtr] ; get fifoPtr @@ -2532,12 +2522,12 @@ $win_tri_begin_D: test fifo, ebp ; fifoPtr QWORD aligned ? nop ; filler - jz $fifo_aligned_D ; yup + jz .fifo_aligned_D ; yup mov [fifo], eax ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned -$win_vertex_loop_D_WB0: ; nothing in "write buffer" +.win_vertex_loop_D_WB0: ; nothing in "write buffer" mov edx, [vertexPtr] ; dereference pointer, edx points to vertex add vertexPtr, 4 ; next pointer @@ -2552,15 +2542,15 @@ $win_vertex_loop_D_WB0: ; nothing in "write buffer" add fifo, 8 ; fifo += 2 test eax, eax ; if offset == 0, end of offset list - je $win_datalist_end_D_WB0 ; no more vertex data, nothing in "write buffer" + je .win_datalist_end_D_WB0 ; no more vertex data, nothing in "write buffer" -$win_datalist_loop_D_WB0: ; nothing in "write buffer" +.win_datalist_loop_D_WB0: ; nothing in "write buffer" movd mm1, [edx + eax] ; get next parameter mov eax, [dlp] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jz $win_datalist_end_D_WB1 ; exit, write buffer contains one DWORD + jz .win_datalist_end_D_WB1 ; exit, write buffer contains one DWORD movd mm2, [edx + eax] ; get next parameter add dlp, 8 ; dlp++ @@ -2572,14 +2562,14 @@ $win_datalist_loop_D_WB0: ; nothing in "write buffer" cmp eax, 0 ; at end of offset list (offset == 0) ? 
movq [fifo-8], mm1 ; PCI write current param | previous param - jnz $win_datalist_loop_D_WB0 ; nope, copy next parameter + jnz .win_datalist_loop_D_WB0 ; nope, copy next parameter -$win_datalist_end_D_WB0: +.win_datalist_end_D_WB0: dec vertexCount ; another vertex done. Any left? - jnz $win_vertex_loop_D_WB0 ; yup, output next vertex + jnz .win_vertex_loop_D_WB0 ; yup, output next vertex -$win_vertex_end_D_WB0: +.win_vertex_end_D_WB0: mov eax, [gc + fifoPtr] ; old fifoPtr mov ebp, [gc + fifoRoom] ; old number of bytes available in fifo @@ -2597,7 +2587,7 @@ $win_vertex_end_D_WB0: test vertexCount, vertexCount ; any vertices left to process ? mov [esp + _count], vertexCount; remaining number of vertices to process - jg $win_coords_loop_D ; loop if number of vertices to process >= 0 + jg .win_coords_loop_D ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -2609,11 +2599,11 @@ $win_vertex_end_D_WB0: ret 12 ; return, pop 3 DWORD parameters off stack -$fifo_aligned_D: +.fifo_aligned_D: movd mm1, eax ; move header into "write buffer" -$win_vertex_loop_D_WB1: ; one DWORD in "write buffer" +.win_vertex_loop_D_WB1: ; one DWORD in "write buffer" mov edx, [vertexPtr] ; dereference pointer, edx points to vertex add vertexPtr, 4 ; next pointer @@ -2631,9 +2621,9 @@ $win_vertex_loop_D_WB1: ; one DWORD in "write buffer" movd mm1, [edx + 4] ; 0 | y of vertex cmp eax, 0 ; offset == 0 (list empty) ? - je $win_datalist_end_D_WB1 ; yup, no more vertex data, one DWORD in "write buffer" + je .win_datalist_end_D_WB1 ; yup, no more vertex data, one DWORD in "write buffer" -$win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 +.win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 movd mm2, [edx + eax] ; get next parameter add fifo, 8 ; fifoPtr += 2*sizeof(FxU32) @@ -2645,20 +2635,20 @@ $win_datalist_loop_D_WB1: ; one DWORD in "write buffer" = MM1 test eax, eax ; at end of offset list (offset == 0) ? 
movq [fifo-8], mm1 ; PCI write current param | previous param - jz $win_datalist_end_D_WB0 ; yes, exit, "write buffer" empty + jz .win_datalist_end_D_WB0 ; yes, exit, "write buffer" empty movd mm1, [edx + eax] ; get next parameter mov eax, [dlp-4] ; get next offset from offset list test eax, eax ; at end of offset list (offset == 0) ? - jnz $win_datalist_loop_D_WB1 ; nope, copy next parameter + jnz .win_datalist_loop_D_WB1 ; nope, copy next parameter -$win_datalist_end_D_WB1: +.win_datalist_end_D_WB1: dec vertexCount ; another vertex done. Any left? - jnz $win_vertex_loop_D_WB1 ; yup, output next vertex + jnz .win_vertex_loop_D_WB1 ; yup, output next vertex -$win_vertex_end_D_WB1: +.win_vertex_end_D_WB1: movd [fifo], mm1 ; flush "write buffer" mov eax, [gc + fifoPtr] ; old fifoPtr @@ -2679,7 +2669,7 @@ $win_vertex_end_D_WB1: cmp vertexCount, 0 ; any vertices left to process ? nop ; filler - jg $win_coords_loop_D ; loop if number of vertices to process >= 0 + jg .win_coords_loop_D ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -2723,19 +2713,19 @@ $win_vertex_end_D_WB1: ; 1016 : } ; 1017 : TRI_END; -ifndef GLIDE3_SCALER +%ifndef GLIDE3_SCALER -$clip_coordinates_D: +.clip_coordinates_D: - mov [strideinbytes], 4 ; unit stride for array of pointers to vertices + mov dword [strideinbytes], 4 ; unit stride for array of pointers to vertices -$clip_coordinates_ND: +.clip_coordinates_ND: -dataElem textequ ; number of vertex components processed +%define dataElem ebp ; number of vertex components processed - movd mm6, [__GlideRoot+pool_f255] ; GlideRoot.pool.f255 + movd mm6, [_GlideRoot+pool_f255] ; GlideRoot.pool.f255 -$clip_coords_begin: +.clip_coords_begin: ;;; } ;;; else { @@ -2760,17 +2750,17 @@ $clip_coords_begin: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? 
- jge $clip_tri_begin ; yup, start writing triangle data + jge .clip_tri_begin ; yup, start writing triangle data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack -$clip_tri_begin: +.clip_tri_begin: mov edx, vertexCount ; number of vertices in triangles mov fifo, [gc + fifoPtr] ; get fifoPtr @@ -2783,7 +2773,7 @@ $clip_tri_begin: mov [fifo], edx ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned -$clip_for_begin: +.clip_for_begin: mov edx, vertexPtr ; vertex = vertexPtr (assume no-deref mode) mov eax, [esp+_mode] ; mode 0 = no deref, mode 1 = deref @@ -2792,11 +2782,11 @@ $clip_for_begin: test eax, eax ; deref mode ? mov eax, [gc + wInfo_offset] ; get offset of W into vertex struct - jz $clip_noderef ; yup, no-deref mode + jz .clip_noderef ; yup, no-deref mode mov edx, [vertexPtr] ; vertex = *vertexPtr -$clip_noderef: +.clip_noderef: movd mm0, [edx + eax] ; 0 | W of current vertex pfrcp mm1, mm0 ; 0 | 1/W approx @@ -2826,13 +2816,13 @@ $clip_noderef: mov eax, [gc + tsuDataList] ; first entry from offset list movq [fifo-8], mm2 ; PCI write transformed x, y - jz $clip_setup_ooz ; nope, no color at all needed + jz .clip_setup_ooz ; nope, no color at all needed - cmp DWORD PTR [gc+colorType], 0; gc->state.vData.colorType == GR_FLOAT ? - jne $clip_setup_pargb ; nope, packed ARGB format + cmp dword [gc+colorType], 0; gc->state.vData.colorType == GR_FLOAT ? + jne .clip_setup_pargb ; nope, packed ARGB format test esi, 1 ; STATE_REQUIRES_IT_DRGB ? 
- jz $clip_setup_a ; no, but definitely A + jz .clip_setup_a ; no, but definitely A movd mm2, [edx + eax] ; 0 | r mov eax, [gc + tsuDataList+4] ; offset of g part of vertex data @@ -2856,9 +2846,9 @@ $clip_noderef: lea fifo, [fifo+12] ; fifoPtr += 3*sizeof(FxFloat) movd [fifo-4], mm2 ; PCI write b*255 - jz $clip_setup_ooz ; nope, no alpha, proceeed with ooz + jz .clip_setup_ooz ; nope, no alpha, proceeed with ooz -$clip_setup_a: +.clip_setup_a: movd mm2, [eax+edx] ; 0 | a add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -2869,9 +2859,9 @@ $clip_setup_a: mov eax, [gc+dataElem+tsuDataList]; offset of next part of vertex data movd [fifo-4], mm2 ; PCI write a*255 - jmp $clip_setup_ooz ; check whether we need to push out z + jmp .clip_setup_ooz ; check whether we need to push out z -$clip_setup_pargb: +.clip_setup_pargb: movd mm2, [eax+edx] ; get packed ARGB data add fifo, 4 ; fifoPtr += sizeof(FxU32) @@ -2881,10 +2871,10 @@ $clip_setup_pargb: movd [fifo-4], mm2 ; PCI write packed ARGB nop ; filler -$clip_setup_ooz: +.clip_setup_ooz: test esi, 4 ; STATE_REQUIRES_OOZ ? - jz $clip_setup_qow ; nope + jz .clip_setup_qow ; nope movd mm2, [eax + edx] ; 0 | z component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -2901,12 +2891,12 @@ $clip_setup_ooz: pfadd mm2, mm4 ; 0 | TRI_SETF(FARRAY(_s, i)*_oow*gc->state.Viewport.hdepth+gc->state.Viewport.oz movd [fifo-4], mm2 ; PCI write transformed Z -$clip_setup_qow: +.clip_setup_qow: test esi, 8 ; STATE_REQUIRES_OOW_FBI ? - jz $clip_setup_qow0 ; nope + jz .clip_setup_qow0 ; nope - cmp DWORD PTR [gc+qInfo_mode],0; does vertex have Q component ? - je $clip_setup_oow ; nope, not Q but W + cmp dword [gc+qInfo_mode],0; does vertex have Q component ? 
+ je .clip_setup_oow ; nope, not Q but W mov eax, [gc + qInfo_offset] ; offset of Q component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -2918,21 +2908,21 @@ $clip_setup_qow: pfmul mm2, mm0 ; q*oow movd [fifo-4], mm2 ; PCI write transformed Q - jmp $clip_setup_qow0 ; continue with q0 + jmp .clip_setup_qow0 ; continue with q0 -$clip_setup_oow: +.clip_setup_oow: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -$clip_setup_qow0: +.clip_setup_qow0: test esi, 16 ; STATE_REQUIRES_W_TMU0 ? - jz $clip_setup_stow0 ; nope + jz .clip_setup_stow0 ; nope - cmp DWORD PTR [gc+q0Info_mode],0; does vertex have Q component ? - je $clip_setup_oow0 ; nope, not Q but W + cmp dword [gc+q0Info_mode],0; does vertex have Q component ? + je .clip_setup_oow0 ; nope, not Q but W mov eax, [gc+q0Info_offset] ; offset of Q component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -2944,21 +2934,21 @@ $clip_setup_qow0: pfmul mm2, mm0 ; q0*oow movd [fifo-4], mm2 ; PCI write transformed q0 - jmp $clip_setup_stow0 ; continue with stow0 + jmp .clip_setup_stow0 ; continue with stow0 nop ; filler -$clip_setup_oow0: +.clip_setup_oow0: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -$clip_setup_stow0: +.clip_setup_stow0: test esi, 32 ; STATE_REQUIRES_ST_TMU0 ? - jz $clip_setup_qow1 ; nope + jz .clip_setup_qow1 ; nope movq mm7, [gc + tmu0_s_scale] ; state.tmu_config[0].t_scale | state.tmu_config[0].s_scale add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -2978,12 +2968,12 @@ $clip_setup_stow0: movq [fifo-8], mm2 ; PCI write param2*oow*tmu0_t_scale | param1*oow*tmu0_s_scale mov eax, [gc+dataElem+tsuDataList]; pointer to next vertex component -$clip_setup_qow1: +.clip_setup_qow1: test esi, 64 ; STATE_REQUIRES_W_TMU1 ? 
- jz $clip_setup_stow1 ; nope + jz .clip_setup_stow1 ; nope - cmp DWORD PTR [gc+q1Info_mode],0; does vertex have Q component ? - je $clip_setup_oow1 ; nope, not Q but W + cmp dword [gc+q1Info_mode],0; does vertex have Q component ? + je .clip_setup_oow1 ; nope, not Q but W mov eax, [gc+q1Info_offset] ; offset of Q component of vertex add fifo, 4 ; fifoPtr += sizeof(FxFloat) @@ -2995,22 +2985,22 @@ $clip_setup_qow1: pfmul mm2, mm0 ; q1*oow movd [fifo-4], mm2 ; PCI write transformed q1 - jmp $clip_setup_stow1 ; continue with stow1 + jmp .clip_setup_stow1 ; continue with stow1 -$clip_setup_oow1: +.clip_setup_oow1: add fifo, 4 ; fifoPtr += sizeof(FxFloat) add dataElem, 4 ; dataElem++ movd [fifo-4], mm0 ; PCI write oow mov eax,[gc+dataElem+tsuDataList]; pointer to next vertex component -$clip_setup_stow1: +.clip_setup_stow1: test esi, 128 ; STATE_REQUIRES_ST_TMU1 ? mov vertexCount, [vertices] ; get number of vertices movq mm7, [gc + tmu1_s_scale] ; state.tmu_config[1].t_scale | state.tmu_config[1].s_scale - jz $clip_setup_end ; nope + jz .clip_setup_end ; nope movd mm2, [edx + eax] ; param1 add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) @@ -3024,12 +3014,12 @@ $clip_setup_stow1: pfmul mm2, mm7 ; param2*oow*state.tmu_config[1].t_scale | param1*oow*state.tmu_config[1].s_scale movq [fifo-8], mm2 ; PCI write param2*oow*state.tmu_config[1].t_scale | param1*oow*state.tmu_config[1].s_scale -$clip_setup_end: +.clip_setup_end: dec vertexCount ; vcount-- - jnz $clip_for_begin ; until + jnz .clip_for_begin ; until -$clip_for_end: +.clip_for_end: mov eax, [gc + fifoPtr] ; old fifoPtr mov ebp, [gc + fifoRoom] ; old number of bytes available in fifo @@ -3046,11 +3036,11 @@ $clip_for_end: mov [esp + _count], vertexCount; remaining number of vertices to process cmp vertexCount, 0 ; any vertices left to process ? 
- jg $clip_coords_begin ; loop if number of vertices to process >= 0 + jg .clip_coords_begin ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state -else ; GLIDE3_SCALER +%else ; GLIDE3_SCALER ;---------------------------------------------------------------------------- @@ -3065,14 +3055,14 @@ else ; GLIDE3_SCALER ;---------------------------------------------------------------------------- -dataElem textequ ; number of vertex components processed +%define dataElem ebp ; number of vertex components processed nop ; filler for code alignment -$clip_coordinates_ND: +.clip_coordinates_ND: pxor mm0, mm0 ; load 0 - movd mm1, [__GlideRoot+pool_f255]; GlideRoot.pool.f255 + movd mm1, [_GlideRoot+pool_f255]; GlideRoot.pool.f255 movd [atab], mm0 ; atable[0] = 0.0f movd mm5, [gc + vp_oz] ; gc->state.Viewport.oz @@ -3083,13 +3073,13 @@ $clip_coordinates_ND: movq mm1, [gc + vp_ox] ; gc->state.Viewport.oy | gc->state.Viewport.ox movd [btab+4], mm5 ; btable[1] = gc->state.Viewport.oz - movq QWORD PTR [btab+8], mm0 ; btable[3] = 0.0f | btable[2] = 0.0f + movq QWORD [btab+8], mm0 ; btable[3] = 0.0f | btable[2] = 0.0f nop ; filler - movq QWORD PTR [btab+16], mm0 ; btable[5] = 0.0f | btable[4] = 0.0f - movq QWORD PTR [btab+24], mm0 ; btable[7] = 0.0f | btable[6] = 0.0f + movq QWORD [btab+16], mm0 ; btable[5] = 0.0f | btable[4] = 0.0f + movq QWORD [btab+24], mm0 ; btable[7] = 0.0f | btable[6] = 0.0f -$clip_coords_begin_ND: +.clip_coords_begin_ND: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -3105,18 +3095,18 @@ $clip_coords_begin_ND: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? 
- jge $clip_tri_begin_ND ; yup, start writing triangle data + jge .clip_tri_begin_ND ; yup, start writing triangle data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler for code alignment -$clip_tri_begin_ND: +.clip_tri_begin_ND: mov eax, vertexCount ; number of vertices in triangles mov fifo, [gc + fifoPtr] ; get fifoPtr @@ -3130,12 +3120,12 @@ $clip_tri_begin_ND: test fifo, ebp ; fifoPtr QWORD aligned ? mov edx, [gc + wInfo_offset] ; gc->state.vData.wInfo.offset - jz $clip_fifo_aligned_ND ; yup + jz .clip_fifo_aligned_ND ; yup mov [fifo], eax ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned -$clip_for_begin_WB0_ND: ; "write buffer" = MM7 is empty +.clip_for_begin_WB0_ND: ; "write buffer" = MM7 is empty ;; here: ebx = vertex ;; ecx = fifo @@ -3182,11 +3172,11 @@ $clip_for_begin_WB0_ND: ; "write buffer" = MM7 is empty movq [fifo-8], mm7 ; PCI write transformed x, y; write buffer=mm7 empty nop ; filler - movq QWORD PTR [atab+12], mm3 ; atable[4] = oow*gc->state.tmu_config[0].t_scale | atable[3] = oow*gc->state.tmu_config[0].s_scale + movq QWORD [atab+12], mm3 ; atable[4] = oow*gc->state.tmu_config[0].t_scale | atable[3] = oow*gc->state.tmu_config[0].s_scale pfmul mm4, mm0 ; oow*gc->state.tmu_config[1].t_scale | oow*gc->state.tmu_config[1].s_scale - movq QWORD PTR [atab+20], mm4 ; atable[6] = oow*gc->state.tmu_config[1].t_scale | atable[5] = oow*gc->state.tmu_config[1].s_scale - jne $not_pargb_WB0_ND ; nope, gc->state.vData.colorType != GR_U8 + movq QWORD [atab+20], mm4 ; atable[6] = oow*gc->state.tmu_config[1].t_scale | atable[5] = oow*gc->state.tmu_config[1].s_scale + jne .not_pargb_WB0_ND ; nope, gc->state.vData.colorType != GR_U8 movd mm7, [vertex + eax] ; get packed ARGB 
data; 1 DWORD in "write buffer"=mm7 nop ; filler @@ -3194,10 +3184,10 @@ $clip_for_begin_WB0_ND: ; "write buffer" = MM7 is empty mov eax, [gc + tsuDataList + 4]; get offset of next vertex component (after pargb) add dataElem, 4 ; dataElem = 1 (namely pargb) -$not_pargb_WB1_ND: +.not_pargb_WB1_ND: test eax, eax ; end of offset list ? - jz $clip_setup_end_WB1_ND ; yup + jz .clip_setup_end_WB1_ND ; yup ;; here: eax = offset into vertex ;; ebx = vertex @@ -3210,14 +3200,14 @@ $not_pargb_WB1_ND: ;; mm7 = write buffer ;; avail: edx, mm0, mm3, mm4, mm5, mm6 -$inner_loop_WB1_ND: +.inner_loop_WB1_ND: mov edx, [gc+dataElem+tsuDatLstScal]; iscaler add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) movd mm4, [vertex + eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[edx*4] ; atable [iscaler] + movd mm6, [atab + edx*4] ; atable [iscaler] - movd mm5, btab[edx*4] ; btable [iscaler] + movd mm5, [btab + edx*4] ; btable [iscaler] pfmul mm4, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList+4]; next offset from offset list @@ -3229,22 +3219,22 @@ $inner_loop_WB1_ND: mov edx, [gc+dataElem+tsuDatLstScal+4]; next iscaler movq [fifo-8], mm7 ; PCI write new param | previous param - jz $clip_setup_end_WB0_ND ; yup, end of offset list, this vertex done + jz .clip_setup_end_WB0_ND ; yup, end of offset list, this vertex done add dataElem, 8 ; dataElem++ movd mm7, [vertex+eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[edx*4] ; atable [iscaler] + movd mm6, [atab + edx*4] ; atable [iscaler] - movd mm5, btab[edx*4] ; btable [iscaler] + movd mm5, [btab + edx*4] ; btable [iscaler] pfmul mm7, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList]; next offset from offset list pfadd mm7, mm5 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler]+btable [iscaler] cmp eax, 0 ; offset == 0 (end of offset list) ? 
- jnz $inner_loop_WB1_ND ; nope, get next component (1 DWORD in "write buffer") + jnz .inner_loop_WB1_ND ; nope, get next component (1 DWORD in "write buffer") -$clip_setup_end_WB1_ND: +.clip_setup_end_WB1_ND: mov eax, [strideinbytes] ; offset to next vertex nop ; filler @@ -3253,15 +3243,15 @@ $clip_setup_end_WB1_ND: dec vertexCount ; one less vertex to handle lea vertex, [vertex + eax] ; points to next vertex - jnz $clip_for_begin_WB1_ND ; until all vertices done; 1 DWORD in "write buffer" = MM7 + jnz .clip_for_begin_WB1_ND ; until all vertices done; 1 DWORD in "write buffer" = MM7 movd [fifo], mm7 ; flush "write buffer" add fifo, 4 ; fifoPtr += sizeof(FxFloat) mov esp, esp ; filler - jmp $clip_setup_done_ND ; all vertices handled, tri strip done + jmp .clip_setup_done_ND ; all vertices handled, tri strip done -$clip_fifo_aligned_ND: +.clip_fifo_aligned_ND: movd mm7, eax ; write buffer has 1 DWORD now @@ -3275,7 +3265,7 @@ $clip_fifo_aligned_ND: ;;; TRI_SETF(FARRAY(vPtr, 4) ;;; *oow*gc->state.Viewport.hheight + gc->state.Viewport.oy) -$clip_for_begin_WB1_ND: +.clip_for_begin_WB1_ND: ;; here: ebx = vertex ;; ecx = fifo @@ -3318,7 +3308,7 @@ $clip_for_begin_WB1_ND: movd [atab+4], mm6 ; atable[1] = oow*gc->state.Viewport.hdepth pfadd mm5, mm1 ; TRI_SETF(FARRAY(vPtr, 4)*oow*gc->state.Viewport.hheight + gc->state.Viewport.oy) | - movq QWORD PTR [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale + movq QWORD [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale movq mm3, [gc + tmu1_s_scale] ; gc->state.tmu_config[1].t_scale | gc->state.tmu_config[1].s_scale punpckldq mm7, mm5 ; header | transformed x @@ -3329,8 +3319,8 @@ $clip_for_begin_WB1_ND: pfmul mm3, mm0 ; oow*gc->state.tmu_config[1].t_scale | oow*gc->state.tmu_config[1].s_scale movq mm7, mm5 ; 1 DWORD in "write buffer" (transformed y) - movq QWORD PTR [atab+20], mm3 ; atable[6] = 
oow * gc->state.tmu_config[1].t_scale | oow * gc->state.tmu_config[1].s_scale - jne $not_pargb_WB1_ND ; nope, gc->state.vData.colorType != GR_U8 + movq qword [atab+20], mm3 ; atable[6] = oow * gc->state.tmu_config[1].t_scale | oow * gc->state.tmu_config[1].s_scale + jne .not_pargb_WB1_ND ; nope, gc->state.vData.colorType != GR_U8 movd mm6, [vertex + eax] ; get packed ARGB data; mov dataElem, 4 ; dataElem = 1 (namely pargb) @@ -3341,9 +3331,9 @@ $clip_for_begin_WB1_ND: movq [fifo], mm7 ; PCI write pargb | transformed y add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) -$not_pargb_WB0_ND: +.not_pargb_WB0_ND: cmp eax, 0 ; end of data offset list ? - jz $clip_setup_end_WB0_ND ; yup, this vertex done + jz .clip_setup_end_WB0_ND ; yup, this vertex done ;; here: eax = offset into vertex ;; ebx = vertex @@ -3356,13 +3346,13 @@ $not_pargb_WB0_ND: ;; mm3 = F256_F256 ;; avail: edx, mm0, mm4, mm5, mm6, mm7 -$inner_loop_WB0_ND: +.inner_loop_WB0_ND: mov edx, [gc+dataElem+tsuDatLstScal]; iscaler movd mm7, [vertex + eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[edx*4] ; atable[iscaler] - movd mm5, btab[edx*4] ; btable[iscaler] + movd mm6, [atab + edx*4] ; atable[iscaler] + movd mm5, [btab + edx*4] ; btable[iscaler] pfmul mm7, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList+4] ; next offset from offset list @@ -3371,12 +3361,12 @@ $inner_loop_WB0_ND: mov edx, [gc+dataElem+tsuDatLstScal+4]; next iscaler cmp eax, 0 ; offset == 0 (end of offset list) ? 
- jz $clip_setup_end_WB1_ND ; yup, vertex done, 1 DWORD in "write buffer" + jz .clip_setup_end_WB1_ND ; yup, vertex done, 1 DWORD in "write buffer" movd mm4, [vertex + eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[edx*4] ; atable[iscaler] + movd mm6, [atab + edx*4] ; atable[iscaler] - movd mm5, btab[edx*4] ; btable[iscaler] + movd mm5, [btab + edx*4] ; btable[iscaler] add dataElem, 8 ; dataElem += 2 add fifo, 8 ; fifoPtr += 2*sizeof*FxFloat) @@ -3392,9 +3382,9 @@ $inner_loop_WB0_ND: nop ; filler movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $inner_loop_WB0_ND ; nope, vertex not done yet; "write buffer" empty + jnz .inner_loop_WB0_ND ; nope, vertex not done yet; "write buffer" empty -$clip_setup_end_WB0_ND: +.clip_setup_end_WB0_ND: mov eax, [strideinbytes] ; offset to next vertex mov esp, esp ; filler @@ -3403,9 +3393,9 @@ $clip_setup_end_WB0_ND: sub vertexCount, 1 ; one less vertex to process, any left? lea vertex, [vertex + eax] ; points to next vertex - jnz $clip_for_begin_WB0_ND ; yup, vertices left, push out next vertex + jnz .clip_for_begin_WB0_ND ; yup, vertices left, push out next vertex -$clip_setup_done_ND: +.clip_setup_done_ND: mov eax, [gc + fifoPtr] ; old fifoPtr mov ebp, [gc + fifoRoom] ; old number of bytes available in fifo @@ -3422,7 +3412,7 @@ $clip_setup_done_ND: cmp vertexCount, 0 ; any vertices left to process ? 
nop ; filler - jg $clip_coords_begin_ND ; loop if number of vertices to process >= 0 + jg .clip_coords_begin_ND ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state @@ -3439,15 +3429,15 @@ $clip_setup_done_ND: nop ; filler for code alignment -$clip_coordinates_D: +.clip_coordinates_D: pxor mm0, mm0 ; load 0 - movd mm1, [__GlideRoot+pool_f255]; GlideRoot.pool.f255 + movd mm1, [_GlideRoot+pool_f255]; GlideRoot.pool.f255 movd [atab], mm0 ; atable[0] = 0.0f movd mm5, [gc + vp_oz] ; gc->state.Viewport.oz - movq QWORD PTR [btab+8], mm0 ; btable[3] = 0.0f | btable[2] = 0.0f + movq QWORD [btab+8], mm0 ; btable[3] = 0.0f | btable[2] = 0.0f movq mm2, [gc + vp_hwidth] ; gc->state.Viewport.hheight | gc->state.Viewport.hwidth movd [atab+8], mm1 ; atable[2] = GlideRoot.pool.f255 @@ -3456,10 +3446,10 @@ $clip_coordinates_D: movd [btab+4], mm5 ; btable[1] = gc->state.Viewport.oz nop ; filler - movq QWORD PTR [btab+16], mm0 ; btable[5] = 0.0f | btable[4] = 0.0f - movq QWORD PTR [btab+24], mm0 ; btable[7] = 0.0f | btable[6] = 0.0f + movq QWORD [btab+16], mm0 ; btable[5] = 0.0f | btable[4] = 0.0f + movq QWORD [btab+24], mm0 ; btable[7] = 0.0f | btable[6] = 0.0f -$clip_coords_begin_D: +.clip_coords_begin_D: sub vertexCount, 15 ; vertexCount >= 15 ? CF=0 : CF=1 mov ecx, [gc + vertexSize] ; bytes of data for each vertex @@ -3475,18 +3465,18 @@ $clip_coords_begin_D: add ecx, 4 ; add header size ==> total packet size cmp eax, ecx ; fifo space avail >= packet size ? 
- jge $clip_tri_begin_D ; yup, start writing triangle data + jge .clip_tri_begin_D ; yup, start writing triangle data - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push ecx ; fifo space needed - call __FifoMakeRoom ; note: updates fifoPtr + call _FifoMakeRoom ; note: updates fifoPtr add esp, 12 ; remove 3 DWORD arguments from stack nop ; filler -$clip_tri_begin_D: +.clip_tri_begin_D: mov edx, vertexCount ; number of vertices in triangles mov fifo, [gc + fifoPtr] ; get fifoPtr @@ -3500,12 +3490,12 @@ $clip_tri_begin_D: test fifo, ebp ; fifoPtr QWORD aligned ? mov eax, [gc + wInfo_offset] ; gc->state.vData.wInfo.offset - jz $clip_fifo_aligned_D ; yup + jz .clip_fifo_aligned_D ; yup mov [fifo], edx ; PCI write packet type add fifo, 4 ; fifo pointer now QWORD aligned -$clip_for_begin_WB0_D : ; "write buffer" = MM7 is empty +.clip_for_begin_WB0_D : ; "write buffer" = MM7 is empty ;; here: eax = gc->state.vData.wInfo.offset ;; ebx = vertexPtr @@ -3556,11 +3546,11 @@ $clip_for_begin_WB0_D : ; "write buffer" = MM7 is empty movd [atab+4], mm6 ; atable[1] = oow*gc->state.Viewport.hdepth pfmul mm4, mm0 ; oow * gc->state.tmu_config[1].t_scale | oow * gc->state.tmu_config[1].s_scale - movq QWORD PTR [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale + movq qword [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale movq [fifo-8], mm7 ; PCI write transformed x, y; write buffer=mm7 empty - movq QWORD PTR [atab+20], mm4 ; atable[6] = oow * gc->state.tmu_config[1].t_scale | atable[5] = oow * gc->state.tmu_config[1].s_scale - jne $not_pargb_WB0_D ; nope, gc->state.vData.colorType != GR_U8 + movq qword [atab+20], mm4 ; atable[6] = oow * gc->state.tmu_config[1].t_scale | atable[5] = oow * gc->state.tmu_config[1].s_scale + jne .not_pargb_WB0_D ; nope, 
gc->state.vData.colorType != GR_U8 movd mm7, [edx + eax] ; get packed ARGB data; 1 DWORD in "write buffer"=mm7 nop ; filler @@ -3568,10 +3558,10 @@ $clip_for_begin_WB0_D : ; "write buffer" = MM7 is empty mov eax, [gc + tsuDataList + 4]; get offset of next vertex component (after pargb) add dataElem, 4 ; dataElem = 1 (namely pargb) -$not_pargb_WB1_D: +.not_pargb_WB1_D: test eax, eax ; end of offset list ? - jz $clip_setup_end_WB1_D ; yup + jz .clip_setup_end_WB1_D ; yup ;; here: eax = offset into vertex ;; ebx = vertexPtr @@ -3584,14 +3574,14 @@ $not_pargb_WB1_D: ;; mm7 = write buffer ;; avail: eax, esi, mm0, mm3, mm4, mm5, mm6 -$inner_loop_WB1_D: +.inner_loop_WB1_D: mov esi, [gc+dataElem+tsuDatLstScal]; iscaler add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) movd mm4, [edx + eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[esi*4] ; atable [iscaler] + movd mm6, [atab + esi*4] ; atable [iscaler] - movd mm5, btab[esi*4] ; btable [iscaler] + movd mm5, [btab + esi*4] ; btable [iscaler] pfmul mm4, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList+4]; next offset from offset list @@ -3603,13 +3593,13 @@ $inner_loop_WB1_D: mov esi, [gc+dataElem+tsuDatLstScal+4]; next iscaler movq [fifo-8], mm7 ; PCI write new param | previous param - jz $clip_setup_end_WB0_D ; yup, end of offset list, this vertex done + jz .clip_setup_end_WB0_D ; yup, end of offset list, this vertex done movd mm7, [edx + eax] ; TRI_SETF(FARRAY(vPtr, i) add dataElem, 8 ; dataElem += 2 - movd mm6, atab[esi*4] ; atable [iscaler] + movd mm6, [atab + esi*4] ; atable [iscaler] - movd mm5, btab[esi*4] ; btable [iscaler] + movd mm5, [btab + esi*4] ; btable [iscaler] pfmul mm7, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList]; next offset from offset list @@ -3618,27 +3608,27 @@ $inner_loop_WB1_D: nop ; filler test eax, eax ; offset == 0 (end of offset list) ? 
- jnz $inner_loop_WB1_D ; nope, get next component (1 DWORD in "write buffer") + jnz .inner_loop_WB1_D ; nope, get next component (1 DWORD in "write buffer") nop ; filler -$clip_setup_end_WB1_D: +.clip_setup_end_WB1_D: mov vertexCount, [vertices] ; get back number of vertices left to process mov eax, [gc + wInfo_offset] ; gc->state.vData.wInfo.offset dec vertexCount ; one less vertex to handle - jnz $clip_for_begin_WB1_D ; until all vertices done; 1 DWORD in "write buffer" = MM7 + jnz .clip_for_begin_WB1_D ; until all vertices done; 1 DWORD in "write buffer" = MM7 movd [fifo], mm7 ; flush "write buffer" add fifo, 4 ; fifoPtr += sizeof(FxFloat) - jmp $clip_setup_done_D ; all vertices handled, triangles done + jmp .clip_setup_done_D ; all vertices handled, triangles done nop ; filler -$clip_fifo_aligned_D: +.clip_fifo_aligned_D: movd mm7, edx ; write buffer has 1 DWORD now -$clip_for_begin_WB1_D: +.clip_for_begin_WB1_D: ;; here: eax = gc->state.vData.wInfo.offset ;; ebx = vertexPtr @@ -3686,7 +3676,7 @@ $clip_for_begin_WB1_D: pfadd mm5, mm1 ; TRI_SETF(FARRAY(vPtr, 4)*oow*gc->state.Viewport.hheight + gc->state.Viewport.oy) | movd [atab+4], mm6 ; atable[1] = oow*gc->state.Viewport.hdepth - movq QWORD PTR [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale + movq qword [atab+12], mm3 ; atable[4] = oow * gc->state.tmu_config[0].t_scale | atable[3] = oow * gc->state.tmu_config[0].s_scale movq mm3, [gc + tmu1_s_scale] ; gc->state.tmu_config[1].t_scale | gc->state.tmu_config[1].s_scale punpckldq mm7, mm5 ; header | transformed x @@ -3698,8 +3688,8 @@ $clip_for_begin_WB1_D: movq mm7, mm5 ; 1 DWORD in "write buffer" (transformed y) nop ; filler - movq QWORD PTR [atab+20], mm3 ; atable[6] = oow * gc->state.tmu_config[1].t_scale | atable[5] = oow * gc->state.tmu_config[1].s_scale - jne $not_pargb_WB1_D ; nope, gc->state.vData.colorType != GR_U8 + movq qword [atab+20], mm3 ; atable[6] = oow * 
gc->state.tmu_config[1].t_scale | atable[5] = oow * gc->state.tmu_config[1].s_scale + jne .not_pargb_WB1_D ; nope, gc->state.vData.colorType != GR_U8 movd mm6, [edx + eax] ; get packed ARGB data; mov dataElem, 4 ; dataElem = 1 (namely pargb) @@ -3710,9 +3700,9 @@ $clip_for_begin_WB1_D: movq [fifo], mm7 ; PCI write pargb | transformed y add fifo, 8 ; fifoPtr += 2*sizeof(FxFloat) -$not_pargb_WB0_D: +.not_pargb_WB0_D: test eax, eax ; end of data offset list ? - jz $clip_setup_end_WB0_D ; yup, this vertex done + jz .clip_setup_end_WB0_D ; yup, this vertex done ;; here: eax = offset into vertex ;; ebx = vertexPtr @@ -3724,12 +3714,12 @@ $not_pargb_WB0_D: ;; mm2 = gc->state.Viewport.hheight|gc->state.Viewport.hwidth ;; avail: eax, esi, mm0, mm3, mm4, mm5, mm6, mm7 -$inner_loop_WB0_D: +.inner_loop_WB0_D: mov esi, [gc+dataElem+tsuDatLstScal]; iscaler movd mm7, [edx+eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[esi*4] ; atable[iscaler] - movd mm5, btab[esi*4] ; btable[iscaler] + movd mm6, [atab + esi*4] ; atable[iscaler] + movd mm5, [btab + esi*4] ; btable[iscaler] pfmul mm7, mm6 ; TRI_SETF(FARRAY(vPtr, i)*atable[iscaler] mov eax, [gc+dataElem+tsuDataList+4] @@ -3738,12 +3728,12 @@ $inner_loop_WB0_D: mov esi, [gc+dataElem+tsuDatLstScal+4]; next iscaler test eax, eax ; offset == 0 (end of offset list) ? 
- jz $clip_setup_end_WB1_D ; yup, vertex done, 1 DWORD in "write buffer" + jz .clip_setup_end_WB1_D ; yup, vertex done, 1 DWORD in "write buffer" movd mm4, [edx+eax] ; TRI_SETF(FARRAY(vPtr, i) - movd mm6, atab[esi*4] ; atable[iscaler] + movd mm6, [atab + esi*4] ; atable[iscaler] - movd mm5, btab[esi*4] ; btable[iscaler] + movd mm5, [btab + esi*4] ; btable[iscaler] add dataElem, 8 ; dataElem += 2 add fifo, 8 ; fifoPtr += 2*sizeof*FxFloat) @@ -3756,16 +3746,16 @@ $inner_loop_WB0_D: punpckldq mm7, mm4 ; current component | previous component movq [fifo-8], mm7 ; PCI write current component | previous component - jnz $inner_loop_WB0_D ; nope, vertex not done yet; "write buffer" empty + jnz .inner_loop_WB0_D ; nope, vertex not done yet; "write buffer" empty -$clip_setup_end_WB0_D: +.clip_setup_end_WB0_D: mov vertexCount, [vertices] ; get back number of vertices left to process mov eax, [gc + wInfo_offset] ; gc->state.vData.wInfo.offset dec vertexCount ; one less vertex to process, any left? - jnz $clip_for_begin_WB0_D ; yup, vertices left, push out next vertex + jnz .clip_for_begin_WB0_D ; yup, vertices left, push out next vertex -$clip_setup_done_D: +.clip_setup_done_D: mov eax, [gc + fifoPtr] ; old fifoPtr mov ebp, [gc + fifoRoom] ; old number of bytes available in fifo @@ -3782,13 +3772,13 @@ $clip_setup_done_D: mov [esp + _count], vertexCount; remaining number of vertices to process cmp vertexCount, 0 ; any vertices left to process - jg $clip_coords_begin_D ; loop if number of vertices to process >= 0 + jg .clip_coords_begin_D ; loop if number of vertices to process >= 0 femms ; no more MMX code; clear MMX/FPU state -endif ; GLIDE3_SCALER +%endif ; GLIDE3_SCALER -$tris_done: +.tris_done: pop ebp ; restore frame pointer pop ebx ; restore caller's register variable @@ -3796,40 +3786,33 @@ $tris_done: pop edi ; restore caller's register variable ret 12 ; return, pop 3 DWORD parameters -__grDrawTriangles_3DNow@12 ENDP - -_TEXT ENDS +endp 
;;-------------------------------------------------------------------------- ;; end AMD3D version ;;-------------------------------------------------------------------------- -endif ; GL_AMD3D +%endif ; GL_AMD3D ;;-------------------------------------------------------------------------- ;; start original code ;;-------------------------------------------------------------------------- -ifndef GL_AMD3D +%ifndef GL_AMD3D -TITLE xdraw3.asm -.586P ;;; include listing.inc -INCLUDE fxgasm.h +%INCLUDE "fxgasm.h" -.model FLAT,C ; Flat memory, mangle publics with leading '_' +extrn _GlideRoot +extrn _FifoMakeRoom -EXTRN _GlideRoot:DWORD -EXTRN _FifoMakeRoom:NEAR +segment CONST +_F1 DD 1.0 +_F256 DD 256.0 -CONST SEGMENT -_F1 DD 03f800000r ; 1 -_F256 DD 043800000r ; 256 +_VPF1 DD 1.0 +_VPF256 DD 256.0 -_VPF1 DD 03f800000r ; 1 -_VPF256 DD 043800000r ; 256 -CONST ENDS - -_DATA SEGMENT +segment DATA vSize DD 0 ccoow DD 0 packetVal DD 0 @@ -3851,22 +3834,21 @@ yb DD 0 xc DD 0 yc DD 0 zArea DD 0 -_DATA ENDS -_TEXT SEGMENT -_pktype = 20 -_type = 24 -_mode = 28 -_count = 32 -_pointers = 36 +segment TEXT +%define _pktype 20 +%define _type 24 +%define _mode 28 +%define _count 32 +%define _pointers 36 -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to next entry in fifo -dlp TEXTEQU ; points to dataList structure -vertexCount TEXTEQU ; Current vertex counter in the packet -vertexPtr TEXTEQU ; Current vertex pointer +%define gc esi ; points to graphics context +%define fifo ecx ; points to next entry in fifo +%define dlp ebp ; points to dataList structure +%define vertexCount ebx ; Current vertex counter in the packet +%define vertexPtr edi ; Current vertex pointer -_drawvertexlist@20 PROC NEAR +proc _drawvertexlist, 20 ; 132 : { push esi @@ -3879,24 +3861,24 @@ _drawvertexlist@20 PROC NEAR ;;; if (stride == 0) ;;; stride = gc->state.vData.vStride; - mov gc, DWORD PTR _GlideRoot+curGC - mov ecx, DWORD PTR [gc+vertexSize] + mov gc, dword [_GlideRoot+curGC] + mov ecx, 
dword [gc+vertexSize] - mov edx, DWORD PTR [esp+_mode] - mov vertexCount, DWORD PTR [esp+_count] + mov edx, dword [esp+_mode] + mov vertexCount, dword [esp+_count] - mov vertexPtr, DWORD PTR [esp+_pointers] - mov DWORD PTR vSize, ecx + mov vertexPtr, dword [esp+_pointers] + mov dword [vSize], ecx shl edx, 2 ;;; mov ecx, DWORD PTR [gc+CoordinateSpace] test edx, edx - jne SHORT no_stride - mov edx, DWORD PTR [gc+vertexStride] + jne .no_stride + mov edx, dword [gc+vertexStride] shl edx, 2 align 4 -no_stride: +.no_stride: ;;; Draw the first (or possibly only) set. This is necessary because ;;; the packet is 3_BDDDDDD, and in the next set, the packet is 3_DDDDDD @@ -3906,59 +3888,59 @@ no_stride: ;;; if (gc->state.grCoordinateSpaceArgs.coordinate_space_mode == GR_WINDOW_COORDS) { ;;; test ecx, ecx - mov DWORD PTR strideinbytes, edx + mov dword [strideinbytes], edx ;;; while (count > 0) { ;;; FxI32 k, vcount = count >= 15 ? 15 : count; ;;; GR_SET_EXPECTED_SIZE(vcount * vSize, 1); ;;; TRI_STRIP_BEGIN(type, vcount, vSize, pktype); - mov eax, DWORD PTR [esp+_count] + mov eax, dword [esp+_count] ;;; jne clip_coordinates test eax, eax - jle strip_done + jle .strip_done align 4 -window_coords_begin: +.window_coords_begin: cmp vertexCount, 15 ; 0000000fH - jl SHORT win_partial_packet + jl .win_partial_packet mov vertexCount, 15 ; 0000000fH align 4 -win_partial_packet: +.win_partial_packet: - mov eax, DWORD PTR vSize - mov ecx, DWORD PTR [gc+fifoRoom] + mov eax, dword [vSize] + mov ecx, dword [gc+fifoRoom] imul eax, vertexCount add eax, 4 cmp ecx, eax - jge SHORT win_strip_begin - push @Line + jge .win_strip_begin + push __LINE__ push 0h push eax call _FifoMakeRoom add esp, 12 ; 0000000cH align 4 -win_strip_begin: +.win_strip_begin: ;;; Setup pacet header ;;; - mov fifo, DWORD PTR [gc+fifoPtr] + mov fifo, dword [gc+fifoPtr] mov eax, vertexCount - mov edx, DWORD PTR [esp+_type] - mov ebp, DWORD PTR [gc+cullStripHdr] + mov edx, dword [esp+_type] + mov ebp, dword [gc+cullStripHdr] 
shl edx, 22 ; 00000010H add fifo, 4 shl eax, 6 or ebp, edx or eax, ebp - mov edx, DWORD PTR [esp+_pktype] + mov edx, dword [esp+_pktype] or eax, edx nop - mov DWORD PTR [fifo-4], eax + mov dword [fifo-4], eax ;;; for (k = 0; k < vcount; k++) { ;;; FxI32 i; @@ -3974,31 +3956,31 @@ win_strip_begin: ;;; i = gc->tsuDataList[dataElem]; align 4 -win_for_begin: +.win_for_begin: mov edx, vertexPtr - mov eax, DWORD PTR strideinbytes + mov eax, dword [strideinbytes] cmp eax, 4 - jne SHORT win_no_deref - mov edx, DWORD PTR [vertexPtr] + jne .win_no_deref + mov edx, dword [vertexPtr] align 4 -win_no_deref: +.win_no_deref: add fifo, 8 add vertexPtr, eax - mov eax, DWORD PTR [edx] - mov ebp, DWORD PTR [edx+4] + mov eax, dword [edx] + mov ebp, dword [edx+4] - mov DWORD PTR [fifo-8], eax - mov eax, DWORD PTR [gc+tsuDataList] + mov dword [fifo-8], eax + mov eax, dword [gc+tsuDataList] - mov DWORD PTR [fifo-4], ebp + mov dword [fifo-4], ebp test eax, eax - lea dlp, DWORD PTR [gc+tsuDataList] - je SHORT win_datalist_end + lea dlp, [gc+tsuDataList] + je .win_datalist_end align 4 @@ -4008,25 +3990,25 @@ win_no_deref: ;;; i = gc->tsuDataList[dataElem]; ;;; } -win_datalist_begin: +.win_datalist_begin: add fifo, 4 add dlp, 4 - mov eax, DWORD PTR [edx+eax] + mov eax, dword [edx+eax] nop - mov DWORD PTR [fifo-4], eax - mov eax, DWORD PTR [dlp] + mov dword [fifo-4], eax + mov eax, dword [dlp] test eax, eax - jne SHORT win_datalist_begin -win_datalist_end: + jne .win_datalist_begin +.win_datalist_end: dec vertexCount - jne SHORT win_for_begin -win_for_end: + jne .win_for_begin +.win_for_end: ;;; TRI_END; ;;; Prepare for the next packet (if the strip size is longer than 15) @@ -4035,488 +4017,481 @@ win_for_end: ;;; pktype = SSTCP_PKT3_DDDDDD; ;;; } - mov eax, DWORD PTR [gc+fifoPtr] - mov edx, DWORD PTR [gc+fifoRoom] + mov eax, dword [gc+fifoPtr] + mov edx, dword [gc+fifoRoom] sub eax, fifo - mov vertexCount, DWORD PTR [esp+_count] + mov vertexCount, dword [esp+_count] add edx, eax sub 
vertexCount, 15 ; 0000000fH - mov DWORD PTR [gc+fifoRoom], edx - mov DWORD PTR [esp+_count], vertexCount + mov dword [gc+fifoRoom], edx + mov dword [esp+_count], vertexCount - mov DWORD PTR [gc+fifoPtr], fifo + mov dword [gc+fifoPtr], fifo test vertexCount, vertexCount - mov DWORD PTR [esp+_pktype], 16 ; 00000010H - jg window_coords_begin + mov dword [esp+_pktype], 16 ; 00000010H + jg .window_coords_begin -strip_done: +.strip_done: pop ebp pop ebx pop edi pop esi - ret 20 ; 00000014H + ret ; 00000014H -_drawvertexlist@20 ENDP +endp -_pktype = 20 -_type = 24 -_mode = 28 -_count = 32 -_pointers = 36 +%define gc esi ; points to graphics context +%define fifo ecx ; points to next entry in fifo +%define vertexPtr edx ; pointer to vertex or vertex array -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to next entry in fifo -vertexPtr TEXTEQU ; pointer to vertex or vertex array - -_vpdrawvertexlist@20 PROC NEAR +proc _vpdrawvertexlist, 20 push esi push edi push ebx - mov gc, DWORD PTR _GlideRoot+curGC + mov gc, dword [_GlideRoot+curGC] push ebp - mov ecx, DWORD PTR [esp+_mode] + mov ecx, dword [esp+_mode] - mov edi, DWORD PTR [esp+_pointers] - mov eax, DWORD PTR [gc+wInfo_offset] + mov edi, dword [esp+_pointers] + mov eax, dword [gc+wInfo_offset] test ecx, ecx - je w_no_dref + je .w_no_dref - mov edi, DWORD PTR [edi] + mov edi, dword [edi] align 4 -w_no_dref: +.w_no_dref: ;;; load first w - fld DWORD PTR [edi+eax] - fdivr DWORD PTR _F1 + fld dword [edi+eax] + fdivr dword [_F1] - mov ecx, DWORD PTR [gc+vertexSize] - mov edx, DWORD PTR [esp+_mode] + mov ecx, dword [gc+vertexSize] + mov edx, dword [esp+_mode] - mov edi, DWORD PTR [esp+_count] + mov edi, dword [esp+_count] ;;; mov vertexArray, DWORD PTR [esp+_pointers] shl edx, 2 - mov DWORD PTR vSize, ecx + mov dword [vSize], ecx test edx, edx - jne SHORT no_stride + jne .no_stride - mov edx, DWORD PTR [gc+vertexStride] + mov edx, dword [gc+vertexStride] shl edx, 2 align 4 -no_stride: +.no_stride: - mov DWORD 
PTR strideinbytes, edx - mov eax, DWORD PTR [esp+_type] + mov dword [strideinbytes], edx + mov eax, dword [esp+_type] shl eax, 16 ; 00000010H - mov DWORD PTR packetVal, eax + mov dword [packetVal], eax -clip_coords_begin: +.clip_coords_begin: cmp edi, 15 - jl SHORT clip_partial_packet + jl .clip_partial_packet mov edi, 15 -clip_partial_packet: +.clip_partial_packet: ;;; GR_SET_EXPECTED_SIZE(vcount * vSize, 1) - mov eax, DWORD PTR vSize - mov ecx, DWORD PTR [gc+fifoRoom] + mov eax, dword [vSize] + mov ecx, dword [gc+fifoRoom] imul eax, edi add eax, 4 cmp ecx, eax - jge SHORT clip_strip_begin - push @Line + jge .clip_strip_begin + push __LINE__ push 0h push eax call _FifoMakeRoom add esp, 12 ; 0000000cH -clip_strip_begin: +.clip_strip_begin: ;;; TRI_STRIP_BEGIN(type, vcount, vSize, pktype) - mov fifo, DWORD PTR [gc+fifoPtr] + mov fifo, dword [gc+fifoPtr] mov eax, edi - mov edx, DWORD PTR packetVal - mov ebp, DWORD PTR [gc+cullStripHdr] + mov edx, dword [packetVal] + mov ebp, dword [gc+cullStripHdr] or eax, edx add fifo, 4 shl eax, 6 - mov edx, DWORD PTR [esp+_pktype] + mov edx, dword [esp+_pktype] or eax, ebp or eax, edx - mov DWORD PTR [fifo-4], eax + mov dword [fifo-4], eax - mov vertexPtr, DWORD PTR [esp+_pointers] - mov eax, DWORD PTR [esp+_mode] + mov vertexPtr, dword [esp+_pointers] + mov eax, dword [esp+_mode] test eax, eax - je SHORT clip_for_begin - mov vertexPtr, DWORD PTR [vertexPtr] + je .clip_for_begin + mov vertexPtr, dword [vertexPtr] align 4 -clip_for_begin: +.clip_for_begin: add fifo, 8 - mov ebp, DWORD PTR strideinbytes + mov ebp, dword [strideinbytes] - add DWORD PTR [esp+_pointers], ebp - mov eax, DWORD PTR [gc+paramIndex] + add dword [esp+_pointers], ebp + mov eax, dword [gc+paramIndex] xor ebp, ebp - mov ebx, DWORD PTR [gc+tsuDataList] + mov ebx, dword [gc+tsuDataList] ;;; ; setup x and y - fld DWORD PTR [gc+vp_hwidth] - fmul DWORD PTR [vertexPtr] + fld dword [gc+vp_hwidth] + fmul dword [vertexPtr] test al, 3 - fld DWORD PTR [gc+vp_hheight] - 
fmul DWORD PTR [vertexPtr+4] + fld dword [gc+vp_hheight] + fmul dword [vertexPtr+4] fxch - fmul st, st(2) + fmul st0, st2 fxch - fmul st, st(2) + fmul st0, st2 fxch - fadd DWORD PTR [gc+vp_ox] + fadd dword [gc+vp_ox] fxch - fadd DWORD PTR [gc+vp_oy] - fxch st(2) - fstp DWORD PTR ccoow - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fadd dword [gc+vp_oy] + fxch st2 + fstp dword [ccoow] + fstp dword [fifo-8] + fstp dword [fifo-4] ;;; ; set up color - je clip_setup_ooz + je .clip_setup_ooz - cmp DWORD PTR [gc+colorType], ebp - jne SHORT clip_setup_pargb + cmp dword [gc+colorType], ebp + jne .clip_setup_pargb test al, 1 - je SHORT clip_setup_a + je .clip_setup_a add fifo, 12 mov ebp, 3 - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [ebx+vertexPtr] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [ebx+vertexPtr+4] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [ebx+vertexPtr+8] - fxch st(2) - fstp DWORD PTR [fifo-12] - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] - mov ebx, DWORD PTR [gc+tsuDataList+12] + fld dword [_GlideRoot+pool_f255] + fmul dword [ebx+vertexPtr] + fld dword [_GlideRoot+pool_f255] + fmul dword [ebx+vertexPtr+4] + fld dword [_GlideRoot+pool_f255] + fmul dword [ebx+vertexPtr+8] + fxch st2 + fstp dword [fifo-12] + fstp dword [fifo-8] + fstp dword [fifo-4] + mov ebx, dword [gc+tsuDataList+12] align 4 -clip_setup_a: +.clip_setup_a: test al, 2 - je SHORT clip_setup_ooz + je .clip_setup_ooz add fifo, 4 inc ebp - fld DWORD PTR [ebx+vertexPtr] - fmul DWORD PTR _GlideRoot+pool_f255 - fstp DWORD PTR [fifo-4] + fld dword [ebx+vertexPtr] + fmul dword [_GlideRoot+pool_f255] + fstp dword [fifo-4] - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList] - jmp SHORT clip_setup_ooz + mov ebx, dword [gc+ebp*4+tsuDataList] + jmp .clip_setup_ooz align 4 -clip_setup_pargb: +.clip_setup_pargb: add fifo, 4 - mov ebx, DWORD PTR [ebx+vertexPtr] + mov ebx, dword [ebx+vertexPtr] - mov DWORD PTR [fifo-4], ebx + mov dword [fifo-4], ebx nop mov ebp, 1 - mov ebx, 
DWORD PTR [gc+tsuDataList+4] -clip_setup_ooz: + mov ebx, dword [gc+tsuDataList+4] +.clip_setup_ooz: test al, 4 - je SHORT clip_setup_qow + je .clip_setup_qow add fifo, 4 inc ebp - fld DWORD PTR [ebx+vertexPtr] - fmul DWORD PTR [gc+vp_hdepth] - fmul DWORD PTR ccoow - fadd DWORD PTR [gc+vp_oz] - fstp DWORD PTR [fifo-4] + fld dword [ebx+vertexPtr] + fmul dword [gc+vp_hdepth] + fmul dword [ccoow] + fadd dword [gc+vp_oz] + fstp dword [fifo-4] - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList] + mov ebx, dword [gc+ebp*4+tsuDataList] align 4 -clip_setup_qow: +.clip_setup_qow: test al, 8 - je SHORT clip_setup_qow0 + je .clip_setup_qow0 - mov ebx, DWORD PTR [gc+qInfo_mode] + mov ebx, dword [gc+qInfo_mode] test ebx, ebx - je SHORT clip_setup_oow - mov ebx, DWORD PTR [gc+qInfo_offset] + je .clip_setup_oow + mov ebx, dword [gc+qInfo_offset] - fld DWORD PTR [vertexPtr+ebx] - fmul DWORD PTR ccoow - fstp DWORD PTR [fifo] + fld dword [vertexPtr+ebx] + fmul dword [ccoow] + fstp dword [fifo] - jmp SHORT clip_setup_oow_inc + jmp .clip_setup_oow_inc align 4 -clip_setup_oow: - mov ebx, DWORD PTR ccoow +.clip_setup_oow: + mov ebx, dword [ccoow] - mov DWORD PTR [fifo], ebx + mov dword [fifo], ebx align 4 -clip_setup_oow_inc: +.clip_setup_oow_inc: - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList+4] + mov ebx, dword [gc+ebp*4+tsuDataList+4] add fifo, 4 inc ebp align 4 -clip_setup_qow0: +.clip_setup_qow0: test al, 16 - je SHORT clip_setup_stow0 + je .clip_setup_stow0 - mov ebx, DWORD PTR [gc+q0Info_mode] + mov ebx, dword [gc+q0Info_mode] cmp ebx, 1 - jne SHORT clip_setup_oow0 + jne .clip_setup_oow0 - mov ebx, DWORD PTR [gc+q0Info_offset] + mov ebx, dword [gc+q0Info_offset] - fld DWORD PTR [ebx+vertexPtr] - fmul DWORD PTR ccoow - fstp DWORD PTR [fifo] + fld dword [ebx+vertexPtr] + fmul dword [ccoow] + fstp dword [fifo] - jmp SHORT clip_setup_oow0_inc + jmp .clip_setup_oow0_inc align 4 -clip_setup_oow0: - mov ebx, DWORD PTR ccoow +.clip_setup_oow0: + mov ebx, dword [ccoow] - mov DWORD PTR [fifo], ebx + 
mov dword [fifo], ebx align 4 -clip_setup_oow0_inc: - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList+4] +.clip_setup_oow0_inc: + mov ebx, dword [gc+ebp*4+tsuDataList+4] add fifo, 4 inc ebp align 4 -clip_setup_stow0: +.clip_setup_stow0: test al, 32 - je SHORT clip_setup_qow1 + je .clip_setup_qow1 - fld DWORD PTR ccoow - fmul DWORD PTR [ebx+vertexPtr] + fld dword [ccoow] + fmul dword [ebx+vertexPtr] add fifo, 8 add ebp, 2 - fmul DWORD PTR [gc+tmu0_s_scale] - fld DWORD PTR ccoow - fmul DWORD PTR [ebx+vertexPtr+4] - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList] - fmul DWORD PTR [gc+tmu0_t_scale] + fmul dword [gc+tmu0_s_scale] + fld dword [ccoow] + fmul dword [ebx+vertexPtr+4] + mov ebx, dword [gc+ebp*4+tsuDataList] + fmul dword [gc+tmu0_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_qow1: +.clip_setup_qow1: test al, 64 - je SHORT clip_setup_stow1 + je .clip_setup_stow1 - mov ebx, DWORD PTR [gc+q1Info_mode] + mov ebx, dword [gc+q1Info_mode] cmp ebx, 1 - jne SHORT clip_setup_oow1 + jne .clip_setup_oow1 - mov ebx, DWORD PTR [gc+q1Info_offset] + mov ebx, dword [gc+q1Info_offset] - fld DWORD PTR [ebx+vertexPtr] - fmul DWORD PTR ccoow - fstp DWORD PTR [fifo] + fld dword [ebx+vertexPtr] + fmul dword [ccoow] + fstp dword [fifo] - jmp SHORT clip_setup_oow1_inc + jmp .clip_setup_oow1_inc align 4 -clip_setup_oow1: - mov ebx, DWORD PTR ccoow +.clip_setup_oow1: + mov ebx, dword [ccoow] - mov DWORD PTR [fifo], ebx + mov dword [fifo], ebx align 4 -clip_setup_oow1_inc: +.clip_setup_oow1_inc: - mov ebx, DWORD PTR [gc+ebp*4+tsuDataList+4] + mov ebx, dword [gc+ebp*4+tsuDataList+4] add fifo, 4 inc ebp align 4 -clip_setup_stow1: +.clip_setup_stow1: test al, 128 - je SHORT clip_setup_end + je .clip_setup_end - fld DWORD PTR ccoow - fmul DWORD PTR [ebx+vertexPtr] + fld dword [ccoow] + fmul dword [ebx+vertexPtr] add fifo, 8 - fmul DWORD PTR [gc+tmu1_s_scale] - fld DWORD PTR ccoow - fmul DWORD PTR [ebx+vertexPtr+4] - mov 
ebx, DWORD PTR [gc+ebp*4+tsuDataList+4] - fmul DWORD PTR [gc+tmu1_t_scale] + fmul dword [gc+tmu1_s_scale] + fld dword [ccoow] + fmul dword [ebx+vertexPtr+4] + mov ebx, dword [gc+ebp*4+tsuDataList+4] + fmul dword [gc+tmu1_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_end: +.clip_setup_end: dec edi - jz clip_for_end + jz .clip_for_end - mov vertexPtr, DWORD PTR [esp+_pointers] - mov ebx, DWORD PTR [esp+_mode] + mov vertexPtr, dword [esp+_pointers] + mov ebx, dword [esp+_mode] test ebx, ebx - je SHORT w_clip_no_deref + je .w_clip_no_deref - mov vertexPtr, DWORD PTR [vertexPtr] + mov vertexPtr, dword [vertexPtr] align 4 -w_clip_no_deref: +.w_clip_no_deref: - mov ebx, DWORD PTR [gc+wInfo_offset] + mov ebx, dword [gc+wInfo_offset] - fld DWORD PTR [ebx+vertexPtr] - fdivr DWORD PTR _F1 + fld dword [ebx+vertexPtr] + fdivr dword [_F1] - jmp clip_for_begin + jmp .clip_for_begin align 4 -clip_for_end: +.clip_for_end: - mov ebx, DWORD PTR [gc+fifoPtr] - mov edx, DWORD PTR [gc+fifoRoom] + mov ebx, dword [gc+fifoPtr] + mov edx, dword [gc+fifoRoom] sub ebx, fifo - mov edi, DWORD PTR [esp+_count] + mov edi, dword [esp+_count] add edx, ebx sub edi, 15 ; 0000000fH - mov DWORD PTR [gc+fifoRoom], edx - mov DWORD PTR [esp+_count], edi + mov dword [gc+fifoRoom], edx + mov dword [esp+_count], edi - mov DWORD PTR [gc+fifoPtr], fifo - mov DWORD PTR [esp+_pktype], 16 ; 00000010H + mov dword [gc+fifoPtr], fifo + mov dword [esp+_pktype], 16 ; 00000010H - jle strip_done - mov edx, DWORD PTR [esp+_pointers] + jle .strip_done + mov edx, dword [esp+_pointers] - mov ebx, DWORD PTR [esp+_mode] + mov ebx, dword [esp+_mode] test ebx, ebx - je SHORT w1_clip_no_deref - mov edx, DWORD PTR [edx] + je .w1_clip_no_deref + mov edx, dword [edx] align 4 -w1_clip_no_deref: +.w1_clip_no_deref: - mov ebx, DWORD PTR [gc+wInfo_offset] - fld DWORD PTR [ebx+edx] - fdivr DWORD PTR _F1 + mov ebx, dword [gc+wInfo_offset] + fld dword 
[ebx+edx] + fdivr dword [_F1] - jmp clip_coords_begin + jmp .clip_coords_begin align 4 -strip_done: +.strip_done: pop ebp pop ebx pop edi pop esi - ret 20 ; 00000014H -_vpdrawvertexlist@20 ENDP + ret ; 00000014H +endp -gc TEXTEQU ; points to graphics context -fifo TEXTEQU ; points to next entry in fifo -vertexPtr TEXTEQU ; Current vertex pointer +%define gc esi ; points to graphics context +%define fifo ecx ; points to next entry in fifo +%define vertexPtr edi ; Current vertex pointer - PUBLIC _vptrisetup_nocull@12 -_vptrisetup_nocull@12 PROC NEAR -_va = 20 -_vb = 24 -_vc = 28 +proc _vptrisetup_nocull, 12 +_va equ 20 +_vb equ 24 +_vc equ 28 push ebx push esi push edi - mov gc, DWORD PTR _GlideRoot+curGC + mov gc, dword [_GlideRoot+curGC] - mov ecx, DWORD PTR [esp+_va-4] - mov eax, DWORD PTR [gc+wInfo_offset] + mov ecx, dword [esp+_va-4] + mov eax, dword [gc+wInfo_offset] push ebp nop ;;; ; oow[0] = 1.0f / FARRAY(va, gc->state.vData.wInfo.offset) - fld DWORD PTR [eax+ecx] + fld dword [eax+ecx] - fdivr DWORD PTR _F1 + fdivr dword [_F1] - mov ecx, DWORD PTR [esp+_vb] - mov ebx, DWORD PTR [esp+_vc] + mov ecx, dword [esp+_vb] + mov ebx, dword [esp+_vc] nop nop - mov ebp, DWORD PTR [eax+ecx] - mov edi, DWORD PTR [eax+ebx] + mov ebp, dword [eax+ecx] + mov edi, dword [eax+ebx] - mov DWORD PTR vPtr1, ebp - mov DWORD PTR vPtr2, edi + mov dword [vPtr1], ebp + mov dword [vPtr2], edi ;;; ; GR_SET_EXPECTED_SIZE(_GlideRoot.curTriSize, 1) - mov eax, DWORD PTR _GlideRoot+curTriSize - mov ecx, DWORD PTR [gc+fifoRoom] + mov eax, dword [_GlideRoot+curTriSize] + mov ecx, dword [gc+fifoRoom] add eax, 4 nop cmp ecx, eax - jge SHORT setup_pkt_hdr + jge .setup_pkt_hdr - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push eax @@ -4525,265 +4500,265 @@ _vc = 28 add esp, 12 ; 0000000cH align 4 -setup_pkt_hdr: +.setup_pkt_hdr: ;;; ; TRI_STRIP_BEGIN(kSetupStrip, 3, gc->state.vData.vSize, 
SSTCP_PKT3_BDDBDD) - mov fifo, DWORD PTR [gc+fifoPtr] - mov eax, DWORD PTR [gc+cullStripHdr] + mov fifo, dword [gc+fifoPtr] + mov eax, dword [gc+cullStripHdr] add fifo, 4 - lea ebp, DWORD PTR [esp+_va] + lea ebp, [esp+_va] or eax, 192 ; 000000c0H mov edx, 0 - mov DWORD PTR [fifo-4], eax - mov vertexPtr, DWORD PTR [ebp] + mov dword [fifo-4], eax + mov vertexPtr, dword [ebp] - mov eax, DWORD PTR [gc+paramIndex] + mov eax, dword [gc+paramIndex] nop ;;; Begin loop align 4 -begin_for_loop: +.begin_for_loop: add edx, 4 add fifo, 8 xor ebx, ebx - mov ebp, DWORD PTR [gc+tsuDataList] + mov ebp, dword [gc+tsuDataList] ;;; ; setup x and y - fld DWORD PTR [gc+vp_hwidth] - fmul DWORD PTR [vertexPtr] + fld dword [gc+vp_hwidth] + fmul dword [vertexPtr] test al, 3 - fld DWORD PTR [gc+vp_hheight] - fmul DWORD PTR [vertexPtr+4] + fld dword [gc+vp_hheight] + fmul dword [vertexPtr+4] fxch - fmul st, st(2) + fmul st0, st2 fxch - fmul st, st(2) + fmul st0, st2 fxch - fadd DWORD PTR [gc+vp_ox] + fadd dword [gc+vp_ox] fxch - fadd DWORD PTR [gc+vp_oy] - fxch st(2) - fstp DWORD PTR oowa - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fadd dword [gc+vp_oy] + fxch st2 + fstp dword [oowa] + fstp dword [fifo-8] + fstp dword [fifo-4] ;;; ; set up color - je clip_setup_ooz + je .clip_setup_ooz - cmp DWORD PTR [gc+colorType], ebx - jne SHORT clip_setup_pargb + cmp dword [gc+colorType], ebx + jne .clip_setup_pargb test al, 1 - je SHORT clip_setup_a + je .clip_setup_a add fifo, 12 add ebx, 3 - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp+4] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp+8] - fxch st(2) - fstp DWORD PTR [fifo-12] - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] - mov ebp, DWORD PTR [gc+tsuDataList+12] + fld dword [_GlideRoot+pool_f255] + fmul dword [vertexPtr+ebp] + fld dword [_GlideRoot+pool_f255] + fmul dword [vertexPtr+ebp+4] + fld dword [_GlideRoot+pool_f255] 
+ fmul dword [vertexPtr+ebp+8] + fxch st2 + fstp dword [fifo-12] + fstp dword [fifo-8] + fstp dword [fifo-4] + mov ebp, dword [gc+tsuDataList+12] align 4 -clip_setup_a: +.clip_setup_a: test al, 2 - je SHORT clip_setup_ooz + je .clip_setup_ooz add fifo, 4 inc ebx - fld DWORD PTR [vertexPtr+ebp] - fmul DWORD PTR _GlideRoot+pool_f255 - fstp DWORD PTR [fifo-4] + fld dword [vertexPtr+ebp] + fmul dword [_GlideRoot+pool_f255] + fstp dword [fifo-4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] - jmp SHORT clip_setup_ooz + mov ebp, dword [gc+ebx*4+tsuDataList] + jmp .clip_setup_ooz align 4 -clip_setup_pargb: +.clip_setup_pargb: add fifo, 4 - mov ebx, DWORD PTR [vertexPtr+ebp] + mov ebx, dword [vertexPtr+ebp] - mov DWORD PTR [fifo-4], ebx + mov dword [fifo-4], ebx nop mov ebx, 1 - mov ebp, DWORD PTR [gc+tsuDataList+4] + mov ebp, dword [gc+tsuDataList+4] align 4 -clip_setup_ooz: +.clip_setup_ooz: test al, 4 - je SHORT clip_setup_qow + je .clip_setup_qow add fifo, 4 inc ebx - fld DWORD PTR [vertexPtr+ebp] - fmul DWORD PTR [gc+vp_hdepth] - fmul DWORD PTR oowa - fadd DWORD PTR [gc+vp_oz] - fstp DWORD PTR [fifo-4] + fld dword [vertexPtr+ebp] + fmul dword [gc+vp_hdepth] + fmul dword [oowa] + fadd dword [gc+vp_oz] + fstp dword [fifo-4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] + mov ebp, dword [gc+ebx*4+tsuDataList] align 4 -clip_setup_qow: +.clip_setup_qow: test al, 8 - je SHORT clip_setup_qow0 + je .clip_setup_qow0 - cmp DWORD PTR [gc+qInfo_mode], 1 - jne SHORT clip_setup_oow + cmp dword [gc+qInfo_mode], 1 + jne .clip_setup_oow - mov ebp, DWORD PTR [gc+qInfo_offset] - fld DWORD PTR oowa - fmul DWORD PTR [ebp+vertexPtr] - fstp DWORD PTR [fifo] + mov ebp, dword [gc+qInfo_offset] + fld dword [oowa] + fmul dword [ebp+vertexPtr] + fstp dword [fifo] - jmp SHORT clip_setup_oow_inc + jmp .clip_setup_oow_inc align 4 -clip_setup_oow: +.clip_setup_oow: - mov ebp, DWORD PTR oowa + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow_inc: - mov 
ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] +.clip_setup_oow_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_qow0: +.clip_setup_qow0: test al, 16 ; 00000010H - je SHORT clip_setup_stow0 + je .clip_setup_stow0 - cmp DWORD PTR [gc+q0Info_mode], 1 - jne SHORT clip_setup_oow0 + cmp dword [gc+q0Info_mode], 1 + jne .clip_setup_oow0 - mov ebp, DWORD PTR [gc+q0Info_offset] + mov ebp, dword [gc+q0Info_offset] - fld DWORD PTR oowa - fmul DWORD PTR [ebp+vertexPtr] - fstp DWORD PTR [fifo] + fld dword [oowa] + fmul dword [ebp+vertexPtr] + fstp dword [fifo] - jmp SHORT clip_setup_oow0_inc + jmp .clip_setup_oow0_inc align 4 -clip_setup_oow0: - mov ebp, DWORD PTR oowa +.clip_setup_oow0: + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow0_inc: - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] +.clip_setup_oow0_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_stow0: +.clip_setup_stow0: test al, 32 - je SHORT clip_setup_qow1 + je .clip_setup_qow1 - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp] + fld dword [oowa] + fmul dword [vertexPtr+ebp] add fifo, 8 add ebx, 2 - fmul DWORD PTR [gc+tmu0_s_scale] - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp+4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] - fmul DWORD PTR [gc+tmu0_t_scale] + fmul dword [gc+tmu0_s_scale] + fld dword [oowa] + fmul dword [vertexPtr+ebp+4] + mov ebp, dword [gc+ebx*4+tsuDataList] + fmul dword [gc+tmu0_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_qow1: +.clip_setup_qow1: test al, 64 - je SHORT clip_setup_stow1 + je .clip_setup_stow1 - cmp DWORD PTR [gc+q1Info_mode], 1 - jne SHORT clip_setup_oow1 + cmp dword [gc+q1Info_mode], 1 + jne .clip_setup_oow1 - mov ebp, DWORD PTR [gc+q1Info_offset] + mov ebp, dword [gc+q1Info_offset] - fld DWORD PTR [ebp+vertexPtr] - fmul DWORD PTR oowa - fstp DWORD PTR [fifo] + fld 
dword [ebp+vertexPtr] + fmul dword [oowa] + fstp dword [fifo] - jmp SHORT clip_setup_oow1_inc + jmp .clip_setup_oow1_inc align 4 -clip_setup_oow1: - mov ebp, DWORD PTR oowa +.clip_setup_oow1: + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow1_inc: - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] +.clip_setup_oow1_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_stow1: +.clip_setup_stow1: test al, 128 - je SHORT clip_setup_end + je .clip_setup_end - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp] + fld dword [oowa] + fmul dword [vertexPtr+ebp] add fifo, 8 - fmul DWORD PTR [gc+tmu1_s_scale] - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp+4] - fmul DWORD PTR [gc+tmu1_t_scale] + fmul dword [gc+tmu1_s_scale] + fld dword [oowa] + fmul dword [vertexPtr+ebp+4] + fmul dword [gc+tmu1_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_end: +.clip_setup_end: cmp edx, 12 - je update_fifo_ptr + je .update_fifo_ptr - fld DWORD PTR vPtr0[edx] - fdivr DWORD PTR _F1 + fld dword [vPtr0+edx] + fdivr dword [_F1] - lea ebx, DWORD PTR [esp+_va] - mov ebp, DWORD PTR [gc+wInfo_offset] + lea ebx, [esp+_va] + mov ebp, dword [gc+wInfo_offset] - mov vertexPtr, DWORD PTR [ebx+edx] - jmp begin_for_loop + mov vertexPtr, dword [ebx+edx] + jmp .begin_for_loop align 4 -update_fifo_ptr: +.update_fifo_ptr: - mov ebx, DWORD PTR [gc+fifoPtr] - mov edx, DWORD PTR [gc+fifoRoom] + mov ebx, dword [gc+fifoPtr] + mov edx, dword [gc+fifoRoom] sub ebx, fifo mov eax, 1 @@ -4791,11 +4766,11 @@ update_fifo_ptr: add edx, ebx pop ebp - mov DWORD PTR [gc+fifoRoom], edx + mov dword [gc+fifoRoom], edx pop edi - mov DWORD PTR [gc+fifoPtr], fifo - mov ebx, DWORD PTR _GlideRoot+trisProcessed + mov dword [gc+fifoPtr], fifo + mov ebx, dword [_GlideRoot+trisProcessed] ;;; ; _GlideRoot.stats.trisProcessed++ @@ -4803,63 +4778,59 @@ update_fifo_ptr: pop esi 
inc ebx - mov DWORD PTR _GlideRoot+trisProcessed, ebx + mov dword [_GlideRoot+trisProcessed], ebx pop ebx - ret 12 ; 0000000cH + ret ; 0000000cH -_vptrisetup_nocull@12 ENDP +endp - PUBLIC _vptrisetup_cull@12 -_vptrisetup_cull@12 PROC NEAR +proc _vptrisetup_cull, 12 -_va = 20 -_vb = 24 -_vc = 28 push ebx push esi push edi - mov gc, DWORD PTR _GlideRoot+curGC + mov gc, dword [_GlideRoot+curGC] - mov ecx, DWORD PTR [esp+_va-4] + mov ecx, dword [esp+_va-4] push ebp - mov eax, DWORD PTR [gc+wInfo_offset] + mov eax, dword [gc+wInfo_offset] nop ;;; ; oow[0] = 1.0f / FARRAY(va, gc->state.vData.wInfo.offset) - fld DWORD PTR [eax+ecx] + fld dword [eax+ecx] - fdivr DWORD PTR _F1 + fdivr dword [_F1] - mov DWORD PTR vPtr0, ecx + mov dword [vPtr0], ecx - mov ecx, DWORD PTR [esp+_vb] - mov ebx, DWORD PTR [esp+_vc] + mov ecx, dword [esp+_vb] + mov ebx, dword [esp+_vc] - fld DWORD PTR [eax+ecx] - fdivr DWORD PTR _F1 + fld dword [eax+ecx] + fdivr dword [_F1] - mov DWORD PTR vPtr1, ecx - mov DWORD PTR vPtr2, ebx + mov dword [vPtr1], ecx + mov dword [vPtr2], ebx - fld DWORD PTR [eax+ebx] - fdivr DWORD PTR _F1 - fxch st(2) ; oow2 oow1 oow0 + fld dword [eax+ebx] + fdivr dword [_F1] + fxch st2 ; oow2 oow1 oow0 ;;; ; GR_SET_EXPECTED_SIZE(_GlideRoot.curTriSize, 1) - mov eax, DWORD PTR _GlideRoot+curTriSize - mov ecx, DWORD PTR [gc+fifoRoom] + mov eax, dword [_GlideRoot+curTriSize] + mov ecx, dword [gc+fifoRoom] add eax, 4 nop cmp ecx, eax - jge SHORT setup_pkt_hdr + jge .setup_pkt_hdr - push @Line ; line number inside this function + push __LINE__ ; line number inside this function push 0h ; pointer to function name = NULL push eax @@ -4868,102 +4839,102 @@ _vc = 28 add esp, 12 ; 0000000cH align 4 -setup_pkt_hdr: +.setup_pkt_hdr: mov edx, [gc + cull_mode] shl edx, 31 ; culltest << 31 ;;; ; culling - mov vertexPtr, DWORD PTR [esp+_va] ; - fld DWORD PTR [gc+vp_hwidth] ; oow2 oow1 oow0 tx - fmul DWORD PTR [vertexPtr] ; | | | | - fld DWORD PTR [gc+vp_hheight] ; | | | | ty - fmul DWORD PTR 
[vertexPtr+4] ; | | | | | - fxch ; | | | ty tx - fmul st, st(2) ; | | | | | - fxch ; | | | tx ty - fmul st, st(2) ; | | | | | - fxch ; | | | ty tx - fadd DWORD PTR [gc+vp_ox] ; | | | | xa - fxch ; | | | xa ty - fadd DWORD PTR [gc+vp_oy] ; | | | | ya - fxch st(2) ; | | ya | oow0 - fstp DWORD PTR oow0 ; | | | | - fstp DWORD PTR xa ; | | | - fstp DWORD PTR ya ; | | + mov vertexPtr, dword [esp+_va] ; + fld dword [gc+vp_hwidth] ; oow2 oow1 oow0 tx + fmul dword [vertexPtr] ; | | | | + fld dword [gc+vp_hheight] ; | | | | ty + fmul dword [vertexPtr+4] ; | | | | | + fxch ; | | | ty tx + fmul st0, st2 ; | | | | | + fxch ; | | | tx ty + fmul st0, st2 ; | | | | | + fxch ; | | | ty tx + fadd dword [gc+vp_ox] ; | | | | xa + fxch ; | | | xa ty + fadd dword [gc+vp_oy] ; | | | | ya + fxch st2 ; | | ya | oow0 + fstp dword [oow0] ; | | | | + fstp dword [xa] ; | | | + fstp dword [ya] ; | | - mov vertexPtr, DWORD PTR [esp+_vb] ; - fld DWORD PTR [gc+vp_hwidth] ; oow2 oow1 tx - fmul DWORD PTR [vertexPtr] ; | | | - fld DWORD PTR [gc+vp_hheight] ; | | | ty - fmul DWORD PTR [vertexPtr+4] ; | | | | - fxch ; | | ty tx - fmul st, st(2) ; | | | | - fxch ; | | tx ty - fmul st, st(2) ; | | | | - fxch ; | | ty tx - fadd DWORD PTR [gc+vp_ox] ; | | | xb - fxch ; | | xb ty - fadd DWORD PTR [gc+vp_oy] ; | | | yb - fxch st(2) ; | yb | oow1 - fstp DWORD PTR oow1 ; | | | - fstp DWORD PTR xb ; | | - fstp DWORD PTR yb ; | + mov vertexPtr, dword [esp+_vb] ; + fld dword [gc+vp_hwidth] ; oow2 oow1 tx + fmul dword [vertexPtr] ; | | | + fld dword [gc+vp_hheight] ; | | | ty + fmul dword [vertexPtr+4] ; | | | | + fxch ; | | ty tx + fmul st0, st2 ; | | | | + fxch ; | | tx ty + fmul st0, st2 ; | | | | + fxch ; | | ty tx + fadd dword [gc+vp_ox] ; | | | xb + fxch ; | | xb ty + fadd dword [gc+vp_oy] ; | | | yb + fxch st2 ; | yb | oow1 + fstp dword [oow1] ; | | | + fstp dword [xb] ; | | + fstp dword [yb] ; | - mov vertexPtr, DWORD PTR [esp+_vc] ; - fld DWORD PTR [gc+vp_hwidth] ; | tx - fmul DWORD PTR [vertexPtr] ; | | 
- fld DWORD PTR [gc+vp_hheight] ; | | ty - fmul DWORD PTR [vertexPtr+4] ; | | | - fxch ; | ty tx - fmul st, st(2) ; | | | - fxch ; | tx ty - fmul st, st(2) ; | | | - fxch ; | ty tx - fadd DWORD PTR [gc+vp_ox] ; | | xc - fxch ; | xc ty - fadd DWORD PTR [gc+vp_oy] ; | | yc - fxch st(2) ; yc | oow2 - fstp DWORD PTR oow1 ; | | - fstp DWORD PTR xc ; | - fstp DWORD PTR yc ; + mov vertexPtr, dword [esp+_vc] ; + fld dword [gc+vp_hwidth] ; | tx + fmul dword [vertexPtr] ; | | + fld dword [gc+vp_hheight] ; | | ty + fmul dword [vertexPtr+4] ; | | | + fxch ; | ty tx + fmul st0, st2 ; | | | + fxch ; | tx ty + fmul st0, st2 ; | | | + fxch ; | ty tx + fadd dword [gc+vp_ox] ; | | xc + fxch ; | xc ty + fadd dword [gc+vp_oy] ; | | yc + fxch st2 ; yc | oow2 + fstp dword [oow2] ; | | + fstp dword [xc] ; | + fstp dword [yc] ; - fld DWORD PTR xa ; xa - fsub DWORD PTR xb ; dxAB - fld DWORD PTR yb ; | yb - fsub DWORD PTR yc ; | dyBC - fld DWORD PTR xb ; | | xb - fsub DWORD PTR xc ; | | dxBC - fxch ; | dxBC dyBC - fld DWORD PTR ya ; | | | ya - fsub DWORD PTR yb ; | | | dyAB - fxch st(3) ; dyAB | | dxAB - fmulp st(1), st ; | | t0=dxAB*dyBC - fxch st(2) ; t0 | dxAB - fmulp st(1), st ; t0 t1 - fsubp st(1),st ; area - fstp DWORD PTR zArea + fld dword [xa] ; xa + fsub dword [xb] ; dxAB + fld dword [yb] ; | yb + fsub dword [yc] ; | dyBC + fld dword [xb] ; | | xb + fsub dword [xc] ; | | dxBC + fxch ; | dxBC dyBC + fld dword [ya] ; | | | ya + fsub dword [yb] ; | | | dyAB + fxch st3 ; dyAB | | dxAB + fmulp st1, st0 ; | | t0=dxAB*dyBC + fxch st2 ; t0 | dxAB + fmulp st1, st0 ; t0 t1 + fsubp st1,st0 ; area + fstp dword [zArea] - mov ecx, DWORD PTR zArea ; j = *(long *)&area + mov ecx, dword [zArea] ; j = *(long *)&area xor eax, eax ; Clear the return value (0 == culled) ; Zero Area Triangle Check and ecx, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0) - jz __triDone + jz .__triDone ;; Triangle area check vs culling mode - mov ecx, DWORD PTR zArea ; reload area just in case we're culling + mov ecx, dword
[zArea] ; reload area just in case we're culling xor ecx, edx ; if (j ^ (culltest << 31)) - jge __triDone + jge .__triDone ;;; ; TRI_STRIP_BEGIN(kSetupStrip, 3, gc->state.vData.vSize, SSTCP_PKT3_BDDBDD) - mov fifo, DWORD PTR [gc+fifoPtr] - mov eax, DWORD PTR [gc+cullStripHdr] + mov fifo, dword [gc+fifoPtr] + mov eax, dword [gc+cullStripHdr] add fifo, 4 mov edx, 0 @@ -4971,233 +4942,233 @@ setup_pkt_hdr: or eax, 192 ; 000000c0H nop - mov DWORD PTR [fifo-4], eax - mov eax, DWORD PTR [gc+paramIndex] + mov dword [fifo-4], eax + mov eax, dword [gc+paramIndex] ;;; Begin loop align 4 -begin_for_loop: - mov ebp, DWORD PTR oow0[edx] +.begin_for_loop: + mov ebp, dword [oow0+edx] add fifo, 8 - mov vertexPtr, DWORD PTR vPtr0[edx] - mov DWORD PTR oowa, ebp + mov vertexPtr, dword [vPtr0+edx] + mov dword [oowa], ebp ;;; ; setup x and y - mov ebx, DWORD PTR xa[edx*2] - mov ebp, DWORD PTR xa[edx*2+4] + mov ebx, dword [xa+edx*2] + mov ebp, dword [xa+edx*2+4] - mov DWORD PTR [fifo-8], ebx + mov dword [fifo-8], ebx add edx, 4 - mov DWORD PTR [fifo-4], ebp + mov dword [fifo-4], ebp xor ebx, ebx - mov ebp, DWORD PTR [gc+tsuDataList] + mov ebp, dword [gc+tsuDataList] test al, 3 ;;; ; set up color - je clip_setup_ooz + je .clip_setup_ooz - cmp DWORD PTR [gc+colorType], ebx - jne SHORT clip_setup_pargb + cmp dword [gc+colorType], ebx + jne .clip_setup_pargb test al, 1 - je SHORT clip_setup_a + je .clip_setup_a add fifo, 12 add ebx, 3 - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp+4] - fld DWORD PTR _GlideRoot+pool_f255 - fmul DWORD PTR [vertexPtr+ebp+8] - fxch st(2) - fstp DWORD PTR [fifo-12] - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] - mov ebp, DWORD PTR [gc+tsuDataList+12] + fld dword [_GlideRoot+pool_f255] + fmul dword [vertexPtr+ebp] + fld dword [_GlideRoot+pool_f255] + fmul dword [vertexPtr+ebp+4] + fld dword [_GlideRoot+pool_f255] + fmul dword [vertexPtr+ebp+8] + fxch st2 + fstp dword 
[fifo-12] + fstp dword [fifo-8] + fstp dword [fifo-4] + mov ebp, dword [gc+tsuDataList+12] align 4 -clip_setup_a: +.clip_setup_a: test al, 2 - je SHORT clip_setup_ooz + je .clip_setup_ooz add fifo, 4 inc ebx - fld DWORD PTR [vertexPtr+ebp] - fmul DWORD PTR _GlideRoot+pool_f255 - fstp DWORD PTR [fifo-4] + fld dword [vertexPtr+ebp] + fmul dword [_GlideRoot+pool_f255] + fstp dword [fifo-4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] - jmp SHORT clip_setup_ooz + mov ebp, dword [gc+ebx*4+tsuDataList] + jmp .clip_setup_ooz align 4 -clip_setup_pargb: +.clip_setup_pargb: add fifo, 4 - mov ebx, DWORD PTR [vertexPtr+ebp] + mov ebx, dword [vertexPtr+ebp] - mov DWORD PTR [fifo-4], ebx + mov dword [fifo-4], ebx nop mov ebx, 1 - mov ebp, DWORD PTR [gc+tsuDataList+4] + mov ebp, dword [gc+tsuDataList+4] align 4 -clip_setup_ooz: +.clip_setup_ooz: test al, 4 - je SHORT clip_setup_qow + je .clip_setup_qow add fifo, 4 inc ebx - fld DWORD PTR [vertexPtr+ebp] - fmul DWORD PTR [gc+vp_hdepth] - fmul DWORD PTR oowa - fadd DWORD PTR [gc+vp_oz] - fstp DWORD PTR [fifo-4] + fld dword [vertexPtr+ebp] + fmul dword [gc+vp_hdepth] + fmul dword [oowa] + fadd dword [gc+vp_oz] + fstp dword [fifo-4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] + mov ebp, dword [gc+ebx*4+tsuDataList] align 4 -clip_setup_qow: +.clip_setup_qow: test al, 8 - je SHORT clip_setup_qow0 + je .clip_setup_qow0 - cmp DWORD PTR [gc+qInfo_mode], 1 - jne SHORT clip_setup_oow + cmp dword [gc+qInfo_mode], 1 + jne .clip_setup_oow - mov ebp, DWORD PTR [gc+qInfo_offset] - fld DWORD PTR oowa - fmul DWORD PTR [ebp+vertexPtr] - fstp DWORD PTR [fifo] + mov ebp, dword [gc+qInfo_offset] + fld dword [oowa] + fmul dword [ebp+vertexPtr] + fstp dword [fifo] - jmp SHORT clip_setup_oow_inc + jmp .clip_setup_oow_inc align 4 -clip_setup_oow: +.clip_setup_oow: - mov ebp, DWORD PTR oowa + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow_inc: - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] 
+.clip_setup_oow_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_qow0: +.clip_setup_qow0: test al, 16 ; 00000010H - je SHORT clip_setup_stow0 + je .clip_setup_stow0 - cmp DWORD PTR [gc+q0Info_mode], 1 - jne SHORT clip_setup_oow0 + cmp dword [gc+q0Info_mode], 1 + jne .clip_setup_oow0 - mov ebp, DWORD PTR [gc+q0Info_offset] + mov ebp, dword [gc+q0Info_offset] - fld DWORD PTR oowa - fmul DWORD PTR [ebp+vertexPtr] - fstp DWORD PTR [fifo] + fld dword [oowa] + fmul dword [ebp+vertexPtr] + fstp dword [fifo] - jmp SHORT clip_setup_oow0_inc + jmp .clip_setup_oow0_inc align 4 -clip_setup_oow0: - mov ebp, DWORD PTR oowa +.clip_setup_oow0: + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow0_inc: - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] +.clip_setup_oow0_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_stow0: +.clip_setup_stow0: test al, 32 - je SHORT clip_setup_qow1 + je .clip_setup_qow1 - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp] + fld dword [oowa] + fmul dword [vertexPtr+ebp] add fifo, 8 add ebx, 2 - fmul DWORD PTR [gc+tmu0_s_scale] - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp+4] - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList] - fmul DWORD PTR [gc+tmu0_t_scale] + fmul dword [gc+tmu0_s_scale] + fld dword [oowa] + fmul dword [vertexPtr+ebp+4] + mov ebp, dword [gc+ebx*4+tsuDataList] + fmul dword [gc+tmu0_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_qow1: +.clip_setup_qow1: test al, 64 - je SHORT clip_setup_stow1 + je .clip_setup_stow1 - cmp DWORD PTR [gc+q1Info_mode], 1 - jne SHORT clip_setup_oow1 + cmp dword [gc+q1Info_mode], 1 + jne .clip_setup_oow1 - mov ebp, DWORD PTR [gc+q1Info_offset] + mov ebp, dword [gc+q1Info_offset] - fld DWORD PTR [ebp+vertexPtr] - fmul DWORD PTR oowa - fstp DWORD PTR [fifo] + fld dword [ebp+vertexPtr] + fmul dword [oowa] + 
fstp dword [fifo] - jmp SHORT clip_setup_oow1_inc + jmp .clip_setup_oow1_inc align 4 -clip_setup_oow1: - mov ebp, DWORD PTR oowa +.clip_setup_oow1: + mov ebp, dword [oowa] - mov DWORD PTR [fifo], ebp + mov dword [fifo], ebp align 4 -clip_setup_oow1_inc: - mov ebp, DWORD PTR [gc+ebx*4+tsuDataList+4] +.clip_setup_oow1_inc: + mov ebp, dword [gc+ebx*4+tsuDataList+4] add fifo, 4 inc ebx align 4 -clip_setup_stow1: +.clip_setup_stow1: test al, 128 - je SHORT clip_setup_end + je .clip_setup_end - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp] + fld dword [oowa] + fmul dword [vertexPtr+ebp] add fifo, 8 - fmul DWORD PTR [gc+tmu1_s_scale] - fld DWORD PTR oowa - fmul DWORD PTR [vertexPtr+ebp+4] - fmul DWORD PTR [gc+tmu1_t_scale] + fmul dword [gc+tmu1_s_scale] + fld dword [oowa] + fmul dword [vertexPtr+ebp+4] + fmul dword [gc+tmu1_t_scale] fxch - fstp DWORD PTR [fifo-8] - fstp DWORD PTR [fifo-4] + fstp dword [fifo-8] + fstp dword [fifo-4] align 4 -clip_setup_end: +.clip_setup_end: cmp edx, 12 - jl begin_for_loop + jl .begin_for_loop align 4 -update_fifo_ptr: +.update_fifo_ptr: - mov ebx, DWORD PTR [gc+fifoPtr] - mov edx, DWORD PTR [gc+fifoRoom] + mov ebx, dword [gc+fifoPtr] + mov edx, dword [gc+fifoRoom] sub ebx, fifo mov eax, 1 @@ -5205,11 +5176,11 @@ update_fifo_ptr: add edx, ebx pop ebp - mov DWORD PTR [gc+fifoRoom], edx + mov dword [gc+fifoRoom], edx pop edi - mov DWORD PTR [gc+fifoPtr], fifo - mov ebx, DWORD PTR _GlideRoot+trisProcessed + mov dword [gc+fifoPtr], fifo + mov ebx, dword [_GlideRoot+trisProcessed] ;;; ; _GlideRoot.stats.trisProcessed++ @@ -5217,12 +5188,12 @@ update_fifo_ptr: pop esi inc ebx - mov DWORD PTR _GlideRoot+trisProcessed, ebx + mov dword [_GlideRoot+trisProcessed], ebx pop ebx - ret 12 ; 0000000cH + ret ; 0000000cH align 4 -__triDone: +.__triDone: ;; Restore trashed registers mov esi, [_GlideRoot + trisProcessed] pop ebp @@ -5234,12 +5205,8 @@ __triDone: mov [_GlideRoot + trisProcessed], esi pop ebx - ret 12 + ret -_vptrisetup_cull@12 ENDP - 
-_TEXT ENDS -endif ; !GL_AMD3D - -END +endp +%endif ; !GL_AMD3D diff --git a/glide3x/cvg/glide3/src/xos.inc b/glide3x/cvg/glide3/src/xos.inc new file mode 100644 index 0000000..f4964bd --- /dev/null +++ b/glide3x/cvg/glide3/src/xos.inc @@ -0,0 +1,135 @@ +; +; compulsory header for cvg/glide3/xdraw* assembly specializations (NASM) +; +; $Header$ +; $Log$ +; Revision 1.1.2.2 2003/06/13 07:22:59 dborca +; more fixes to NASM sources +; +; Revision 1.1.2.1 2003/06/07 09:53:25 dborca +; initial checkin for NASM sources +; + +;--------------------------------------- +; platform defines +;--------------------------------------- +%define XOS_DJGPP 1 +%define XOS_LINUX 2 +%define XOS_WIN32 4 + +%define STDCALL 0 + +;--------------------------------------- +; pick up the right OS +;--------------------------------------- +%ifdef __DJGPP__ +%define XOS XOS_DJGPP +%elifdef __linux__ +%define XOS XOS_LINUX +%elifdef __WIN32__ +%define XOS XOS_WIN32 +%define STDCALL 1 +%else +%error Unknown OS +%endif + +;--------------------------------------- +; general purpose macros +;--------------------------------------- +%macro extrn 1-2 0 + %if (XOS == XOS_WIN32) && STDCALL && (%2 > 0) + %define %1 %1@%2 + %endif + extern %1 +%endmacro + +%macro globl 1-2 0 + %if (XOS == XOS_WIN32) && STDCALL && (%2 > 0) + %define %1 %1@%2 + %endif + global %1 +%endmacro + +%macro proc 1-2 0 + %push proc + %if STDCALL && (%2 > 0) + %define %$ret RET %2 + %else + %define %$ret RET + %endif + globl %1, %2 +%1: +%endmacro + +%macro endp 0 + %ifnctx proc + %error Mismatched `endp'/`proc' + %else + %pop + %endif +%endmacro + +%macro ret 0 + %ifnctx proc + RET + %else + %$ret + %endif +%endmacro + +%macro invoke 1-* + %rep %0 - 1 + %rotate -1 + push %1 + %endrep + %rotate -1 + call %1 +%if (STDCALL == 0) && (%0 > 1) + add esp, 4 * (%0 - 1) +%endif +%endmacro + +;--------------------------------------- +; Windows +;--------------------------------------- +%if XOS == XOS_WIN32 + +%define TEXT .text align=32 
+%define DATA .data align=32 +%define CONST .rdata align=32 + +%macro GET_GC 0 + mov gc, [_GlideRoot + curGC] +%endmacro + +%endif + +;--------------------------------------- +; DJGPP +;--------------------------------------- +%if XOS == XOS_DJGPP + +%define TEXT .text +%define DATA .data +%define CONST .rodata + +%macro GET_GC 0 + mov gc, [_GlideRoot + curGC] +%endmacro + +%endif + +;--------------------------------------- +; Linux +;--------------------------------------- +%if XOS == XOS_LINUX + +%define TEXT .text align=32 +%define DATA .data align=32 +%define CONST .rodata align=32 + +%macro GET_GC 0 + mov gc, [_GlideRoot + curGC] +%endmacro + +%endif diff --git a/glide3x/cvg/glide3/src/xtexdl.asm b/glide3x/cvg/glide3/src/xtexdl.asm index 9520f2c..4ddd746 100644 --- a/glide3x/cvg/glide3/src/xtexdl.asm +++ b/glide3x/cvg/glide3/src/xtexdl.asm @@ -19,8 +19,45 @@ ;; $Header$ ;; $Revision$ ;; $Log$ +;; Revision 1.1.8.7 2003/09/12 05:08:35 koolsmoky +;; preparing for graphic context checks +;; +;; Revision 1.1.8.6 2003/07/07 23:29:06 koolsmoky +;; cleaned logs +;; +;; +;; Revision 1.1 2000/06/15 00:27:43 joseph +;; Initial checkin into SourceForge. 
;; -;; 1 10/08/98 11:30a Brent +;; 10 8/17/99 6:35p Atai +;; fixed amd debug mode +;; +;; 9 4/08/99 1:22p Atai +;; added contect check for _grTexDownload_3DNow_MMX +;; +;; 8 3/19/99 11:26a Peter +;; expose direct fifo for gl +;; +;; 7 2/02/99 4:36p Peter +;; download through lfb rather than texture port +;; +;; 6 12/17/98 2:36p Atai +;; check in Norbert's fix for texture download width correction +;; +;; 5 12/07/98 11:33a Peter +;; norbert's re-fixes of my merge +;; +;; 4 11/02/98 5:34p Atai +;; merge direct i/o code +;; +;; 3 10/20/98 5:34p Atai +;; added #ifdefs for hwc +;; +;; 2 10/14/98 12:05p Peter +;; fixed my effed up assumption about non-volatile regs +;; +;; 1 10/09/98 6:48p Peter +;; 3DNow!(tm) version of wide texture downloads ;; ;; 3 10/07/98 9:43p Peter ;; triangle procs for 3DNow!(tm) @@ -32,247 +69,646 @@ ;; mmx stuff for 3DNow!(tm) capable processors ;; -TITLE xtexdl.asm -OPTION OLDSTRUCTS +%include "xos.inc" -.586P -.MMX -.K3D - -EXTRN __FifoMakeRoom: NEAR +extrn _FifoMakeRoom + +%MACRO _grCommandTransportMakeRoom 3 + push %3 + push %2 + push %1 + call _FifoMakeRoom + add esp, 12 +%ENDMACRO ; _grCommandTransportMakeRoom ;;; Definitions of cvg regs and glide root structures. -INCLUDE fxgasm.h +%INCLUDE "fxgasm.h" -; Arguments (STKOFF = 12 from 3 dword pushes) -STACKOFFSET = 12 -_gc$ = 4 + STACKOFFSET -_baseAddr$ = 8 + STACKOFFSET -_maxS$ = 12 + STACKOFFSET -_minT$ = 16 + STACKOFFSET -_maxT$ = 20 + STACKOFFSET -_texData$ = 24 + STACKOFFSET +; Arguments (STKOFF = 16 from 4 dword pushes) +STACKOFFSET equ 16 +_gc$ equ 4 + STACKOFFSET +_baseAddr$ equ 8 + STACKOFFSET +_maxS$ equ 12 + STACKOFFSET +_minT$ equ 16 + STACKOFFSET +_maxT$ equ 20 + STACKOFFSET +_texData$ equ 24 + STACKOFFSET - ;; NB: The first set of registers (eax-edx) are volatile across + ;; NB: The first set of registers (eax, ecx, and edx) are volatile across ;; function calls. 
The remaining registers are supposedly non-volatile ;; so they only store things that are non-volatile across the call. -fifo TEXTEQU ; Current fifo ptr in inner loop -texAddr TEXTEQU ; Physical download address of the current scanline -gc TEXTEQU ; Current graphics context -curS TEXTEQU ; Current texture scanline - -maxT TEXTEQU ; Max scanline line value (inclusive) -dataPtr TEXTEQU ; Current user texture data ptr -curT TEXTEQU ; Current s coordinate in inner loop -temp1 TEXTEQU curS -temp2 TEXTEQU texAddr -temp3 TEXTEQU gc - -GR_FIFO_WRITE MACRO __addr, __offset, __data - mov [__addr + __offset], __data -ENDM ; GR_FIFO_WRITE +%define fifo ebp ; fifo ptr in inner loop +%define gc esi ; graphics context +%define dataPtr edi ; pointer to exture data to be downloaded +%define curT ebx ; counter for texture scan lines (t-coordinate) +%define curS ecx ; texture s-coordinate +%define fRoom edx ; room available in fifo (in bytes) ;-------------------------------------------------------------------------- -_TEXT SEGMENT PAGE PUBLIC USE32 'CODE' - ASSUME DS: FLAT, SS: FLAT - ALIGN 32 +%IFNDEF GL_SSE2 - PUBLIC __grTexDownload_3DNow_MMX@24 -__grTexDownload_3DNow_MMX@24 PROC NEAR +;-------------------------------------------------------------------------- +; +; GL_AMD3D, GL_MMX +; +;-------------------------------------------------------------------------- - ;; Function prologue type things - ;; NB: We are not bothering to preserve the contents - ;; of eax, ebx, ecx, edx because they are volatile - ;; by convention. 
+segment TEXT + + ALIGN 32 + +%IFDEF GL_AMD3D +proc _grTexDownload_3DNow_MMX, 24 +%ENDIF +%IFDEF GL_MMX +proc _grTexDownload_MMX, 24 +%ENDIF + + push ebx ; save caller's register variable + mov curT, [esp + _maxT$ - 12] ; curT = maxT + + push esi ; save caller's register variable + mov eax, [esp + _minT$ - 8] ; minT + + push edi ; save caller's register variable + mov gc, [esp + _gc$ - 4] ; gc + + push ebp ; save caller's register variable + mov dataPtr, [esp + _texData$]; dataPtr + +%IFDEF GLIDE_ALT_TAB + test gc, gc + je .dlDone +; mov edx, [gc + windowed] +; test edx, 1 +; jnz .pastContextTest + mov edx, DWORD [gc+lostContext] + mov ecx, [edx] + test ecx, 1 + jnz .dlDone +;.pastContextTest: +%ENDIF + + sub curT, eax ; curT = maxT - minT + mov fifo, [gc + fifoPtr] ; fifoPtr + + mov curS, [esp + _maxS$] ; curS = maxS + add curT, 1 ; curT = maxT - minT + 1 + +%IFDEF GL_AMD3D + femms ; we'll use MMX/3DNow!, make sure FPU register cleared +%ENDIF +%IFDEF GL_MMX + emms ; we'll use MMX +%ENDIF + + mov edx, curS ; curS = maxS = scanline width in DWORDs + movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download + + shl curS, 2 ; scan line width (in bytes) + mov eax, [esp + _minT$] ; 0 | minT + + mov [esp + _maxS$], curS ; save scan line width (in bytes) + shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs + + imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1) + + movd mm2, curS ; 0 | TEX_ROW_ADDR_INCR(1) + or edx, 00000005h ; packetHdr<31:30> = lfb port + ; packetHdr<21:3> = maxS + ; packetHdr<2:0> = packetType 5 + + movd mm1, edx ; 0 | packetHdr + movd mm4, eax ; 0 | TEX_ROW_ADDR_INCR(minT) + + psllq mm2, 32 ; TEX_ROW_ADDR_INCR(1) | 0 + paddd mm3, mm4 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT) + + mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes) + punpckldq mm1, mm3 ; hdr2 = texAddr | hdr1 = packetHdr + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS + ;; edx = fifoRoom, mm1 = 
texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0 + + test fifo, 4 ; is fifo QWORD aligned ? + jz .startDownload ; yup, start texture download + + cmp fRoom, 4 ; enough room for NULL packet in fifo? + jge .mmxAlignFifo ; yes, write NULL packet to align fifo + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room +%endif - ;; Enter 3DNow!(tm) state for the duration of the function - ;; because we don't use or call anything that uses fp. - femms - - mov gc, [esp + _gc$ - STACKOFFSET + 0] - push esi - - mov maxT, [esp + _maxT$ - STACKOFFSET + 4] - push edi + mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload - shl maxT, 9 ; Convert maxT to rowAddr format - push ebp - - mov dataPtr, [esp + _texData$] - mov curT, [esp + _minT$] + mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom) - ;; Pad out fifo so that we can use mmx writes the whole way w/o - ;; any intermediate tests in the inner loop for fifo alignment. - ;; Conveniently, the packet header is 2 dwords which matches - ;; the size of the mmx write. - mov fifo, [gc + fifoPtr]; Cache fifo ptr - mov texAddr, [esp + _baseAddr$]; Texture physical address + test fifo, 4 ; new fifoPtr QWORD aligned ? + jz .startDownload ; yup, start texture download - mov temp1, [esp + _maxS$]; Pre-convert maxS into packet 5 field format - sub texAddr, [gc + tex_ptr]; Convert to hw base relative address +.mmxAlignFifo: - shl temp1, 2 ; Write size dwords -> bytes - mov [esp + _baseAddr$], texAddr + mov DWORD [fifo], 0 ; write NULL packet + sub fRoom, 4 ; fifoRoom -= 4 - shl curT, 9 ; curT = TEX_ROW_ADDR_INCR(curT) - mov [esp + _maxS$], temp1; Write back converted s coordinate + mov [gc + fifoRoom], fRoom ; store new fifoRoom + add fifo, 4 ; fifoPtr += 4 - shl temp1, 1 ; Write size to packet 5 field format - test fifo, 4h ; Aligned fifo ptr? 
- - mov temp2, [gc + fifoRoom]; temp2 = gc->fifoRoom - mov [esp + _maxT$], temp1; Write back converted field format size - - jz __loopT +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF - ;; Check to make sure there's room in the fifo. If not then - ;; we'll wrap and then it should be aligned for the remainder of - ;; this function invocation. - cmp temp2, 4h - jg __mmxAlignFifo + mov [gc + fifoPtr], fifo ; store new fifoPtr + jmp .startDownload ; fifo aligned, download texture now - push @Line ; Line # inside this function - push 0h ; NULL file name + align 32 - push 4h ; fifo space required - call __FifoMakeRoom ; Get fifo room + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS + ;; edx=fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0 - ;; Calling out to external code means that our registers can get - ;; trashed in the same way that we trash things. Plus we need to - ;; re-cache the fifoPtr since we may have wrapped. +.loopT: - add esp, 12 ; Pop the 3 DWORDs for the fifoWrap parameters - mov gc, [esp + _gc$] +%IFDEF GLIDE_DEBUG - ;; Setup the regs to do the alignment - mov fifo, [gc + fifoPtr] - test fifo, 4h - - mov temp2, [gc + fifoRoom] - jz __loopT + ;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned -__mmxAlignFifo: - add fifo, 4h ; packetPtr++ - xor temp1, temp1 ; Clear the nop packet - - mov [gc + fifoPtr], fifo; gc->fifoPtr = packetPtr - sub temp2, 4h ; fifoRoom -= 4 + test fifo, 4 ; is fifoPtr QWORD aligned ? 
+ jz .alignmentOK ; yup, continue - mov [gc + fifoRoom], temp2; gc->fifoRoom = fifoRoom - GR_FIFO_WRITE fifo, -4, temp1; NOP packet(0) + xor eax, eax ; create 0 + mov [eax], eax ; move to DS:[0] forces GP +.alignmentOK: +%ENDIF ; GLIDE_DEBUG - align 4 -__loopT: - ;; Check for room to write the current texture scanline - mov temp1, [esp + _maxS$]; temp1 = width of scanline (bytes) - mov temp2, [gc + fifoRoom]; temp2 = gc->fifoRoom (bytes) - - add temp1, 0Ch ; scanline width + sizeof(packet hdr) (bytes) + nop packet to mmx align - cmp temp2, temp1 ; fifo space required >= space availible ? - - jge __dlBegin ; Yes, start download now w/ no more checking - - push @Line ; Line # inside this function - push 0h ; NULL file name - - push temp1 ; fifo space required - call __FifoMakeRoom ; Get fifo room - - add esp, 12 ; Pop the 3 DWORDs for the fifoWrap parameters - nop - - ;; Calling out to external code means that our registers can get - ;; trashed in the same way that we trash things. Plus we need to - ;; re-cache the fifoPtr since we may have wrapped. 
- mov gc, [esp + _gc$] - mov fifo, [gc + fifoPtr] - -IFDEF GLIDE_DEBUG - ;; Make sure that we have an mmx happy aligned fifoPtr - test fifo, 4 - jz @1 - - ;; Fault right away because this would be a huge suck - xor eax, eax - mov [eax], eax -@1: -ENDIF ; GLIDE_DEBUG - - align 4 -__dlBegin: - -IFDEF GLIDE_DEBUG - ;; Make sure that we have an mmx happy aligned fifoPtr - test fifo, 4 - jz @2 - - ;; Fault right away because this would be a huge suck - xor eax, eax - mov [eax], eax -@2: -ENDIF ; GLIDE_DEBUG - ;; Compute packet header words - ;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0] + ;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0] ;; hdr2: download address[29:0] - mov texAddr, [esp + _baseAddr$]; Download base address - mov temp1, [esp + _maxT$]; Pre-Converted # of words per packet/scanline - - mov temp3, 0C0000005h ; Base packet header (texture port | packet type 5) - add fifo, 8 ; Pre-increment fifo ptr (hdr1) - - or temp3, temp1 ; Base packet hdr | # of words - add texAddr, curT ; texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(curT) - - GR_FIFO_WRITE fifo, -8, temp3; Write hdr1 - add curT, 200h ; curT += TEX_ROW_ADDR_INCR(1) - - GR_FIFO_WRITE fifo, -4, texAddr; write hdr2 - mov curS, [esp + _maxS$]; curS = maxS - align 4 + movq [fifo], mm1 ; store hdr2 | hdr1 + add fifo, 8 ; increment fifo ptr (hdr1 + hdr2) + ;; S coordinate inner loop unrolled for 8 texels a write -__loopS: - movq mm0, [dataPtr] ; load (mmx) 64 bit data (8 texels) - add fifo, 8h ; pre-increment fifoPtr += 2 * sizeof(FxU32) - add dataPtr, 8h ; dataPtr += 2 * sizeof(FxU32) - sub curS, 8h ; curS -= 2 * sizeof(FxU32) +.loopS: - movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits] - jnz __loopS ; if curS > 0 + movq mm0, [dataPtr] ; load 64 bit data (8 texels) + add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32) - mov gc, [esp + _gc$] ; Re-cache gc which was trashed in the dl loop - mov temp1, fifo + add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32) + sub curS, 8 ; curS -= 2 
* sizeof(FxU32) - ;; Update gc->fifoPtr and gc->fifoRoom for the wrap/stall check - mov temp2, [gc + fifoPtr] - sub temp1, temp2 ; # of bytes written to the fifo + movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits] + jnz .loopS ; loop while curS > 0 - mov [gc + fifoPtr], fifo; gc->fifoPtr = packetPtr - mov temp2, [gc + fifoRoom] + mov ecx, [gc + fifoPtr] ; old fifo ptr + nop ; filler + + mov eax, fifo ; new fifo ptr + mov [gc + fifoPtr], fifo ; save new fifo ptr + +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF + + sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + + sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available + sub curT, 1 ; curT-- + + mov [gc + fifoRoom], fRoom ; save new fifo space available + jz .dlDone ; loop while curT > 0 + + ;; Check for room to write the next texture scanline + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo + ;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0 + + paddd mm1, mm2 ; texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr + mov esp, esp ; filler +.startDownload: + lea eax, [curS+8] ; fifo space needed = scan line width + header size + + cmp fRoom, eax ; fifo space available >= fifo space required ? + jge .loopT ; yup, write next scan line + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before +%endif - sub temp2, temp1 ; # of bytes left in fifo - cmp curT, maxT ; if (curT <= maxT) ? 
+ mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload - mov [gc + fifoRoom], temp2 - jle __loopT + mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + jmp .loopT ; we now have enough fifo room, write next scanline -__dlDone: - align 4 +.dlDone: +%IFDEF GL_AMD3D + femms ; exit 3DNow!(tm) state +%ENDIF +%IFDEF GL_MMX + emms ; exit MMX state +%ENDIF - femms ; Exit 3DNow!(tm) state - pop ebp + pop ebp ; restore caller's register variable + pop edi ; restore caller's register variable - pop edi - pop esi + pop esi ; restore caller's register variable + pop ebx ; restore caller's register variable - ret 18h ; Pop 6 parameters and return + ret ; pop 6 DWORD parameters and return +endp -__grTexDownload_3DNow_MMX@24 ENDP +%ELSE ; !GL_SSE2 -_TEXT ENDS +;-------------------------------------------------------------------------- +; +; GL_SSE2 +; +;-------------------------------------------------------------------------- -END +segment TEXT + + ALIGN 32 + +proc _grTexDownload_SSE2_64, 24 + + push ebx ; save caller's register variable + mov curT, [esp + _maxT$ - 12] ; curT = maxT + + push esi ; save caller's register variable + mov eax, [esp + _minT$ - 8] ; minT + + push edi ; save caller's register variable + mov gc, [esp + _gc$ - 4] ; gc + + push ebp ; save caller's register variable + mov dataPtr, [esp + _texData$]; dataPtr + +%IFDEF GLIDE_ALT_TAB + test gc, gc + je .dlDone +; mov edx, [gc + windowed] +; test edx, 1 +; jnz .pastContextTest + mov edx, DWORD [gc+lostContext] + mov ecx, [edx] + test ecx, 1 + jnz .dlDone +;.pastContextTest: +%ENDIF + + sub curT, eax ; curT = maxT - minT + mov fifo, [gc + fifoPtr] ; fifoPtr + + mov curS, [esp + _maxS$] ; curS = maxS + add curT, 1 ; curT = maxT - minT + 1 + + mov edx, curS ; curS = maxS = scanline width in DWORDs + movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download + 
+ shl curS, 2 ; scan line width (in bytes) + mov eax, [esp + _minT$] ; 0 | 0 | 0 | minT + + mov [esp + _maxS$], curS ; save scan line width (in bytes) + shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs + + imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1) + + movd xmm2,curS ; 0 | 0 | TEX_ROW_ADDR_INCR(1) + or edx, 00000005h ; packetHdr<31:30> = lfb port + ; packetHdr<21:3> = maxS + ; packetHdr<2:0> = packetType 5 + + movd xmm1,edx ; 0 | 0 | packetHdr + movd xmm4,eax ; 0 | 0 | TEX_ROW_ADDR_INCR(minT) + + psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0 + paddd xmm3,xmm4 ; 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT) + + mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes) + punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS + ;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + + test fifo, 4 ; is fifo QWORD aligned ? + jz .startDownload ; yup, start texture download + + cmp fRoom, 4 ; enough room for NULL packet in fifo? + jge .xmmAlignFifo ; yes, write NULL packet to align fifo + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room +%endif + + mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload + + mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom) + + test fifo, 4 ; new fifoPtr QWORD aligned ? 
+ jz .startDownload ; yup, start texture download + +.xmmAlignFifo: + + mov DWORD [fifo], 0 ; write NULL packet + sub fRoom, 4 ; fifoRoom -= 4 + + mov [gc + fifoRoom], fRoom ; store new fifoRoom + add fifo, 4 ; fifoPtr += 4 + +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF + + mov [gc + fifoPtr], fifo ; store new fifoPtr + jmp .startDownload ; fifo aligned, download texture now + + align 32 + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS + ;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + +.loopT: + +%IFDEF GLIDE_DEBUG + + ;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned + + test fifo, 4 ; is fifoPtr QWORD aligned ? + jz .alignmentOK ; yup, continue + + xor eax, eax ; create 0 + mov [eax], eax ; move to DS:[0] forces GP +.alignmentOK: +%ENDIF ; GLIDE_DEBUG + + ;; Compute packet header words + ;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0] + ;; hdr2: download address[29:0] + + movq [fifo],xmm1 ; store hdr2 | hdr1 + add fifo, 8 ; increment fifo ptr (hdr1 + hdr2) + + ;; S coordinate inner loop unrolled for 8 texels a write + +.loopS: + + movq xmm0,[dataPtr] ; load 64 bit data (8 texels) + add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32) + + add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32) + sub curS, 8 ; curS -= 2 * sizeof(FxU32) + + movq [fifo - 8],xmm0 ; *fifoPtr = texelData[64 bits] + jnz .loopS ; loop while curS > 0 + + mov ecx, [gc + fifoPtr] ; old fifo ptr + nop ; filler + + mov eax, fifo ; new fifo ptr + mov [gc + fifoPtr], fifo ; save new fifo ptr + +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF + + sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + + sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available + sub curT, 1 ; curT-- + + mov [gc + fifoRoom], fRoom ; save new fifo space 
available + jz .dlDone ; loop while curT > 0 + + ;; Check for room to write the next texture scanline + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo + ;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + + paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr + mov esp, esp ; filler +.startDownload: + lea eax, [curS+8] ; fifo space needed = scan line width + header size + + cmp fRoom, eax ; fifo space available >= fifo space required ? + jge .loopT ; yup, write next scan line + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before +%endif + + mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload + + mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + jmp .loopT ; we now have enough fifo room, write next scanline + +.dlDone: + pop ebp ; restore caller's register variable + pop edi ; restore caller's register variable + + pop esi ; restore caller's register variable + pop ebx ; restore caller's register variable + + ret ; pop 6 DWORD parameters and return +endp + + + +segment TEXT + + ALIGN 32 + +proc _grTexDownload_SSE2_128, 24 + + push ebx ; save caller's register variable + mov curT, [esp + _maxT$ - 12] ; curT = maxT + + push esi ; save caller's register variable + mov eax, [esp + _minT$ - 8] ; minT + + push edi ; save caller's register variable + mov gc, [esp + _gc$ - 4] ; gc + + push ebp ; save caller's register variable + mov dataPtr, [esp + _texData$]; dataPtr + +%IFDEF GLIDE_ALT_TAB + test gc, gc + je .dlDone +; mov edx, [gc + windowed] +; test edx, 1 +; jnz .pastContextTest + mov edx, DWORD [gc+lostContext] + mov ecx, [edx] + test ecx, 1 + jnz .dlDone +;.pastContextTest: +%ENDIF + + sub curT, eax ; curT = maxT - minT + mov fifo, [gc + fifoPtr] ; fifoPtr + + mov curS, [esp + _maxS$] ; curS = maxS + add curT, 1 
; curT = maxT - minT + 1 + + mov edx, curS ; curS = maxS = scanline width in DWORDs + movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download + + shl curS, 2 ; scan line width (in bytes) + mov eax, [esp + _minT$] ; 0 | minT + + mov [esp + _maxS$], curS ; save scan line width (in bytes) + shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs + + imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1) + + movd xmm2,curS ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(1) + or edx, 00000005h ; packetHdr<31:30> = lfb port + ; packetHdr<21:3> = maxS + ; packetHdr<2:0> = packetType 5 + + movd xmm1,edx ; 0 | 0 | 0 | packetHdr + movd xmm4,eax ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(minT) + + psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0 + paddd xmm3,xmm4 ; 0 | 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT) + + mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes) + punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS + ;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + + test fifo, 4 ; is fifo QWORD aligned ? + jz .startDownload ; yup, start texture download + + cmp fRoom, 4 ; enough room for NULL packet in fifo? + jge .xmmAlignFifo ; yes, write NULL packet to align fifo + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room +%endif + + mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload + + mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom) + + test fifo, 4 ; new fifoPtr QWORD aligned ? 
+ jz .startDownload ; yup, start texture download + +.xmmAlignFifo: + + mov DWORD [fifo], 0 ; write NULL packet + sub fRoom, 4 ; fifoRoom -= 4 + + mov [gc + fifoRoom], fRoom ; store new fifoRoom + add fifo, 4 ; fifoPtr += 4 + +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF + + mov [gc + fifoPtr], fifo ; store new fifoPtr + jmp .startDownload ; fifo aligned, download texture now + + align 32 + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS + ;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + +.loopT: + +%IFDEF GLIDE_DEBUG + + ;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned + + test fifo, 4 ; is fifoPtr QWORD aligned ? + jz .alignmentOK ; yup, continue + + xor eax, eax ; create 0 + mov [eax], eax ; move to DS:[0] forces GP +.alignmentOK: +%ENDIF ; GLIDE_DEBUG + + ;; Compute packet header words + ;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0] + ;; hdr2: download address[29:0] + + movq [fifo],xmm1 ; store hdr2 | hdr1 + add fifo, 8 ; increment fifo ptr (hdr1 + hdr2) + + ;; S coordinate inner loop unrolled for 8 texels a write + +.loopS: + + movdqu xmm0, [dataPtr] ; load 128 bit data (8 texels) ; isn't 16 bytes aligned? + add fifo, 16 ; pre-increment fifoPtr += 4 * sizeof(FxU32) + + add dataPtr, 16 ; dataPtr += 4 * sizeof(FxU32) + sub curS, 16 ; curS -= 4 * sizeof(FxU32) + + movdqu [fifo - 16], xmm0 ; *fifoPtr = texelData[128 bits] ; isn't 16 bytes aligned? 
+ jnz .loopS ; loop while curS > 0 + + mov ecx, [gc + fifoPtr] ; old fifo ptr + nop ; filler + + mov eax, fifo ; new fifo ptr + mov [gc + fifoPtr], fifo ; save new fifo ptr + +%IFDEF GLIDE_DEBUG + mov [gc + checkPtr], fifo ; checkPtr +%ENDIF + + sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + + sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available + sub curT, 1 ; curT-- + + mov [gc + fifoRoom], fRoom ; save new fifo space available + jz .dlDone ; loop while curT > 0 + + ;; Check for room to write the next texture scanline + + ;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo + ;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0 + + paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr + mov esp, esp ; filler +.startDownload: + lea eax, [curS+8] ; fifo space needed = scan line width + header size + + cmp fRoom, eax ; fifo space available >= fifo space required ? 
+ jge .loopT ; yup, write next scan line + +%ifdef USE_PACKET_FIFO + _grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before +%endif + + mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload + + mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload + mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes) + jmp .loopT ; we now have enough fifo room, write next scanline + +.dlDone: + pop ebp ; restore caller's register variable + pop edi ; restore caller's register variable + + pop esi ; restore caller's register variable + pop ebx ; restore caller's register variable + + ret ; pop 6 DWORD parameters and return +endp + + +%ENDIF ; GL_SSE2 diff --git a/glide3x/cvg/glide3/tests/Makefile.DJ b/glide3x/cvg/glide3/tests/Makefile.DJ new file mode 100644 index 0000000..37c680b --- /dev/null +++ b/glide3x/cvg/glide3/tests/Makefile.DJ @@ -0,0 +1,54 @@ +# DOS/DJGPP tests makefile for Glide3 +# +# Copyright (c) 2002 - Borca Daniel +# Email : dborca@users.sourceforge.net +# Web : http://www.geocities.com/dborca +# +# $Header$ +# + + +# +# Available options: +# +# Environment variables: +# CPU optimize for the given processor. +# default = pentium +# DXE=1 use DXE modules. +# default = no +# +# Targets: +# build a specific file +# + + +.PHONY: all +.SUFFIXES: .c .o .exe +.SECONDARY: tlib.o + +FX_GLIDE_HW = cvg +TOP = ../../.. 
+CPU ?= pentium + +CC = gcc +CFLAGS = -Wall -O2 -ffast-math -mcpu=$(CPU) +CFLAGS += -I$(TOP)/$(FX_GLIDE_HW)/glide3/src -I$(TOP)/$(FX_GLIDE_HW)/incsrc +CFLAGS += -I$(TOP)/swlibs/fxmisc +CFLAGS += -D__DOS__ -DCVG +CFLAGS += -D__DOS32__ + +LDFLAGS = -s -L$(TOP)/$(FX_GLIDE_HW)/lib + +ifdef DXE +LDLIBS = -lgld3i +else +LDLIBS = -lgld3x +endif + +.c.o: + $(CC) -o $@ $(CFLAGS) -c $< +%.exe: tlib.o %.o + $(CC) -o $@ $(LDFLAGS) $^ $(LDLIBS) + +all: + $(error Must specify to build) diff --git a/glide3x/cvg/glide3/tests/makefile.linux b/glide3x/cvg/glide3/tests/makefile.linux index 0ad8911..98b6c7b 100644 --- a/glide3x/cvg/glide3/tests/makefile.linux +++ b/glide3x/cvg/glide3/tests/makefile.linux @@ -1,78 +1,48 @@ +# Linux tests makefile for Glide3 # -# Insert new header here +# Copyright (c) 2002 - Borca Daniel +# Email : dborca@users.sourceforge.net +# Web : http://www.geocities.com/dborca +# +# $Header$ # -LDIRT= $(wildcard *.exe *.map *.sys *.o *.a) - -# Special case rush because its built off of the sst1 tree -ifeq ($(FX_GLIDE_HW),SST96) -GLIDE_ROOT = $(BUILD_ROOT)/sst1 -else -GLIDE_ROOT = $(BUILD_ROOT)/$(FX_GLIDE_HW) -endif +# +# Available options: +# +# Environment variables: +# CPU optimize for the given processor. +# default = pentium +# +# Targets: +# build a specific file +# -LCINCS += -I$(BUILD_ROOT)/$(FX_GLIDE_HW)/include +.PHONY: all +.SUFFIXES: .c .o .exe +.SECONDARY: tlib.o -LIBOBJS = tlib.o +FX_GLIDE_HW = cvg +TOP = ../../.. 
+CPU ?= pentium -GLIDELIB = -L$(GLIDE_ROOT)/lib -lglide3 +CC = gcc +CFLAGS = -Wall -O2 -ffast-math -mcpu=$(CPU) +CFLAGS += -I$(TOP)/$(FX_GLIDE_HW)/glide3/src -I$(TOP)/$(FX_GLIDE_HW)/incsrc +CFLAGS += -I$(TOP)/swlibs/fxmisc +CFLAGS += -DCVG -LLDLIBS = $(LIBOBJS) $(GLIDELIB) +LDFLAGS = -s -L$(TOP)/$(FX_GLIDE_HW)/lib -ifeq ($(HAL_CSIM),1) -LLDLIBS += $(BUILD_ROOT)/$(FX_GLIDE_HW)/lib/lib$(FX_GLIDE_HW)hal.a -endif +LDLIBS = -lglide3 +LDLIBS += -lm -PRIVATE_HEADERS = tlib.h tlib.c tldata.inc +.c.o: + $(CC) -o $@ $(CFLAGS) -c $< +%.exe: tlib.o %.o + $(CC) -o $@ $(LDFLAGS) $^ $(LDLIBS) -CFILES = test00.c \ - test01.c \ - test02.c \ - test03.c \ - test04.c \ - test05.c \ - test06.c \ - test07.c \ - test08.c \ - test09.c \ - test10.c \ - test11.c \ - test12.c \ - test13.c \ - test14.c \ - test15.c \ - test16.c \ - test17.c \ - test18.c \ - test19.c \ - test20.c \ - test21.c \ - test22.c \ - test23.c \ - test24.c \ - test25.c \ - test26.c \ - test27.c \ - test28.c \ - test29.c \ - test30.c \ - test31.c \ - test32.c \ - test33.c \ - test34.c \ - test35.c \ - test36.c \ - display.c \ - sbench.c - -PROGRAMS = $(CFILES:.c=) - -DATAFILES = alpha.3df decal1.3df lava.3df light.3df matt1.3df miro.3df - -include $(BUILD_ROOT)/swlibs/include/make/3dfx.linux.mak - -$(PROGRAMS): $(LIBOBJS) - - +all: + $(error Must specify to build) diff --git a/glide3x/cvg/glide3/tests/tlib.c b/glide3x/cvg/glide3/tests/tlib.c index ceeb56f..46514c2 100644 --- a/glide3x/cvg/glide3/tests/tlib.c +++ b/glide3x/cvg/glide3/tests/tlib.c @@ -1636,6 +1636,12 @@ tlErrorMessage( char *err) { fprintf(stderr, err); } /* tlErrorMessage */ +FxU32 +tlGethWnd(void) +{ + return -1; +} + #elif __WIN32__ diff --git a/glide3x/cvg/init/fxremap.c b/glide3x/cvg/init/fxremap.c new file mode 100644 index 0000000..1204da4 --- /dev/null +++ b/glide3x/cvg/init/fxremap.c @@ -0,0 +1,884 @@ +#include +#include +#include +#ifndef __linux__ +#include +#endif +#include <3dfx.h> +#include + +#define null 0 +#define SIZE_SST1_NEEDED 
0x100000 +#define END_ADDRESS 0x10000000 +#define S3_SHIFT 0x400000 + +struct RangeSTRUCT +{ + FxU32 address; + FxU32 range; + FxU32 id; + FxU32 is_voodoo; /* 1 = is voodoo, 2 = hidden SLI */ + FxU32 is_S3; + struct RangeSTRUCT *next; + struct RangeSTRUCT *prev; +}; + +int silent = 1; + +typedef struct RangeSTRUCT RangeStruct; + +void InitRemap(void); +void CloseRemap(void); +void GetMemoryMap(void); +void RemapVoodoo(RangeStruct *conflict); +void AdjustMapForS3(void); +RangeStruct *TestForConflicts(void); +void RemoveEntry(RangeStruct *del); +void InsertEntry(RangeStruct *ins); +FxBool FindHole(RangeStruct *conflict); +FxU32 SnapToDecentAddress(FxU32 address,RangeStruct *conflict); +FxBool fits_in_hole(RangeStruct *begin,FxU32 end,RangeStruct *hole,RangeStruct *conflict); +FxBool fits_under(RangeStruct *first,FxU32 minimum,RangeStruct *hole,RangeStruct *conflict); +FxU32 pciGetType(long i); +void pciGetRange(PciRegister reg,FxU32 device_number,FxU32 *data); +FxBool pciGetAddress(PciRegister reg,FxU32 device_number,FxU32 *data); + +void ForceCleanUp(void); +FxBool FindNecessaryCards(void); +void ProcessCommandLine(char **argv,int argc); +FxBool IsCardVoodoo(long i); +FxBool IsCardS3(long i); +FxBool ReadHex(char *string,FxU32 *num); +void AddMapEntry(FxU32 address,FxU32 range,FxU32 id,FxBool VoodooCard,FxBool S3Card); +void HandleMemoryOverlap(void); +FxBool overlap_map(RangeStruct *begin,FxU32 end); + +FxBool switch_S3_flag_ignore=FXFALSE; +//FxBool switch_force=FXFALSE; +FxBool switch_C0_bias=FXTRUE; +int switch_voodoo_loc = 0; +FxU32 num_voodoos=0; + +//#define TESTING 1 + +#ifdef TESTING +RangeStruct test_data[6]= {{0xF0000000,0x100000,1,0,0,0,0}, + {0xF3000000,0x200000,4,1,0,0,0}, + {0xF3000000,0x200000,8,0,0,0,0}, + {0xF5000000,0x200000,2,0,0,0,0}, + {0xE6000000,0x200000,5,0,0,0,0}, + {0xD3001000,0x200000,3,0,0,0,0}}; +#endif + +RangeStruct map[80]; +RangeStruct hole[80]; +long num_holes=0; +RangeStruct *first_entry; +RangeStruct *last_entry; +long entries=0; 
+RangeStruct master_hole; +long voodoo_loc; +FxU32 conflicts_found=0; + +void fxremap_dowork(int argc,char **argv,int doit_silently) +{ + RangeStruct *conflict; + + silent = doit_silently; + + ProcessCommandLine(argv,argc); + + + + InitRemap(); + + if (!FindNecessaryCards()) + { + if (!silent) { + printf("This program was only meant to be used with the 3dfx Voodoo chipset\n"); + printf("to correct possible pci address conflicts.\n"); + printf("No Voodoo chipset was detected\n"); + } + ForceCleanUp(); + } + + GetMemoryMap(); + + /* expand region of mapping for S3 card */ + AdjustMapForS3(); + /* see if we find any conflicts with any voodoo card */ + while (conflict=TestForConflicts()) + { + conflicts_found++; + /* since it is going to move */ + /* remove entry, so we can possibly use it as a hole */ + RemoveEntry(conflict); + if (FindHole(conflict)) + { + conflict->address=master_hole.address; + RemapVoodoo(conflict); + } + else + { + if (!silent) { + printf("Unable to find region to map conflicting board\n"); + } + ForceCleanUp(); + return; + } + } + + if (!conflicts_found) { + if (!silent) { + printf("No conflict with the Voodoo cards was found\n"); + } + } + CloseRemap(); +} + +void fxremap() { + fxremap_dowork(0,NULL,1); +} + +void fxremap_main(int argc,char **argv) { + fxremap_dowork(argc,argv,0); +} + + +void InitRemap(void) +{ + pciOpen(); +} + +void CloseRemap(void) +{ + // pciClose(); +} + +FxU32 pciGetConfigData_R(PciRegister reg, FxU32 devNum) { + FxU32 data; + + if (pciGetConfigData(reg,devNum,&data) == FXTRUE) { + return (data); + } else { + return (0xFFFFFFFF); + } +} + +#define PCI_NORMAL_TYPE 0 +#define PCI_BRIDGE_TYPE 1 + +void GetMemoryMap(void) +{ + FxU32 temp,temp2; + FxU32 type; + long devNum; + int fn; /* function number iterator */ + int maxFnNumber; + int multi_fn = 0; + +#ifdef TESTING + for (i=0;i<6;i++) + { + temp=test_data[i].address; + temp2=~(test_data[i].range - 0x1); + 
AddMapEntry(temp,temp2,test_data[i].id,test_data[i].is_voodoo,test_data[i].is_S3); + } +#else + for (devNum=0;devNumis_S3) + { + cur->address-=S3_SHIFT; + cur->range=S3_SHIFT<<1; + } + cur=cur->next; + } +} + +RangeStruct *TestForConflicts(void) +{ + RangeStruct *cur,*next; + + cur=first_entry; + + while(cur) + { + /* if this is a poorly mapped voodoo2 single board SLI, then remap */ + if ((cur->is_voodoo == 2) && (cur->address == 0xFF00000)) { + return (cur); + } + + if (cur->next) + { + if ((cur->address + cur->range) > cur->next->address) + { + next=cur->next; + if ((cur->is_voodoo)||(next->is_voodoo)) + { + if (cur->is_voodoo) + { + return cur; + } + return next; + } + else { + if (!silent) { + printf("FxRemap: Possible PCI conflict not with Voodoo device\n"); + printf("%X (%X) <-> %X:%X (%X)\n",cur->id, cur->address, + cur->next->id, cur->next->address); + } + } + } + } + else + { + if ((cur->address + cur->range) > END_ADDRESS) + return cur; + } + cur=cur->next; + } + return null; +} + +void AddMapEntry(FxU32 address,FxU32 range,FxU32 id,FxBool VoodooCard,FxBool S3Card) +{ + RangeStruct *temp,*cur,*next; + +//jcochrane@3dfx.com + long entry=0; + FxU32 tmp_address=0; +//END + + +#if 0 + static long test_entry=0; + + address=test_data[test_entry].address; + range=~(test_data[test_entry++].range - 0x1); +#endif + /* only if address != 0 */ + + +//jcochrane@3dfx.com +//check for duplicate entries in the map table,ignore if there is + + tmp_address=address>>4; + for(entry=0;entry>4; + map[entries].range=((~range)>>4)+0x1; + + map[entries].id=id; + map[entries].is_voodoo=VoodooCard; + map[entries].is_S3=S3Card; + + temp=&map[entries++]; + if (entries<=1) + { + first_entry=temp; + last_entry=temp; + temp->next=null; + temp->prev=null; + return; + } + + cur=first_entry; + next=null; + while(cur) + { + if (temp->address < cur->address) + { + next=cur; + break; + } + cur=cur->next; + } + if (next) + { + temp->next=next; + temp->prev=next->prev; + next->prev=temp; + if 
(next==first_entry) + first_entry=temp; + else + (temp->prev)->next=temp; + } + else + { + last_entry->next=temp; + temp->prev=last_entry; + last_entry=temp; + temp->next=null; + } + } +} +/* RemoveEntry: unlink del from the doubly linked list, updating last_entry or first_entry when del is at the tail or the head, and set del->next and del->prev to null. If del has neither a next nor a prev entry nothing is unlinked: a message is printed (unless silent is set), ForceCleanUp() is called and the function returns. */ +void RemoveEntry(RangeStruct *del) +{ + RangeStruct *prev; + + if (!(del->next)) + { + if (!(del->prev)) + { + if (!silent) { + printf("FxRemap: No entries mapped\n"); + } + ForceCleanUp(); + return; + } + prev=del->prev; + last_entry=prev; + + prev->next=null; + del->prev=null; + del->next=null; + } + else + { + if (!(del->prev)) + { + del->next->prev=null; + first_entry=del->next; + } + else + { + del->next->prev=del->prev; + del->prev->next=del->next; + } + del->next=null; + del->prev=null; + } +} +/* InsertEntry: link ins into the list in ascending address order. ins is placed before the first entry whose address is greater than ins->address, or appended after last_entry when no such entry exists. When first_entry is null, ins becomes both first_entry and last_entry. */ +void InsertEntry(RangeStruct *ins) +{ + RangeStruct *cur; + + cur=first_entry; + + ins->next=null; + ins->prev=null; + + if (!first_entry) + { + first_entry=ins; + last_entry=ins; + return; + } + while(cur) + { + if (ins->address < cur->address) + { + ins->next=cur; + ins->prev=cur->prev; + cur->prev=ins; + + if (!ins->prev) + { + first_entry=ins; + } + else + { + (ins->prev)->next=ins; + } + return; + } + cur=cur->next; + } + + /* if it got this far it needs to go at the end */ + ins->prev=last_entry; + last_entry->next=ins; + last_entry=ins; +} +/* SnapToDecentAddress: round address up to the next multiple of an alignment taken from conflict->range, using at least 0x10000; an address with none of the mask bits set is returned unchanged. NOTE(review): the mask is range-1, which assumes the range is a power of two; confirm against the callers. */ +FxU32 SnapToDecentAddress(FxU32 address,RangeStruct *conflict) +{ + FxU32 range; + FxU32 mask; + FxU32 not_mask; + + range=conflict->range; + if (range<0x10000) + range=0x10000; + mask=range; + mask-=1; + not_mask=~mask; + + if (address & mask) + { + address=(address & not_mask) + range; + } + return address; +} +/* fits_in_hole: take the end of begin (address+range), round it up with SnapToDecentAddress, and on success store that address and the remaining size (end-address) in hole and return FXTRUE; otherwise return FXFALSE. NOTE(review): the right-hand side of the comparison in the if statement is missing in this copy of the file; presumably it compares against end. */ +FxBool fits_in_hole(RangeStruct *begin,FxU32 end,RangeStruct *hole,RangeStruct *conflict) +{ + FxU32 address; + + address=begin->address+begin->range; + + address=SnapToDecentAddress(address,conflict); + + /* note could be <= */ + /* this is safer but more inefficient memory wise */ + if ((address+conflict->range)address=address; + hole->range=end-address; + return FXTRUE; + } + return FXFALSE; +} +/* fits_under: round minimum up with SnapToDecentAddress and test whether conflict->range fits strictly below first->address. On success hole receives the rounded address and the size up to first->address and FXTRUE is returned; otherwise FXFALSE. */ +FxBool 
fits_under(RangeStruct *first,FxU32 minimum,RangeStruct *hole,RangeStruct *conflict) +{ + FxU32 address; + + address=minimum; + address=SnapToDecentAddress(address,conflict); + + if ((address+conflict->range) < first->address) + { + hole->address=address; + hole->range=first->address - address; + return FXTRUE; + } + return FXFALSE; +} + +/* FindHole: look for free space for conflict and record it in master_hole. The gap after each list entry is tried in list order with fits_in_hole (the gap after the last entry is bounded by END_ADDRESS); if none fits and first_entry->address is above 0xA000000, the space between 0xA000000 and the first entry is tried with fits_under. Returns FXTRUE when a hole was found, FXFALSE otherwise. NOTE(review): first_entry is dereferenced after the loop without a null check, so the list is assumed to be non-empty. */ +FxBool FindHole(RangeStruct *conflict) +{ + RangeStruct *cur; + + cur=first_entry; + + while(cur) + { + if (!(cur->next)) + { + if (fits_in_hole(cur,END_ADDRESS,&master_hole,conflict)) + { + return FXTRUE; + } + } + else + { + if (fits_in_hole(cur,cur->next->address,&master_hole,conflict)) + { + return FXTRUE; + } + } + cur=cur->next; + } + + /* see if we can find a hole located below addressed boards */ + /* don't want to go below 0xA000000 for addressing our boards */ + if (first_entry->address > 0xA000000) + { + if (fits_under(first_entry,0xA000000,&master_hole,conflict)) + return FXTRUE; + } + return FXFALSE; +} +/* RemapVoodoo: put conflict back into the list with InsertEntry and, unless TESTING is defined, write conflict->address shifted left by 4 to the PCI_BASE_ADDRESS_0 register of device conflict->id. A message is printed unless silent is set. */ +void RemapVoodoo(RangeStruct *conflict) +{ + FxU32 address; + + /* put conflict back into memory map */ + InsertEntry(conflict); + +#ifndef TESTING + address=(conflict->address)<<4; + pciSetConfigData(PCI_BASE_ADDRESS_0,conflict->id,&address); +#endif + if (!silent) { + printf("Remapped Voodoo Board to avoid a conflict\n"); + } +} +/* pciGetRange: save the current value of config register reg, write 0xFFFFFFFF to it, read the register back and store that value in *data, restoring the saved value in between. With TESTING defined the intermediate values are printed. NOTE(review): presumably the usual PCI base-address sizing probe; confirm against the PCI specification. */ +void pciGetRange(PciRegister reg,FxU32 device_number,FxU32 *data) +{ + FxU32 temp=0xFFFFFFFF; + FxU32 size,save; + + pciGetConfigData(reg,device_number,&save); + pciSetConfigData(reg,device_number,&temp); + pciGetConfigData(reg,device_number,&size); + pciSetConfigData(reg,device_number,&save); + +#ifdef TESTING + printf("PciGetRange: save %08x \n",save); + printf("PciGetRange: temp %08x \n",temp); + printf("PciGetRange: size %08x \n",size); + printf("PciGetRange: save %08x \n",save); +#endif + *data=size; +} +/* pciGetAddress: read config register reg of device_number into *data. Returns FXFALSE when the value read is 0 or has bit 0 set, FXTRUE otherwise. NOTE(review): bit 0 presumably marks an I/O-space base address; confirm. */ +FxBool pciGetAddress(PciRegister reg,FxU32 device_number,FxU32 *data) +{ + pciGetConfigData(reg,device_number,data); + if ((*data)==0) + return FXFALSE; + if (*data & 0x01) + 
return FXFALSE; + return FXTRUE; +} +/* ForceCleanUp: currently does nothing; the pciClose() and exit(1) calls in its body are commented out. */ +void ForceCleanUp(void) +{ + // pciClose(); + // exit(1); +} +/* NOTE(review): the text between the start of the loop in FindNecessaryCards and the expression ending in 13) & 0x7 is missing in this copy of the file. The code that follows it tests PCI vendor and device ids (vendor 0x121a with device 0x0001, 0x0002, 0x0003 or 0x0004) and appears to be the body of a different function whose header is not visible here. */ +FxBool FindNecessaryCards(void) +{ + FxBool voodoo_found=FXFALSE; + long i; + + for (i=0;i> 13) & 0x7; + int true_val; + + if (fn_num) { + true_val = 2; + } else { + true_val = 1; + } + + pciGetConfigData(PCI_VENDOR_ID,i,&vendor); + pciGetConfigData(PCI_DEVICE_ID,i,&dev_id); + /* if sst1 */ + if ((vendor==0x121a)&&(dev_id==0x0001)) + return FXTRUE; + /* if voodoo2 */ + if ((vendor==0x121a)&&(dev_id==0x0002)) { + if (true_val == 2) if (!silent) { printf("found voodoo2 hidden sli\n"); } + return true_val; + } + /* if banshee */ + if ((vendor==0x121a)&&(dev_id==0x0003)) + return FXTRUE; + /* if h4? or whatever is next */ + if ((vendor==0x121a)&&(dev_id==0x0004)) + return FXTRUE; + return FXFALSE; +} +/* IsCardS3: read the PCI vendor and device id of device i and return FXTRUE when the vendor id is 0x5333 and the device id is 0x88f0 or 0x8880, FXFALSE otherwise. */ +FxBool IsCardS3(long i) +{ + FxU32 vendor,dev_id; + + pciGetConfigData(PCI_VENDOR_ID,i,&vendor); + pciGetConfigData(PCI_DEVICE_ID,i,&dev_id); + if ((vendor==0x5333)&&((dev_id==0x88f0)||(dev_id==0x8880))) + return FXTRUE; + + return FXFALSE; +} +/* ReadHex: parse a hexadecimal number from string into *num. Leading spaces are skipped; the number must start with a 0 followed by a lower-case x, otherwise FXFALSE is returned. Digits 0-9, A-F and a-f are accumulated four bits at a time and parsing stops at the first other character. NOTE(review): num_count!=0 is tested before num_count<8, so the final else that returns FXFALSE can never be reached and more than eight digits are accepted. FXTRUE is also returned, with *num set to 0, when no hex digit follows the prefix. */ +FxBool ReadHex(char *string,FxU32 *num) +{ + long i=0; + FxU32 temp=0,temp2; + long num_count=0; + + /* bypass leading spaces */ + while((string[i])&&(string[i]==' ')) + i++; + /* verify leading 0x */ + if (string[i]=='0') + i++; + else + return FXFALSE; + if (string[i]=='x') + i++; + else + return FXFALSE; + + /* read in number */ + while(((string[i]>=0x30)&&(string[i]<0x3A))||((string[i]>=0x41)&&(string[i]<0x47))||((string[i]>=0x61)&&(string[i]<0x67))) + { + if ((string[i]>=0x30)&&(string[i]<0x3A)) + temp2=string[i] - 0x30; + else if ((string[i]>=0x41)&&(string[i]<0x47)) + temp2=string[i] - 0x37; + else if ((string[i]>=0x61)&&(string[i]<0x67)) + temp2=string[i] - 0x57; + if (num_count!=0) + temp=(temp<<4)+temp2; + else if (num_count<8) + temp=temp2; + else + return FXFALSE; + num_count++;i++; + } + *num=temp; + return FXTRUE; +} +/* HandleMemoryOverlap: walk the list from first_entry and merge each entry with its successor while the two overlap (as decided by overlap_map): the entry's range is grown if needed to reach the end of the successor and the successor is unlinked from the list. */ +void HandleMemoryOverlap(void) +{ + RangeStruct *cur; + + cur=first_entry; + while(cur) + { + 
if (cur!=last_entry) + { + if (overlap_map(cur,cur->next->address)) + { + if (cur->range<(cur->next->address+cur->next->range-cur->address)) + cur->range=cur->next->address+cur->next->range-cur->address; + if (cur->next==last_entry) + { + last_entry=cur; + cur->next=null; + } + else + { + cur->next=cur->next->next; + cur->next->prev=cur; + } /* cur is not advanced after a merge, so it is compared with its new successor on the next pass */ + } + else + cur=cur->next; + } + else + cur=cur->next; + } +} +/* overlap_map: return FXTRUE when begin->address+begin->range is greater than end, FXFALSE otherwise. */ +FxBool overlap_map(RangeStruct *begin,FxU32 end) +{ + if ((begin->address+begin->range)>end) + return FXTRUE; + return FXFALSE; +} diff --git a/glide3x/cvg/init/fxremap.h b/glide3x/cvg/init/fxremap.h new file mode 100644 index 0000000..bbd5d03 --- /dev/null +++ b/glide3x/cvg/init/fxremap.h @@ -0,0 +1,14 @@ +/* + * fxremap.h + * + * pci remapper, used to remap the single board SLI slave to a valid + * PCI address + */ + +#ifndef _FXREMAP_H_ +#define _FXREMAP_H_ + +void fxremap(void); +int fxremap_main(int argc, char **argv); + +#endif