Voodoo2 happiness (DJGPP & Linux)

This commit is contained in:
dborca
2003-11-03 13:34:30 +00:00
parent b19d6d3bf5
commit 2ff376b852
19 changed files with 3655 additions and 1791 deletions

View File

@@ -19,6 +19,9 @@
;;
;; $Header$
;; $Log$
;; Revision 1.1.1.1 1999/12/07 21:42:30 joseph
;; Initial checkin into SourceForge.
;;
;;
;; 1 10/08/98 11:30a Brent
;;
@@ -35,10 +38,7 @@
;;
;;
TITLE cpudtect.asm
.586P
.model FLAT,C ; Flat memory, mangle publics with leading '_'
%include "xos.inc"
;; Data for data segment goes here
;_DATA SEGMENT DWORD USE32 PUBLIC 'DATA';
@@ -46,14 +46,14 @@ TITLE cpudtect.asm
;;; Some useful constants
; CPU Type
CPUTypeUnknown = 0ffffffffh
CPUTypePrePent = 4h
CPUTypeP5 = 5h
CPUTypeP6 = 6h
CPUTypeUnknown equ 0ffffffffh
CPUTypePrePent equ 4h
CPUTypeP5 equ 5h
CPUTypeP6 equ 6h
;;; References to external data:
_TEXT SEGMENT
segment TEXT
;;
;; _cpu_detect_asm - detect the type of CPU
;;
@@ -63,10 +63,8 @@ _TEXT SEGMENT
;;
;; returns 4 for non-pen
PUBLIC _cpu_detect_asm
_cpu_detect_asm PROC NEAR
proc _cpu_detect_asm
P6Stuff:
.586
pushad ; save all regs.
; First, determine whether CPUID instruction is available.
@@ -177,45 +175,38 @@ UnknownVendor:
mov eax, 0ffffffffh
ret
_cpu_detect_asm ENDP
endp
;------------------------------------------------------------------------------
; this routine sets the precision to single
; which affects all adds, mults, and divs
align 4 ;
PUBLIC single_precision_asm
single_precision_asm PROC NEAR
.586
proc single_precision_asm
push eax ; make room
fnclex ; clear pending exceptions
fstcw WORD PTR [esp]
mov eax, DWORD PTR [esp]
fstcw WORD [esp]
mov eax, DWORD [esp]
and eax, 0000fcffh ; clear bits 9:8
mov DWORD PTR [esp], eax
fldcw WORD PTR [esp]
mov DWORD [esp], eax
fldcw WORD [esp]
pop eax
ret 0
single_precision_asm ENDP
ret
endp
;------------------------------------------------------------------------------
; this routine sets the precision to double
; which affects all adds, mults, and divs
align 4 ;
PUBLIC double_precision_asm
double_precision_asm PROC NEAR
.586
proc double_precision_asm
push eax ; make room
fnclex ; clear pending exceptions
fstcw WORD PTR [esp]
mov eax, DWORD PTR [esp]
fstcw WORD [esp]
mov eax, DWORD [esp]
and eax, 0000fcffh ; clear bits 9:8
or eax, 000002ffh ; set 9:8 to 10
mov DWORD PTR [esp], eax
fldcw WORD PTR [esp]
mov DWORD [esp], eax
fldcw WORD [esp]
pop eax
ret 0
double_precision_asm ENDP
_TEXT ENDS
END
ret
endp

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:42:30 joseph
** Initial checkin into SourceForge.
**
**
** 1 10/08/98 11:30a Brent
**
@@ -476,10 +479,10 @@ GR_DIENTRY(grGet, FxU32, (FxU32 pname, FxU32 plength, FxI32 *params))
switch(hwc->SSTs[_GlideRoot.current_sst].type) {
case GR_SSTTYPE_VOODOO:
case GR_SSTTYPE_Voodoo2:
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.fbRam;
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.fbRam << 20;
break;
case GR_SSTTYPE_SST96:
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.fbRam;
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.fbRam << 20;
break;
default:
*params = 0; /* XXX UMA architecture */
@@ -493,10 +496,10 @@ GR_DIENTRY(grGet, FxU32, (FxU32 pname, FxU32 plength, FxI32 *params))
switch(hwc->SSTs[_GlideRoot.current_sst].type) {
case GR_SSTTYPE_VOODOO:
case GR_SSTTYPE_Voodoo2:
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.tmuConfig[0].tmuRam;
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.VoodooConfig.tmuConfig[0].tmuRam << 20;
break;
case GR_SSTTYPE_SST96:
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.tmuConfig.tmuRam;
*params = hwc->SSTs[_GlideRoot.current_sst].sstBoard.SST96Config.tmuConfig.tmuRam << 20;
break;
default:
*params = 0; /* XXX UMA architecture */
@@ -772,6 +775,32 @@ GR_DIENTRY(grGetString, const char *, (FxU32 pname))
#undef FN_NAME
} /* grGetString */
/*-------------------------------------------------------------------
  Function: grGetRegistryOrEnvironmentString
  Date: 4/17/2000
  Implementor(s): atom
  Description:
  Exported to clients through grGetProcAddress under the name
  "grGetRegistryOrEnvironmentStringExt".
  This is here so the spooky code for finding the correct registry
  tweak path in 9x/NT/2K does not have to be duplicated in 3dfxogl.
  This implementation consults the process environment only (getenv);
  no registry lookup is performed here.
  Arguments: char* to the name of the setting to check for.
  Return: char* to the requested entry from the environment settings.
          NULL if theEntry is NULL or the variable is not set.
  -------------------------------------------------------------------*/
GR_DIENTRY(grGetRegistryOrEnvironmentString, char*, (char* theEntry))
{
#define FN_NAME "grGetRegistryOrEnvironmentString"
  /* getenv() must not be handed a NULL name; report it as "not found"
   * so the documented "NULL on error" contract holds. */
  if (theEntry == NULL) {
    return NULL;
  }

  return getenv(theEntry);
#undef FN_NAME
} /* grGetRegistryOrEnvironmentString */
/*-------------------------------------------------------------------
Function: grReset
Date: 16-Dec-97
@@ -881,6 +910,10 @@ GR_DIENTRY(grGetProcAddress, GrProc, (char *procName))
return (GrProc)_GlideRoot.deviceArchProcs.curLineProc;
if (!strcmp(procName, "guQueryResolutionXYExt"))
return (GrProc)guQueryResolutionXY;
if (!strcmp(procName, "grGetRegistryOrEnvironmentStringExt"))
return (GrProc)grGetRegistryOrEnvironmentString;
if (!strcmp(procName, "grTexDownloadTableExt"))
return (GrProc)grTexDownloadTableExt;
}
return NULL;

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1.8.1 2003/06/29 18:45:55 guillemj
** Fixed preprocessor invalid token errors.
**
** Revision 1.1.1.1 1999/12/07 21:42:31 joseph
** Initial checkin into SourceForge.
**
@@ -507,6 +510,57 @@ GR_DIENTRY(grDepthBufferMode, void , (GrDepthBufferMode_t mode) )
#undef FN_NAME
} /* grDepthBufferMode */
/*-------------------------------------------------------------------
Function: grStipplePattern
Date: 23-Nov-2000
Implementor(s): alanh
Description:
Placeholder entry point for setting the stipple pattern. The body
currently contains no executable statements: the intended
implementation is kept below inside a comment and marked TODO, so
calling this function has no effect.
Arguments:
stipple - the stipple pattern value; currently unused.
Return:
none
-------------------------------------------------------------------*/
GR_DIENTRY(grStipplePattern, void , (GrStipplePattern_t stipple))
{
#define FN_NAME "grStipplePattern"
/* [dBorca] TODO
*
GR_BEGIN_NOFIFOCHECK("grStipplePattern\n", 85);
INVALIDATE(stipple);
STOREARG(grStipplePattern, stipple);
*/
#undef FN_NAME
} /* grStipplePattern */
/*-------------------------------------------------------------------
Function: grStippleMode
Date: 23-Nov-2000
Implementor(s): alanh
Description:
Placeholder entry point for selecting the stipple mode. The body
currently contains no executable statements: the intended
implementation is kept below inside a comment and marked TODO, so
calling this function has no effect.
Arguments:
mode - the requested stipple mode; currently unused.
Return:
none
-------------------------------------------------------------------*/
GR_DIENTRY(grStippleMode, void , (GrStippleMode_t mode) )
{
#define FN_NAME "grStippleMode"
/* [dBorca] TODO
*
GR_BEGIN_NOFIFOCHECK("grStippleMode\n", 85);
INVALIDATE(fbzMode);
STOREARG(grStippleMode, mode);
*/
#undef FN_NAME
} /* grStippleMode */
/*-------------------------------------------------------------------
Function: grDitherMode

View File

@@ -37,7 +37,29 @@
* macros for creating assembler offset files
*----------------------------------------------------------------------*/
#ifndef __linux__
#if 1 /* defined(NASM) - default */
#define NEWLINE printf("\n")
#define COMMENT printf(";----------------------------------------------------------------------\n")
#define HEADER(str) NEWLINE; COMMENT; \
printf("; Assembler offsets for %s struct\n",str);\
COMMENT; NEWLINE
#define OFFSET(p,o,pname) if (hex) \
printf("%s\tequ %08xh\n",pname,((int)&p.o)-(int)&p); \
else printf("%s\tequ %10d\n",pname,((int)&p.o)-(int)&p)
#define OFFSET2(p,o,pname) if (hex) \
printf("%s\tequ %08xh\n",pname,((int)&o)-(int)&p); \
else printf("%s\tequ %10d\n",pname,((int)&o)-(int)&p)
#define SIZEOF(p,pname) if (hex) \
printf("SIZEOF_%s\tequ %08lxh\n",pname,sizeof(p)); \
else printf("SIZEOF_%s\tequ %10ld\n",pname,sizeof(p))
#else /* !NASM */
#if !defined(__linux__) && !defined(__DJGPP__)
#define NEWLINE printf("\n")
#define COMMENT printf(";----------------------------------------------------------------------\n")
@@ -57,7 +79,7 @@
printf("SIZEOF_%s\t= %08xh\n",pname,sizeof(p)); \
else printf("SIZEOF_%s\t= %10d\n",pname,sizeof(p))
#else
#else /* defined(__linux__) || defined(__DJGPP__) */
#define NEWLINE printf("\n");
#define COMMENT printf("/*----------------------------------------------------------------------*/\n")
@@ -67,17 +89,19 @@
COMMENT; NEWLINE
#define OFFSET(p,o,pname) if (hex) \
printf("#define %s %08x\n",pname,((int)&p.o)-(int)&p); \
printf("#define %s 0x%08x\n",pname,((int)&p.o)-(int)&p); \
else printf("#define %s %10d\n",pname,((int)&p.o)-(int)&p)
#define OFFSET2(p,o,pname) if (hex) \
printf("#define %s %08x\n",pname,((int)&o)-(int)&p); \
printf("#define %s 0x%08x\n",pname,((int)&o)-(int)&p); \
else printf("#define %s %10d\n",pname,((int)&o)-(int)&p)
#define SIZEOF(p,pname) if (hex) \
printf("#define SIZEOF_%s %08x\n",pname,sizeof(p)); \
printf("#define SIZEOF_%s 0x%08x\n",pname,sizeof(p)); \
else printf("#define SIZEOF_%s %10d\n",pname,sizeof(p))
#endif
#endif /* defined(__linux__) || defined(__DJGPP__) */
#endif /* defined(NASM)*/
int
main (int argc, char **argv)
@@ -87,7 +111,7 @@ main (int argc, char **argv)
static GrGC gc;
#if !GLIDE_HW_TRI_SETUP
static Sstregs sst;
static SstRegs sst;
static struct dataList_s dl;
#endif /* !GLIDE_HW_TRI_SETUP */
@@ -99,18 +123,18 @@ main (int argc, char **argv)
printf("#define __FX_INLINE_H__\n");
printf("\n");
printf("#define kCurGCOffset 0x%XUL\n",
printf("#define kCurGCOffset 0x%lXUL\n",
offsetof(struct _GlideRoot_s, curGC));
#if GLIDE_DISPATCH_SETUP
printf("#define kTriProcOffset 0x%XUL\n",
printf("#define kTriProcOffset 0x%lXUL\n",
offsetof(struct GrGC_s, curArchProcs.triSetupProc));
printf("#define kGCStateInvalid 0x%XUL\n",
printf("#define kGCStateInvalid 0x%lXUL\n",
offsetof(struct GrGC_s, state.invalid));
#endif /* GLIDE_DISPATCH_SETUP */
printf("/* The # of 2-byte entries in the hw fog table */\n");
printf("#define kInternalFogTableEntryCount 0x%XUL\n",
printf("#define kInternalFogTableEntryCount 0x%lXUL\n",
sizeof(dummyRegs.fogTable) >> 1);
printf("\n");

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1.8.2 2003/07/25 07:13:41 dborca
** removed debris
**
** Revision 1.1.1.1.8.1 2003/06/29 18:45:55 guillemj
** Fixed preprocessor invalid token errors.
**
@@ -2937,7 +2940,7 @@ do { \
#define REG_GROUP_SETF_CLAMP(__regBase, __regAddr, __val) \
do { \
const FxU32 fpClampVal = FP_FLOAT_CLAMP(__val); \
REG_GROUP_ASSERT(__regAddr, fpClampVal, FXTRUE); \
REG_GROUP_ASSERT(__regAddr, fpClampVal, FXTRUE); \
SET(((FxU32*)(__regBase))[offsetof(SstRegs, __regAddr) >> 2], fpClampVal); \
GR_INC_SIZE(sizeof(FxU32)); \
} while(0)
@@ -3360,5 +3363,9 @@ extern void
_grSliOriginClear(void);
#endif /* (GLIDE_PLATFORM & GLIDE_HW_CVG) */
GR_ENTRY(grTexDownloadTableExt,
void,
(GrChipID_t tmu, GrTexTable_t type, void *data));
#endif /* __FXGLIDE_H__ */

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:42:32 joseph
** Initial checkin into SourceForge.
**
**
** 1 10/08/98 11:30a Brent
**
@@ -390,18 +393,13 @@ GR_DIENTRY(gu3dfGetInfo, FxBool,
/*
** determine the color format of the input image
*/
#ifdef __GNUC__
/* This function is not found in libgcc.a */
{
char* tempStr = (char*)color_format;
while(*tempStr != '\0') *tempStr++ = toupper(*tempStr);
char *tempStr = (char*)color_format;
while (*tempStr != '\0') {
*tempStr = toupper(*tempStr);
tempStr++;
}
}
#else
{
extern char* strupr(char*);
strupr(color_format);
}
#endif /* __GNUC__ */
i = 0;
format_found = FXFALSE;

View File

@@ -52,6 +52,7 @@ extern "C" {
typedef FxU32 GrColor_t;
typedef FxU8 GrAlpha_t;
typedef FxU32 GrMipMapId_t;
typedef FxU32 GrStipplePattern_t;
typedef FxU8 GrFog_t;
typedef FxU32 GrContext_t;
typedef int (FX_CALL *GrProc)();
@@ -240,6 +241,11 @@ typedef FxI32 GrDitherMode_t;
#define GR_DITHER_2x2 0x1
#define GR_DITHER_4x4 0x2
typedef FxI32 GrStippleMode_t;
#define GR_STIPPLE_DISABLE 0x0
#define GR_STIPPLE_PATTERN 0x1
#define GR_STIPPLE_ROTATE 0x2
typedef FxI32 GrFogMode_t;
#define GR_FOG_DISABLE 0x0
#define GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT 0x1

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:42:33 joseph
** Initial checkin into SourceForge.
**
**
** 1 10/08/98 11:30a Brent
**
@@ -1684,6 +1687,28 @@ GR_ENTRY(grSstWinClose, FxBool, (GrContext_t context))
#undef FN_NAME
} /* grSstWinClose */
/*-------------------------------------------------------------------
Function: grSetNumPendingBuffers
Date: 13-Oct-2000
Implementor(s): mmcclure
Description:
Allow the application to supply the number of pending buffers.
NOTE: this is currently a stub. The assignment that would store the
value is commented out below (marked TODO), so the call has no effect.
Arguments:
NumPendingBuffers - Sent to force number of pending buffers
(currently ignored)
Return:
none
-------------------------------------------------------------------*/
GR_DIENTRY(grSetNumPendingBuffers, void, (FxI32 NumPendingBuffers))
{
/* [dBorca] TODO
*
_GlideRoot.environment.swapPendingCount = NumPendingBuffers;
*/
}
/*-------------------------------------------------------------------
Function: grSelectContext
Date: 18-Jan-98

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:42:34 joseph
** Initial checkin into SourceForge.
**
**
** 1 10/08/98 11:30a Brent
**
@@ -171,9 +174,297 @@ extern const int _grMipMapHostWH[G3_ASPECT_TRANSLATE(GR_ASPECT_1x8)+1][G3_LOD_TR
extern const int _grMipMapHostWH[G3_ASPECT_TRANSLATE(GR_ASPECT_LOG2_1x8)+1][G3_LOD_TRANSLATE(GR_LOD_LOG2_1)+1][2];
#endif
#ifndef GLIDE3_ALPHA
#define GLIDE_POINTCAST_PALETTE
#define GLIDE_POINTCAST_PALETTE 1
/*---------------------------------------------------------------------------
** _grTexDownloadNccTableExt
**
** Downloads an ncctable to the specified _physical_ TMU(s). This
** function is called internally by Glide and should not be executed
** by an application.
**
** Arguments:
**   tmu   - TMU whose ncc table is written
**   which - selects the destination register group: 0 writes nccTable0,
**           any other value writes nccTable1
**   table - source table; its 12 packed_data words are written.
**           A NULL table makes the function return without downloading.
**   start - asserted to be 0
**   end   - asserted to be 11
**
** The download is skipped when the table pointer cached in
** gc->tmu_state[tmu].ncc_table[which] already equals 'table'; the
** palDownloads/palBytes statistics are updated either way.
*/
GR_DDFUNC(_grTexDownloadNccTableExt,
void,
(GrChipID_t tmu, FxU32 which, const GuNccTable *table, int start, int end))
{
#define FN_NAME "_grTexDownloadNccTableExt"
GR_BEGIN_NOFIFOCHECK(FN_NAME,89);
GDBG_INFO_MORE(gc->myLevel,"(%d,%d, 0x%x, %d,%d)\n",tmu,which,table,start,end);
/* Only whole-table downloads (entries 0..11) are supported. */
GR_ASSERT(start==0);
GR_ASSERT(end==11);
/* check for null pointer */
/* NOTE(review): this return leaves without reaching GR_END(); confirm
 * that GR_END() performs no cleanup that is required here. */
if (table == NULL) return;
/* Statistics are counted before the cache check below, so a skipped
 * download is still counted. */
_GlideRoot.stats.palDownloads++;
_GlideRoot.stats.palBytes += (end-start+1)<<2;
/* Only touch the hardware when the requested table differs from the
 * pointer remembered from the previous download. */
if (gc->tmu_state[tmu].ncc_table[which] != table) {
SstRegs* texHW;
int i;
/* Pointcast addresses the single requested TMU; otherwise chip mask 0xE
 * is used. */
#ifdef GLIDE_POINTCAST_PALETTE
texHW = SST_TMU(hw,tmu);
#else
texHW = SST_CHIP(hw,0xE);
#endif
if (which == 0) {
/* One register group of 12 words (mask 0x0FFF) into nccTable0. */
#ifdef GLIDE_POINTCAST_PALETTE
REG_GROUP_BEGIN((0x02UL << tmu), nccTable0, 12, 0x0FFF);
#else
REG_GROUP_BEGIN(0x0EUL, nccTable0, 12, 0x0FFF);
#endif
for (i = 0; i < 12; i++) REG_GROUP_SET(texHW, nccTable0[i], table->packed_data[i]);
REG_GROUP_END();
} else {
/* Same 12-word group, targeted at nccTable1. */
#ifdef GLIDE_POINTCAST_PALETTE
REG_GROUP_BEGIN((0x02UL << tmu), nccTable1, 12, 0x0FFF);
#else
REG_GROUP_BEGIN(0x0EUL, nccTable1, 12, 0x0FFF);
#endif
for (i = 0; i < 12; i++) REG_GROUP_SET(texHW, nccTable1[i], table->packed_data[i]);
REG_GROUP_END();
}
/* Remember what was downloaded so an identical request can be skipped. */
gc->tmu_state[tmu].ncc_table[which] = table;
}
GR_END();
#undef FN_NAME
} /* _grTexDownloadNccTableExt */
/*-------------------------------------------------------------------
Function: _grTexDownloadPaletteExt
Date: 6/9
Implementor(s): jdt
Library: Glide
Description:
Private function to download a palette to the specified tmu
Arguments:
tmu - which tmu to download the palette to
type - table format selector: GR_TEXTABLE_PALETTE writes the low
24 bits of each entry unchanged; any other value takes the
path that packs the top 6 bits of each of the entry's four
bytes into a 24-bit value
pal - the palette data
start - beginning index to download
end - ending index to download
Return:
none
-------------------------------------------------------------------*/
GR_DDFUNC(_grTexDownloadPaletteExt,
void,
(GrChipID_t tmu, GrTexTable_t type, GuTexPalette *pal, int start, int end))
{
#define FN_NAME "_grTexDownloadPaletteExt"
GR_BEGIN_NOFIFOCHECK(FN_NAME, 89);
GDBG_INFO_MORE(gc->myLevel,"(%d,0x%x, %d,%d)\n",tmu,pal,start,end);
GR_CHECK_F(FN_NAME, pal == NULL, "pal invalid");
GR_CHECK_F(FN_NAME, start < 0, "invalid start index");
GR_CHECK_F(FN_NAME, end > 255, "invalid end index");
/* NOTE:
**
** This code broadcasts the palette because in the future, we will
** only support one global texture palette no matter how many TMUs
** there are. This is fallout from the fact that future hardware
** has a unified memory architecture.
**
** Source licensees (meaning arcade or LBE vendors) that require the
** one palette/tmu mode should define GLIDE_POINTCAST_PALETTE on
** the command line for this file. Understand, however, that this
** will not work on future hardware.
*/
#ifdef GLIDE_POINTCAST_PALETTE
/*
** FURTHER NOTE:
** There is a subtlety (nice way of saying BUG) here.
** If TMU0 is specified, then the palette will be broadcast to all
** TMUS. So, if the user downloads TMU1's palette, then TMU0's
** palette, TMU0's palette will be on *both* TMUs. This is a
** pretty strong indicator that no one is using separate palettes
** on different TMUs.
*/
hw = SST_TMU(hw,tmu);
#else
hw = SST_CHIP(hw,0xE);
#endif
_GlideRoot.stats.palDownloads++;
_GlideRoot.stats.palBytes += ((end - start + 1) << 2);
/* We divide the writes into 3 chunks trying to group things into
* complete 8 word grouped packets to fit the nccTable palette
* format: stuff before the 8 word alignment, aligned writes, and
* stuff after the 8 word alignment to the end. The slop regions
* are one packet apiece.
*/
{
#ifdef GLIDE_POINTCAST_PALETTE
const FifoChipField chipId = (FifoChipField)(0x02UL << tmu);
#else
const FifoChipField chipId = (FifoChipField)0x0EUL;
#endif
/* endSlop: 'end' rounded down to a multiple of 8.
 * startSlop: last index of the 8-entry group containing 'start',
 * clamped to 'end'.
 */
const int endSlop = (end & ~0x07);
const int startSlop = MIN(((start + 8) & ~0x07) - 1, end);
int i = start;
/* Is the start of the palette range unaligned or is the end of
* the range less than a completely aligned range?
*/
if (type == GR_TEXTABLE_PALETTE) {
if (((start & 0x07) != 0) || (end < ((start + 8) & ~0x07))) {
const FxI32 slopCount = startSlop - start + 1;
GR_ASSERT((slopCount > 0) && (slopCount <= 8));
REG_GROUP_BEGIN(chipId, nccTable0[4 + (start & 0x07)],
slopCount, (0xFF >> (8 - slopCount)));
while(i < start + slopCount) {
/* '&' binds tighter than '|': the low 24 bits of the entry are
 * ORed with 0x80000000 and with (i & 0xFE) shifted left by 23. */
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF));
i++;
}
REG_GROUP_END();
}
/* Do all of the aligned palette ranges. */
while(i < endSlop) {
const int endIndex = i + 8;
REG_GROUP_BEGIN(chipId, nccTable0[4], 8, 0xFF);
while(i < endIndex) {
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF));
i++;
}
REG_GROUP_END();
}
/* Do we have any more slop at the end of the range? */
if (i <= end) {
const FxU32 slopCount = end - endSlop + 1;
REG_GROUP_BEGIN(chipId, nccTable0[4],
slopCount, (0xFF >> (8 - slopCount)));
while(i <= end) {
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | pal->data[i] & 0xFFFFFF));
i++;
}
REG_GROUP_END();
}
}
else {
/* Same three-chunk walk as above, but each entry is repacked first:
 * the top 6 bits of each of its four bytes are shifted together into
 * a contiguous 24-bit value (p1) before being written.
 */
if (((start & 0x07) != 0) || (end < ((start + 8) & ~0x07))) {
const FxI32 slopCount = startSlop - start + 1;
GR_ASSERT((slopCount > 0) && (slopCount <= 8));
REG_GROUP_BEGIN(chipId, nccTable0[4 + (start & 0x07)],
slopCount, (0xFF >> (8 - slopCount)));
while(i < start + slopCount) {
FxU32 p1, p2, p3, p4;
/* bits 31:26 -> 23:18, bits 23:18 -> 17:12 */
p1 = p2 = pal->data[i];
p1 &= 0xfc000000; p2 &= 0x00fc0000;
p1 >>= 8; p2 >>= 6;
/* bits 15:10 -> 11:6, bits 7:2 -> 5:0 */
p3 = p4 = pal->data[i];
p3 &= 0x0000fc00; p4 &= 0x000000fc;
p3 >>= 4; p4 >>= 2;
p1 |= p2; p3 |= p4; p1 |= p3;
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | p1));
i++;
}
REG_GROUP_END();
}
/* Do all of the aligned palette ranges. */
while(i < endSlop) {
const int endIndex = i + 8;
REG_GROUP_BEGIN(chipId, nccTable0[4], 8, 0xFF);
while(i < endIndex) {
FxU32 p1, p2, p3, p4;
p1 = p2 = pal->data[i];
p1 &= 0xfc000000; p2 &= 0x00fc0000;
p1 >>= 8; p2 >>= 6;
p3 = p4 = pal->data[i];
p3 &= 0x0000fc00; p4 &= 0x000000fc;
p3 >>= 4; p4 >>= 2;
p1 |= p2; p3 |= p4; p1 |= p3;
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | p1));
i++;
}
REG_GROUP_END();
}
/* Do we have any more slop at the end of the range? */
if (i <= end) {
const FxU32 slopCount = end - endSlop + 1;
REG_GROUP_BEGIN(chipId, nccTable0[4],
slopCount, (0xFF >> (8 - slopCount)));
while(i <= end) {
FxU32 p1, p2, p3, p4;
p1 = p2 = pal->data[i];
p1 &= 0xfc000000; p2 &= 0x00fc0000;
p1 >>= 8; p2 >>= 6;
p3 = p4 = pal->data[i];
p3 &= 0x0000fc00; p4 &= 0x000000fc;
p3 >>= 4; p4 >>= 2;
p1 |= p2; p3 |= p4; p1 |= p3;
REG_GROUP_SET(hw, nccTable0[4 + (i & 0x07)],
(0x80000000 | ((i & 0xFE) << 23) | p1));
i++;
}
REG_GROUP_END();
}
}
}
GR_END();
#undef FN_NAME
} /* _grTexDownloadPaletteExt */
/*-------------------------------------------------------------------
  Function: grTexDownloadTableExt
  Date: 6/3
  Implementor(s): jdt, GaryMcT
  Library: glide
  Description:
    download look up table data to a tmu
  Arguments:
    tmu - which tmu
    type - what type of table to download
      One of:
        GR_TEXTABLE_NCC0
        GR_TEXTABLE_NCC1
        GR_TEXTABLE_PALETTE
        GR_TEXTABLE_PALETTE_6666_EXT
    void *data - pointer to table data
  Return:
    none
  -------------------------------------------------------------------*/
GR_ENTRY(grTexDownloadTableExt,
         void,
         (GrChipID_t tmu, GrTexTable_t type, void *data))
{
  /* FN_NAME is #undef'd at the end of the preceding function, so it has
   * to be defined here before GR_CHECK_TMU can reference it. */
#define FN_NAME "grTexDownloadTableExt"
  GR_BEGIN_NOFIFOCHECK(FN_NAME,89);
  GDBG_INFO_MORE(gc->myLevel,"(%d,%d,0x%x)\n",tmu,type,data);
  GR_CHECK_TMU(FN_NAME,tmu);
  GR_CHECK_F(myName, type > GR_TEXTABLE_PALETTE_6666_EXT, "invalid table specified");
  GR_CHECK_F(myName, !data, "invalid data pointer");

  /* Record the table type that was last requested. */
  gc->state.tex_table = type;

  if ((type == GR_TEXTABLE_PALETTE) || (type == GR_TEXTABLE_PALETTE_6666_EXT)) {
    /* Either palette format: download all 256 entries. */
    _grTexDownloadPaletteExt(tmu, type, (GuTexPalette *)data, 0, 255);
  } else {
    /* Type is an ncc table: 'type' doubles as the table selector and the
     * full 12-word table (entries 0..11) is downloaded. */
    _grTexDownloadNccTableExt(tmu, type, (GuNccTable*)data, 0, 11);
  }
  GR_END();
#undef FN_NAME
} /* grTexDownloadTableExt */
#undef GLIDE_POINTCAST_PALETTE
/*---------------------------------------------------------------------------
** _grTexDownloadNccTable
**
@@ -440,7 +731,6 @@ GR_DDFUNC(_grTexDownloadPalette,
Return:
none
-------------------------------------------------------------------*/
#if defined(GLIDE3) && defined(GLIDE3_ALPHA)
GR_ENTRY(grTexDownloadTable,
void,
(GrTexTable_t type, void *data))
@@ -460,27 +750,6 @@ GR_ENTRY(grTexDownloadTable,
}
GR_END();
} /* grTexDownloadTable */
#else
GR_ENTRY(grTexDownloadTable,
void,
(GrChipID_t tmu, GrTexTable_t type, void *data))
{
GR_BEGIN_NOFIFOCHECK("grTexDownloadTable",89);
GDBG_INFO_MORE(gc->myLevel,"(%d,%d,0x%x)\n",tmu,type,data);
GR_CHECK_TMU(FN_NAME,tmu);
GR_CHECK_F(FN_NAME, type > GR_TEXTABLE_PALETTE, "invalid table specified");
GR_CHECK_F(FN_NAME, !data, "invalid data pointer");
if ((type == GR_TEXTABLE_PALETTE) && (GR_TEXTABLE_PALETTE_6666)) /* Need Palette Download Code */
_grTexDownloadPalette(tmu, type, (GuTexPalette *)data, 0, 255);
else { /* Type is an ncc table */
_grTexDownloadNccTable(tmu, type, (GuNccTable*)data, 0, 11);
/* _grTexDownloadNccTable(tmu, type, (GuNccTable*)data, 0, 11); */
}
GR_END();
#undef FN_NAME
} /* grTexDownloadTable */
#endif
/*-------------------------------------------------------------------

View File

@@ -19,6 +19,9 @@
;; $Header$
;; $Revision$
;; $Log$
;; Revision 1.1.1.1 1999/12/07 21:42:35 joseph
;; Initial checkin into SourceForge.
;;
;;
;; 1 10/08/98 11:30a Brent
;;
@@ -60,117 +63,127 @@
; B4 Chip field fix.
;;
TITLE xdraw2.asm
OPTION OLDSTRUCTS
%include "xos.inc"
.586P
.MMX
.K3D
extrn _GlideRoot
extrn _FifoMakeRoom
%MACRO GR_FIFO_WRITE 3
mov [%1 + %2], %3
%ENDMACRO ; GR_FIFO_WRITE
EXTRN __GlideRoot : DWORD
EXTRN __FifoMakeRoom : NEAR
%MACRO WRITE_MM1_FIFO_ALIGNED 1
_DATA SEGMENT
One DD 03f800000r
; 3DNow!
%ifdef GL_AMD3D
movq [fifo+%1], mm1 ; store current param | previous param
%endif
%ENDMACRO ; WRITE_MM1_FIFO_ALIGNED
%MACRO WRITE_MM1LOW_FIFO 0
; 3DNow
%ifdef GL_AMD3D
movd [fifo], mm1 ; store current param | previous param
%endif
%ENDMACRO ; WRITE_MM1LOW_FIFO
segment DATA
One DD 1.0
Area DD 0
_DATA ENDS
;;; Definitions of cvg regs and glide root structures.
INCLUDE fxgasm.h
%INCLUDE "fxgasm.h"
;; enables/disables trisProcessed and trisDrawn counters
STATS = 1
%define STATS 1
; Arguments (STKOFF = 16 from 4 pushes)
STKOFF = 16
_va$ = 4 + STKOFF
_vb$ = 8 + STKOFF
_vc$ = 12 + STKOFF
STKOFF equ 16
_va$ equ 4 + STKOFF
_vb$ equ 8 + STKOFF
_vc$ equ 12 + STKOFF
;; coordinate offsets into vertex.
;; NB: These are constants and are not
;; user settable like the rest of the
;; parameter offset. Weird.
X = 0
Y = 4
X equ 0
Y equ 4
CONST SEGMENT
$T2003 DD 046400000r ; 12288
$T2005 DD 03f800000r ; 1
$T2006 DD 043800000r ; 256
CONST ENDS
segment CONST
T2003 DD 12288.0 ; 12288
T2005 DD 1.0 ; 1
T2006 DD 256.0 ; 256
PROC_TYPE MACRO procType:=<Default>
IFDEF GL_AMD3D
EXITM <__trisetup_3DNow_&procType&@12>
ELSE
EXITM <__trisetup_Default_&procType&@12>
ENDIF
ENDM
%MACRO PROC_TYPE 1
%IFDEF GL_AMD3D
proc _trisetup_3DNow_%1, 12
%ELSE
proc _trisetup_Default_%1, 12
%ENDIF
%ENDM
;--------------------------------------------------------------------------
_TEXT SEGMENT PAGE PUBLIC USE32 'CODE'
ASSUME DS: FLAT, SS: FLAT
segment TEXT
ALIGN 32
PUBLIC PROC_TYPE(cull)
PROC_TYPE(cull) PROC NEAR
PROC_TYPE cull
GLIDE_CULLING textequ <1>
GLIDE_PACK_RGB textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
%define GLIDE_CULLING 1
%define GLIDE_PACK_RGB 0
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
PROC_TYPE(cull) ENDP
endp
ALIGN 32
PUBLIC PROC_TYPE()
PROC_TYPE() PROC NEAR
PROC_TYPE Default
GLIDE_CULLING textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
%define GLIDE_CULLING 0
%define GLIDE_PACK_RGB 0
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
PROC_TYPE() ENDP
endp
IFNDEF GL_AMD3D
%IFNDEF GL_AMD3D
ALIGN 32
PUBLIC __trisetup_clip_coor_thunk@12
__trisetup_clip_coor_thunk@12 PROC NEAR
proc _trisetup_clip_coor_thunk, 12
gc TEXTEQU <eax> ; Current graphics context
procPtr TEXTEQU <ebx>
vPtr TEXTEQU <ecx>
%define gc eax ; Current graphics context
%define procPtr edx
%define vPtr ecx
;; Call through to the gc->curArchProcs.drawTrianglesProc w/o
;; adding extra stuff to the stack. I wish we could actually
;; do a direct return here w/o too much work.
lea vPtr, [esp + _va$ - STKOFF] ; Get vertex pointer address
mov gc, [__GlideRoot + curGC]; GR_DCL_GC
mov gc, [_GlideRoot + curGC]; GR_DCL_GC
;; If debugging make sure that we're in clip coordinates
IFDEF GLIDE_DEBUG
mov ebx, [gc + CoordinateSpace]
test ebx, 1
%IFDEF GLIDE_DEBUG
test dword [gc + CoordinateSpace], 1
jnz __clipSpace
xor eax, eax
mov [eax], eax
__clipSpace:
ENDIF ; GLIDE_DEBUG
%ENDIF ; GLIDE_DEBUG
mov procPtr, [gc + drawTrianglesProc]; Prefetch drawTriangles proc addr
push vPtr ; vertex array address
@@ -180,10 +193,7 @@ ENDIF ; GLIDE_DEBUG
call procPtr ; (*gc->curArchProcs.drawTrianglesProc)(grDrawVertexArray, 3, vPtr)
ret 12 ; pop 3 dwords (vertex addrs) and return
__trisetup_clip_coor_thunk@12 ENDP
ret ; pop 3 dwords (vertex addrs) and return
endp
ENDIF ; !GL_AMD3D
_TEXT ENDS
END
%ENDIF ; !GL_AMD3D

View File

@@ -20,6 +20,9 @@
;; $Header$
;; $Revision$
;; $Log$
;; Revision 1.1.1.1 1999/12/07 21:42:35 joseph
;; Initial checkin into SourceForge.
;;
;
; 2 10/30/97 6:53p Peter
; first real cut at tri asm
@@ -38,42 +41,26 @@
;; AMD3D version
;;--------------------------------------------------------------------------
ifdef GL_AMD3D
%ifdef GL_AMD3D
TITLE xdraw2.inc
GR_FIFO_WRITE MACRO __addr, __offset, __data
mov [__addr + __offset], __data
ENDM ; GR_FIFO_WRITE
WRITE_MM1_FIFO_ALIGNED MACRO __offset
movq [fifo+__offset], mm1 ; store current param | previous param
ENDM ; WRITE_MM1_FIFO_ALIGNED
WRITE_MM1LOW_FIFO MACRO
movd [fifo], mm1 ; store current param | previous param
ENDM ; WRITE_MM1LOW_FIFO
gc TEXTEQU <edi> ; points to graphics context
fifo TEXTEQU <ebp> ; points to fifo entries
tempVal TEXTEQU <esi>
%define gc edi ; points to graphics context
%define fifo ebp ; points to fifo entries
%define tempVal esi
;; Prologue stuff
push edi ; save caller's register variable
mov gc,[__GlideRoot+curGC]; GR_DCL_GC
mov gc,[_GlideRoot+curGC]; GR_DCL_GC
push ebp ; save frame pointer
push ebx ; save caller's register variable
IF GLIDE_CULLING
fa TEXTEQU <eax> ; vtx a from caller
fb TEXTEQU <ebx> ; vtx b from caller
fc TEXTEQU <ecx> ; vtx c from caller
%IF GLIDE_CULLING
%define fa eax ; vtx a from caller
%define fb ebx ; vtx b from caller
%define fc ecx ; vtx c from caller
cull TEXTEQU <edx> ; cull mode
intArea TEXTEQU <ecx> ; area temp storage
%define cull edx ; cull mode
%define intArea ecx ; area temp storage
mov fb, [esp + _vb$ - 4] ; get base address of vertex B
push esi ; save caller's register variable
@@ -84,10 +71,10 @@ intArea TEXTEQU <ecx> ; area temp storage
femms ; will use AMD3D, clear FPU/MMX registers
cmp cull, 0 ; culling enabled ?
mov tempVal, [__GlideRoot + curTriSize]
mov tempVal, [_GlideRoot + curTriSize]
;; Cull Check
jz nocull ; nope, no culling
jz .nocull ; nope, no culling
mov fa, [esp + _va$] ; get base address of vertex A
movq mm2, [fc + X] ; yc | xc
@@ -126,20 +113,20 @@ intArea TEXTEQU <ecx> ; area temp storage
jge __cullFail ; triangle facing away from viewer, culled
cmp ebx, tempVal ; fifo space required >= space available ?
jge __triBegin ; yup, push out triangle data to Voodoo
jge .__triBegin ; yup, push out triangle data to Voodoo
push @Line ; line number inside this function
push __LINE__ ; line number inside this function
push 0h ; pointer to function name = NULL
push tempVal ; fifo space required
call __FifoMakeRoom ; note: updates fifoPtr
call _FifoMakeRoom ; note: updates fifoPtr
add esp, 12 ; remove 3 DWORD arguments from stack
jmp __triBegin ; merge back with short path
jmp .__triBegin ; merge back with short path
;; culling disabled
nocull:
.nocull:
;; Check to make sure that we have enough room for
;; the complete triangle packet.
@@ -147,48 +134,48 @@ nocull:
mov ebx, [gc + fifoRoom] ; fifo space available
cmp ebx, tempVal ; fifo spce available >= space needed ?
jge __triBegin ; yup, ready to draw triangle
jge .__triBegin ; yup, ready to draw triangle
push @Line ; line number inside this function
push __LINE__ ; line number inside this function
push 0h ; pointer to function name = NULL
push tempVal ; fifo space needed
call __FifoMakeRoom ; note: updates fifoPtr
call _FifoMakeRoom ; note: updates fifoPtr
add esp, 12 ; remove 3 DWORD arguments from stack
nop ; filler
ELSE ; !GLIDE_CULLING
%ELSE ; !GLIDE_CULLING
lea eax, [esp+ _va$] ; pointer to vertex pointers
push esi ; save caller's register variable
mov tempVal, [__GlideRoot + curTriSize] ; data for whole triangle in bytes
mov tempVal, [_GlideRoot + curTriSize] ; data for whole triangle in bytes
mov ebx, [gc + fifoRoom] ; fifo space available
add tempVal, 4 ; fifo space needed (include 4-byte header)
femms ; will use AMD3D, clear FPU/MMX registers
cmp ebx, tempVal ; fifo spce available >= space needed ?
jge __triBegin ; yup, ready to draw triangle
jge .__triBegin ; yup, ready to draw triangle
push @Line ; line number inside this function
push __LINE__ ; line number inside this function
push 0h ; pointer to function name = NULL
push tempVal ; fifo space needed
call __FifoMakeRoom ; note: updates fifoPtr
call _FifoMakeRoom ; note: updates fifoPtr
add esp, 12 ; remove 3 DWORD arguments from stack
nop ; filler
ENDIF ; GLIDE_CULLING
%ENDIF ; GLIDE_CULLING
dlp TEXTEQU <ebx> ; points to dataList structure
dlpstrt TEXTEQU <ecx> ; points to begin of dataList structure
vertex TEXTEQU <edx> ; the current vertex
packCol TEXTEQU <esi>
%define dlp ebx ; points to dataList structure
%define dlpstrt ecx ; points to begin of dataList structure
%define vertex edx ; the current vertex
%define packCol esi
__triBegin:
.__triBegin:
mov eax, [gc+triPacketHdr]; Packet 3 header
lea dlp,[gc + tsuDataList]; Reset the dataList
@@ -198,7 +185,7 @@ __triBegin:
mov dlpstrt, dlp ; save pointer to start of dataList
test fifo, 4 ; is fifo pointer qword aligned ?
jz __fifo_aligned ; yes, it is qword aligned
jz .__fifo_aligned ; yes, it is qword aligned
movq mm1, [vertex+X] ; y | x
GR_FIFO_WRITE fifo, 0, eax ; write header to fifo; now qword aligned
@@ -207,8 +194,8 @@ __triBegin:
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write y | x
nop ; filler
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm1, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -227,7 +214,7 @@ IF GLIDE_PACK_ALPHA
por mm1, mm2 ; 00000000 | 00rrggbb
por mm1, mm3 ; 00000000 | aarrggbb
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm1, [vertex+r] ; g | r
@@ -241,7 +228,7 @@ ELSE ; !GLIDE_PACK_ALPHA
psrlq mm1, 24 ; 00000000 | 0000gg00
por mm1, mm2 ; 00000000 | 00rrggbb
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
;; here: one DWORD in "write buffer", RGB(A)
@@ -273,7 +260,7 @@ __paramLoop1a:
nop ; filler
jmp __paramLoopDoneWBone1 ; merge back into common stream
ELSE ; ! GLIDE_PACK_RGB
%ELSE ; ! GLIDE_PACK_RGB
;; here: "write buffer" empty
@@ -281,14 +268,14 @@ ELSE ; ! GLIDE_PACK_RGB
test eax, eax ; at end of list ?
lea dlp, [dlp+4] ; dlp++
jz __paramLoopDoneWBzero1; yes, "write buffer" empty
jz .__paramLoopDoneWBzero1; yes, "write buffer" empty
__paramLoop1a:
.__paramLoop1a:
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jz __paramLoopDoneWBone1 ; exit, write buffer contains one DWORD
jz .__paramLoopDoneWBone1 ; exit, write buffer contains one DWORD
movd mm2, [eax+vertex] ; get next parameter
add dlp, 8 ; dlp += 2
@@ -300,14 +287,14 @@ __paramLoop1a:
test eax, eax ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jnz __paramLoop1a ; nope, copy next parameter
jnz .__paramLoop1a ; nope, copy next parameter
nop ; filler
jmp __paramLoopDoneWBzero1; write buffer empty
jmp .__paramLoopDoneWBzero1; write buffer empty
ENDIF ; GLIDE_PACK_RGB
%ENDIF ; GLIDE_PACK_RGB
__fifo_aligned:
.__fifo_aligned:
movd mm2, [vertex+X] ; y | x of vertex A
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
@@ -317,8 +304,8 @@ __fifo_aligned:
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write x | header
movd mm1, [vertex+Y] ; 0 | y of vertex A
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm4, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -337,7 +324,7 @@ IF GLIDE_PACK_ALPHA
por mm4, mm2 ; 00000000 | 00rrggbb
por mm4, mm3 ; 00000000 | aarrggbb
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm4, [vertex+r] ; g | r
@@ -351,7 +338,7 @@ ELSE ; !GLIDE_PACK_ALPHA
psrlq mm4, 24 ; 00000000 | 0000gg00
por mm4, mm2 ; 00000000 | 00rrggbb
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
punpckldq mm1, mm4 ; RGB(A) | y
mov eax, [dlp] ; get first offset from the data list
@@ -387,14 +374,14 @@ __paramLoop1b:
nop ; filler
jmp __paramLoopDoneWBzero1; write buffer empty
ELSE ; !GLIDE_PACK_RGB
%ELSE ; !GLIDE_PACK_RGB
mov eax, [dlp] ; get first offset from the data list
add dlp, 4 ; dlp++
cmp eax, 0 ; end of list ?
jz __paramLoopDoneWBone1 ; yes, "write buffer" has y data
jz .__paramLoopDoneWBone1 ; yes, "write buffer" has y data
__paramLoop1b:
.__paramLoop1b:
movd mm2, [eax+vertex] ; get next parameter
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
@@ -405,16 +392,16 @@ __paramLoop1b:
test eax, eax ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jz __paramLoopDoneWBzero1; exit, "write buffer" empty
jz .__paramLoopDoneWBzero1; exit, "write buffer" empty
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp-4] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jnz __paramLoop1b ; nope, copy next parameter
ENDIF
jnz .__paramLoop1b ; nope, copy next parameter
%ENDIF
__paramLoopDoneWBone1:
.__paramLoopDoneWBone1:
;; here: "write buffer" has one DWORD left over from vertex A
@@ -430,8 +417,8 @@ __paramLoopDoneWBone1:
movd mm1, [vertex+Y] ; 0 | y of vertex B
nop ; filler
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm4, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -450,7 +437,7 @@ IF GLIDE_PACK_ALPHA
por mm4, mm2 ; 00000000 | 00rrggbb
por mm4, mm3 ; 00000000 | aarrggbb
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm4, [vertex+r] ; g | r
@@ -464,7 +451,7 @@ ELSE ; !GLIDE_PACK_ALPHA
add dlp, 4 ; next data list entry
por mm4, mm2 ; 00000000 | 00rrggbb
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
punpckldq mm1, mm4 ; RGB(A) | y
mov eax, [dlp] ; get first offset from the data list
@@ -500,14 +487,14 @@ __paramLoop2b:
nop ; filler
jmp __paramLoopDoneWBzero2; write buffer empty
ELSE ; !GLIDE_PACK_RGB
%ELSE ; !GLIDE_PACK_RGB
mov eax, [dlp] ; get first offset from the data list
add dlp, 4 ; dlp++
test eax, eax ; end of list ?
jz __paramLoopDoneWBone2 ; yes, "write buffer" has y data
jz .__paramLoopDoneWBone2 ; yes, "write buffer" has y data
__paramLoop2b:
.__paramLoop2b:
movd mm2, [eax+vertex] ; get next parameter
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
@@ -518,20 +505,20 @@ __paramLoop2b:
test eax, eax ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jz __paramLoopDoneWBzero2; exit, "write buffer" empty
jz .__paramLoopDoneWBzero2; exit, "write buffer" empty
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp-4] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jnz __paramLoop2b ; nope, copy next parameter
jnz .__paramLoop2b ; nope, copy next parameter
nop ; filler
jmp __paramLoopDoneWBone2 ; write buffer contains one DWORD
ENDIF
jmp .__paramLoopDoneWBone2 ; write buffer contains one DWORD
%ENDIF
__paramLoopDoneWBzero1:
.__paramLoopDoneWBzero1:
mov vertex, [esp + _vb$] ; Current vertex = B
mov dlp, dlpstrt ; Reset the dataList
@@ -542,8 +529,8 @@ __paramLoopDoneWBzero1:
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write y | x of vertex B
nop ; filler
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm1, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -562,7 +549,7 @@ IF GLIDE_PACK_ALPHA
por mm1, mm3 ; 00000000 | aarrggbb
add dlp, 8 ; skip data list entry "a"
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm1, [vertex+r] ; g | r
@@ -576,11 +563,11 @@ ELSE ; !GLIDE_PACK_ALPHA
por mm1, mm2 ; 00000000 | 00rrggbb
add dlp, 4 ; next data list entry
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
;; here: one DWORD in "write buffer", RGB(A)
mov eax, DWORD PTR [dlp] ; get first offset from the data list
mov eax, dword [dlp] ; get first offset from the data list
add dlp, 4 ; dlp++
test eax, eax ; end of list ?
@@ -608,7 +595,7 @@ __paramLoop2a:
nop ; filler
jmp __paramLoopDoneWBone2 ; merge back into common stream
ELSE ; ! GLIDE_PACK_RGB
%ELSE ; ! GLIDE_PACK_RGB
;; here: "write buffer" empty
@@ -616,14 +603,14 @@ ELSE ; ! GLIDE_PACK_RGB
add dlp, 4 ; dlp++
cmp eax, 0 ; at end of list ?
jz __paramLoopDoneWBzero2; yes, "write buffer" empty
jz .__paramLoopDoneWBzero2; yes, "write buffer" empty
__paramLoop2a:
.__paramLoop2a:
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jz __paramLoopDoneWBone2 ; exit, write buffer contains one DWORD
jz .__paramLoopDoneWBone2 ; exit, write buffer contains one DWORD
movd mm2, [eax+vertex] ; get next parameter
add dlp, 8 ; dlp += 2
@@ -635,12 +622,12 @@ __paramLoop2a:
test eax, eax ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jnz __paramLoop2a ; nope, copy next parameter
jnz .__paramLoop2a ; nope, copy next parameter
ENDIF ; GLIDE_PACK_RGB
%ENDIF ; GLIDE_PACK_RGB
__paramLoopDoneWBzero2:
.__paramLoopDoneWBzero2:
mov vertex, [esp + _vc$] ; Current vertex = C
mov dlp, dlpstrt ; Reset the dataList
@@ -652,8 +639,8 @@ __paramLoopDoneWBzero2:
nop ; filler
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm1, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -672,7 +659,7 @@ IF GLIDE_PACK_ALPHA
por mm1, mm3 ; 00000000 | aarrggbb
add dlp, 8 ; skip data list entry "a"
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm1, [vertex+r] ; g | r
@@ -686,7 +673,7 @@ ELSE ; !GLIDE_PACK_ALPHA
por mm1, mm2 ; 00000000 | 00rrggbb
add dlp, 4 ; next data list entry
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
;; here: one DWORD in "write buffer", RGB(A)
@@ -718,7 +705,7 @@ __paramLoop3a:
nop ; filler
jmp __paramLoopDoneWBone3 ; merge back into common stream
ELSE ; ! GLIDE_PACK_RGB
%ELSE ; ! GLIDE_PACK_RGB
;; here: "write buffer" empty
@@ -726,14 +713,14 @@ ELSE ; ! GLIDE_PACK_RGB
add dlp, 4 ; dlp++
test eax, eax ; at end of list ?
jz __paramLoopDoneWBzero3; yes, "write buffer" empty
jz .__paramLoopDoneWBzero3; yes, "write buffer" empty
__paramLoop3a:
.__paramLoop3a:
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jz __paramLoopDoneWBone3 ; exit, write buffer contains one DWORD
jz .__paramLoopDoneWBone3 ; exit, write buffer contains one DWORD
movd mm2, [eax+vertex] ; get next parameter
add dlp, 8 ; dlp += 2
@@ -745,15 +732,15 @@ __paramLoop3a:
test eax, eax ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jnz __paramLoop3a ; nope, copy next parameter
jnz .__paramLoop3a ; nope, copy next parameter
mov esp, esp ; filler
jmp __paramLoopDoneWBzero3; write buffer empty
jmp .__paramLoopDoneWBzero3; write buffer empty
ENDIF ; GLIDE_PACK_RGB
%ENDIF ; GLIDE_PACK_RGB
__paramLoopDoneWBone2:
.__paramLoopDoneWBone2:
;; here: "write buffer" has one DWORD left over from vertex B
@@ -769,8 +756,8 @@ __paramLoopDoneWBone2:
movd mm1, [vertex+Y] ; 0 | y of vertex C
mov esp, esp ; filler
IF GLIDE_PACK_RGB
IF GLIDE_PACK_ALPHA
%IF GLIDE_PACK_RGB
%IF GLIDE_PACK_ALPHA
;; assumes color and alpha values < 256.0
movq mm4, [vertex+r] ; g | r
movd mm2, [vertex+b] ; 0 | b
@@ -789,7 +776,7 @@ IF GLIDE_PACK_ALPHA
por mm4, mm2 ; 00000000 | 00rrggbb
por mm4, mm3 ; 00000000 | aarrggbb
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
;; assumes color values < 256.0
movq mm4, [vertex+r] ; g | r
@@ -803,7 +790,7 @@ ELSE ; !GLIDE_PACK_ALPHA
add dlp, 4 ; next data list entry
por mm4, mm2 ; 00000000 | 00rrggbb
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
punpckldq mm1, mm4 ; RGB(A) | y
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
@@ -839,15 +826,15 @@ __paramLoop3b:
nop ; filler
jmp __paramLoopDoneWBzero3; write buffer empty
ELSE ; !GLIDE_PACK_RGB
%ELSE ; !GLIDE_PACK_RGB
mov eax, [dlp] ; get first offset from the data list
add dlp, 4 ; dlp++
test eax, eax ; end of list ?
jz __paramLoopDoneWBone3 ; yes, "write buffer" has y data
jz .__paramLoopDoneWBone3 ; yes, "write buffer" has y data
__paramLoop3b:
.__paramLoop3b:
movd mm2, [eax+vertex] ; get next parameter
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
@@ -858,27 +845,27 @@ __paramLoop3b:
cmp eax, 0 ; at end of offset list (offset == 0) ?
WRITE_MM1_FIFO_ALIGNED -8 ; PCI write current param | previous param
jz __paramLoopDoneWBzero3; exit, "write buffer" empty
jz .__paramLoopDoneWBzero3; exit, "write buffer" empty
movd mm1, [eax+vertex] ; get next parameter
mov eax, [dlp-4] ; offset = *(dlp + 1)
test eax, eax ; at end of offset list (offset == 0) ?
jnz __paramLoop3b ; nope, copy next parameter
ENDIF
jnz .__paramLoop3b ; nope, copy next parameter
%ENDIF
__paramLoopDoneWBone3:
.__paramLoopDoneWBone3:
; "write buffer" contains one DWORD that needs to be flushed
WRITE_MM1LOW_FIFO ;
add fifo, 4 ;
__paramLoopDoneWBzero3:
.__paramLoopDoneWBzero3:
;; Update gc->fifoPtr and gc->fifoRoom
mov ecx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn
mov ecx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn
mov eax, fifo ; new fifo pointer
mov ebx, [gc + fifoPtr] ; old fifo pointer
@@ -887,10 +874,10 @@ __paramLoopDoneWBzero3:
mov edx, [gc + fifoRoom] ; old fifo space available
inc ecx ; _GlideRoot.stats.trisDrawn++
mov ebp, [__GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed
mov ebp, [_GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed
sub eax, ebx ; new fifo ptr - old fifo ptr = additional fifo space used
mov [__GlideRoot + trisDrawn], ecx ;
mov [_GlideRoot + trisDrawn], ecx ;
sub edx, eax ; new fifo space available
mov eax, 1h ; return value = triangle drawn
@@ -901,7 +888,7 @@ __paramLoopDoneWBzero3:
inc ebp ; _GlideRoot.stats.trisProcessed++
pop esi ; restore caller's register variable
mov [__GlideRoot + trisProcessed], ebp ;
mov [_GlideRoot + trisProcessed], ebp ;
pop ebx ; restore caller's register variable
pop ebp ; restore frame pointer
@@ -909,12 +896,12 @@ __paramLoopDoneWBzero3:
femms ; no more AMD3D code, clear FPU/MMX regs
ret 12 ; return to caller
ret ; return to caller
IF GLIDE_CULLING
%IF GLIDE_CULLING
__cullFail:
mov ebp, [__GlideRoot + trisProcessed]; triangles processed so far
mov ebp, [_GlideRoot + trisProcessed]; triangles processed so far
xor eax, eax ; return value = triangle not drawn
femms ; no more AMD3D code, clear FPU/MMX regs
@@ -923,14 +910,14 @@ __cullFail:
inc ebp ; _GlideRoot.stats.trisProcessed++;
pop esi
mov [__GlideRoot + trisProcessed], ebp
mov [_GlideRoot + trisProcessed], ebp
pop ebx
pop ebp ; restore frame pointer
pop edi
ret 12
ENDIF ; GLIDE_CULLING
ret
%ENDIF ; GLIDE_CULLING
;---------------------------------------------------------------------------
;
@@ -938,26 +925,24 @@ ENDIF ; GLIDE_CULLING
;
;---------------------------------------------------------------------------
endif ; !defined GL_AMD3D
%endif ; !defined GL_AMD3D
;;--------------------------------------------------------------------------
;; start original code
;;--------------------------------------------------------------------------
ifndef GL_AMD3D
%ifndef GL_AMD3D
TITLE xdraw2.inc
; Ugly, but this seems to work around the problem with locally defined
; data segment globals not getting relocated properly when using
; djgpp.
zArea TEXTEQU <One + 04h>
gc TEXTEQU <esi> ; points to graphics context
%define zArea One + 04h
%define gc esi ; points to graphics context
;; Prologue stuff
push esi
mov gc, [__GlideRoot + curGC] ;; GR_DCL_GC
mov gc, [_GlideRoot + curGC] ;; GR_DCL_GC
push edi
push ebx
@@ -966,18 +951,18 @@ gc TEXTEQU <esi> ; points to graphics context
nop
align 4
IF GLIDE_CULLING
fa TEXTEQU <eax> ; vtx a from caller
fb TEXTEQU <ebx> ; vtx b from caller
fc TEXTEQU <ecx> ; vtx c from caller
%IF GLIDE_CULLING
%define fa eax ; vtx a from caller
%define fb ebx ; vtx b from caller
%define fc ecx ; vtx c from caller
cull TEXTEQU <edx>
intArea TEXTEQU <ebp> ; temp Y storage
%define cull edx
%define intArea ebp ; temp Y storage
; some useful floating load and store macros <ala gmt>
flds TEXTEQU <fld DWORD PTR>
fsubs TEXTEQU <fsub DWORD PTR>
fmuls TEXTEQU <fmul DWORD PTR>
%define flds fld DWORD
%define fsubs fsub DWORD
%define fmuls fmul DWORD
;; Pre-load the current culling mode before all of the
;; floating point area stuff.
@@ -988,11 +973,11 @@ fmuls TEXTEQU <fmul DWORD PTR>
mov fc, [esp + _vc$]
test cull, cull
jz nocull
jz .nocull
shl cull, 31 ; culltest << 31
Area_Computation:
;Area_Computation:
; 47-3
; jmp ret_pop0f
flds [fa + X] ; xa
@@ -1003,70 +988,66 @@ Area_Computation:
fsubs [fc + Y] ; | | dyBC
flds [fa + Y] ; | | | ya
fsubs [fb + Y] ; | | | dyAB
fld st(3) ; | | | | dxAB
fmul st, st(2) ; | | | | t0 t0=dxAB*dyBC
fld st(3) ; | | | | | dxBC
fmul st, st(2) ; | | | | | t1 t1=dxBC*dyAB
fsubp st(1),st ; | | | | area
fst zArea ; | | | | area
fld st3 ; | | | | dxAB
fmul st0, st2 ; | | | | t0 t0=dxAB*dyBC
fld st3 ; | | | | | dxBC
fmul st0, st2 ; | | | | | t1 t1=dxBC*dyAB
fsubp st1,st0 ; | | | | area
fst dword [zArea] ; | | | | area
;; Pop temp things from the sw culling off the fp stack
fstp st(0) ; 4
fstp st(0) ; 3
fstp st(0) ; 2
fstp st(0) ; 1
fstp st(0) ; 0
fstp st0 ; 4
fstp st0 ; 3
fstp st0 ; 2
fstp st0 ; 1
fstp st0 ; 0
mov intArea, zArea ; j = *(long *)&area
mov intArea, [zArea] ; j = *(long *)&area
xor eax, eax ; Clear the return value (0 == culled)
; Zero Area Triangle Check
and intArea, 7fffffffh ; if ((j & 0x7FFFFFFF) == 0)
jz __triDone
jz .__triDone
;; Triangle area check vs culling mode
mov intArea, zArea ; reload area just in case we're culling
mov intArea, [zArea] ; reload area just in case we're culling
xor intArea, cull ; if (j ^ (culltest << 31))
jge __triDone
nocull:
ENDIF ; GLIDE_CULLING
jge .__triDone
.nocull:
%ENDIF ; GLIDE_CULLING
align 4
;; Check to make sure that we have enough room for
;; the complete triangle packet.
mov eax, [__GlideRoot + curTriSize]
mov eax, [_GlideRoot + curTriSize]
mov ebx, [gc + fifoRoom]
add eax, 4
cmp ebx, eax
jge __triBegin
jge .__triBegin
push @Line
push __LINE__
push 0h
push eax
call __FifoMakeRoom
call _FifoMakeRoom
add esp, 12
;; Send triangle parameters
dlp TEXTEQU <ebx> ; points to dataList structure
fifo TEXTEQU <ebp> ; points to next entry in fifo
vertex TEXTEQU <edx> ; the current vertex
vOffset TEXTEQU <ecx> ; Current vertex offset
%define dlp ebx ; points to dataList structure
%define fifo ebp ; points to next entry in fifo
%define vertex edx ; the current vertex
%define vOffset ecx ; Current vertex offset
packCol TEXTEQU <edi>
tempVal TEXTEQU <edi>
GR_FIFO_WRITE MACRO __addr, __offset, __data
mov [__addr + __offset], __data
ENDM ; GR_FIFO_WRITE
%define packCol edi
%define tempVal edi
align 4
__triBegin:
.__triBegin:
mov fifo, [gc + fifoPtr] ; Fetch Fifo Ptr
mov vOffset, 4 ; Starting vertex
@@ -1077,44 +1058,44 @@ __triBegin:
add fifo, 4 ; Advance fifo for hdr & x/y coordinate
align 4
__vertexStart:
.__vertexStart:
mov vertex, [esp + STKOFF + vOffset] ; Current vertex
add fifo, 8
nop ; Avoid p5 agi w/ load of vertex ptr
nop
mov eax, DWORD PTR [vertex] ; X
mov eax, dword [vertex] ; X
lea dlp, [gc + tsuDataList] ; Reset the dataList
GR_FIFO_WRITE fifo, -8, eax ; PCI write X
mov eax, DWORD PTR [vertex + 4] ; Y
mov eax, dword [vertex + 4] ; Y
xor packCol, packCol ; Clear packed color
GR_FIFO_WRITE fifo, -4, eax ; PCI write Y
IF GLIDE_PACK_RGB
fld DWORD PTR [vertex + b] ; B
fadd DWORD PTR __GlideRoot + fBiasLo ; BC GC
%IF GLIDE_PACK_RGB
fld dword [vertex + b] ; B
fadd dword [_GlideRoot + fBiasLo] ; BC GC
fld DWORD PTR [vertex + g] ; G B
fadd DWORD PTR __GlideRoot + fBiasHi ; GC B
fld dword [vertex + g] ; G B
fadd dword [_GlideRoot + fBiasHi] ; GC B
fld DWORD PTR [vertex + r] ; R GC BC
fadd DWORD PTR __GlideRoot + fBiasHi ; RC GC BC
fld dword [vertex + r] ; R GC BC
fadd dword [_GlideRoot + fBiasHi] ; RC GC BC
fxch st(2) ; BC GC RC
fstp DWORD PTR bias0 ; GC RC
fxch st2 ; BC GC RC
fstp dword [bias0] ; GC RC
fstp DWORD PTR bias1 ; RC
mov packCol, DWORD PTR bias0 ; B + bias
fstp dword [bias1] ; RC
mov packCol, dword [bias0] ; B + bias
fstp DWORD PTR bias0
mov eax, DWORD PTR bias1 ; G + bias
fstp dword [bias0]
mov eax, dword [bias1] ; G + bias
IF GLIDE_PACK_ALPHA
fld DWORD PTR [vertex + a]
fadd DWORD PTR __GlideRoot + fBiasHi
%IF GLIDE_PACK_ALPHA
fld dword [vertex + a]
fadd dword [_GlideRoot + fBiasHi]
and packCol, 00FFh ; B color component
and eax, 0000FF00h ; G component << 8
@@ -1125,10 +1106,10 @@ IF GLIDE_PACK_ALPHA
or packCol, eax ; 0000GGBB
nop
fstp DWORD PTR bias1
mov eax, DWORD PTR bias0 ; R + bias
fstp dword [bias1]
mov eax, dword [bias0] ; R + bias
mov esi, DWORD PTR bias1 ; A + bias
mov esi, dword [bias1] ; A + bias
and eax, 0000FF00h ; R component << 8
and esi, 0FFFFFF00h ; A component << 8
@@ -1139,56 +1120,56 @@ IF GLIDE_PACK_ALPHA
or packCol, esi ; AARRGGBB
nop
ELSE ; !GLIDE_PACK_ALPHA
%ELSE ; !GLIDE_PACK_ALPHA
and packCol, 00FFh ; B color component
and eax, 0000FF00h ; G component << 8
add dlp, 4 ; Next dataList item
or packCol, eax
mov eax, DWORD PTR bias0 ; R + bias
mov eax, dword [bias0] ; R + bias
and eax, 0000FF00h ; R component << 8
shl eax, 8 ; R << 16
or packCol, eax ; 00RRGGBB
ENDIF ; !GLIDE_PACK_ALPHA
%ENDIF ; !GLIDE_PACK_ALPHA
GR_FIFO_WRITE fifo, 0, packCol ; PCI write packed color value
add fifo, 4
ENDIF ; GLIDE_PACK_RGB
%ENDIF ; GLIDE_PACK_RGB
__doParams:
mov eax, DWORD PTR [dlp] ; Get first offset from the data list
.__doParams:
mov eax, dword [dlp] ; Get first offset from the data list
add dlp, 4 ; dlp++
cmp eax, 0 ; Are we done?
je __nextVertex
je .__nextVertex
;; Not using align directive here because it sometimes
;; introduces an agi for the eax use below.
nop
nop
__paramLoop:
mov tempVal, DWORD PTR [eax + vertex] ; Get the parameter from teh vertex
.__paramLoop:
mov tempVal, dword [eax + vertex] ; Get the parameter from the vertex
add fifo, 4 ; fifoPtr += sizeof(FxU32)
mov eax, DWORD PTR [dlp] ; offset = *(dlp + 1)
mov eax, dword [dlp] ; offset = *(dlp + 1)
add dlp, 4 ; dlp++
cmp eax, 0 ; Are we done?
GR_FIFO_WRITE fifo, -4, tempVal ; *fifoPtr = data
jne SHORT __paramLoop
jne .__paramLoop
align 4
__nextVertex:
.__nextVertex:
;; On to the next vertex
add vOffset, 4
mov gc, [__GlideRoot + curGC] ; Reload gc incase we trashed it as a temp
mov gc, [_GlideRoot + curGC] ; Reload gc in case we trashed it as a temp
cmp vOffset, 16 ; Offset of one past last vertex?
jne __vertexStart
jne .__vertexStart
;; Update gc->fifoPtr and gc->fifoRoom
mov eax, fifo
@@ -1197,27 +1178,27 @@ __nextVertex:
mov [gc + fifoPtr], fifo
sub eax, ebx
mov ebx, [__GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++;
mov ebx, [_GlideRoot + trisDrawn] ; _GlideRoot.stats.trisDrawn++;
sub [gc + fifoRoom], eax
add ebx, 1
mov [__GlideRoot + trisDrawn], ebx
mov [_GlideRoot + trisDrawn], ebx
;; return 1 (triangle drawn)
mov eax, 1h
__triDone:
.__triDone:
;; Restore trashed registers
mov esi, [__GlideRoot + trisProcessed]
mov esi, [_GlideRoot + trisProcessed]
pop ebp
add esi, 1 ; _GlideRoot.stats.trisProcessed++;
pop ebx
pop edi
mov [__GlideRoot + trisProcessed], esi
mov [_GlideRoot + trisProcessed], esi
pop esi
ret 12
ret
endif ; !GL_AMD3D
%endif ; !GL_AMD3D

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,135 @@
;
; compulsory header for cvg/glide3/xdraw* assembly specializations (NASM)
;
; $Header$
; $Log$
; Revision 1.1.2.2 2003/06/13 07:22:59 dborca
; more fixes to NASM sources
;
; Revision 1.1.2.1 2003/06/07 09:53:25 dborca
; initial checkin for NASM sources
;
;---------------------------------------
; platform defines
;---------------------------------------
; Identifiers for the supported target platforms. XOS is set to exactly one
; of these by the selection chain below and is then compared against them
; in the platform-specific sections of this file.
%define XOS_DJGPP 1
%define XOS_LINUX 2
%define XOS_WIN32 4
; STDCALL gates the name@bytes symbol aliasing (extrn/globl), the
; "RET bytes" epilogue (proc/ret) and the caller-side stack cleanup (invoke).
; It defaults to 0 and is only redefined to 1 in the __WIN32__ branch below.
%define STDCALL 0
;---------------------------------------
; pick up the right OS
;---------------------------------------
; The platform is chosen from a symbol that must already be defined when this
; file is included: __DJGPP__, __linux__ or __WIN32__, tested in that order.
; NOTE(review): presumably these are passed on the assembler command line;
; confirm against the makefiles. If none is defined an error is raised.
%ifdef __DJGPP__
%define XOS XOS_DJGPP
%elifdef __linux__
%define XOS XOS_LINUX
%elifdef __WIN32__
%define XOS XOS_WIN32
%define STDCALL 1
%else
%error Unknown OS
%endif
;---------------------------------------
; general purpose macros
;---------------------------------------
;
; extrn symbol [, argbytes]
;
; Import a symbol defined in another object file.
;   symbol   - name to declare with the "extern" directive
;   argbytes - optional, defaults to 0. When it is non-zero, the target is
;              Win32 and STDCALL is enabled, the plain name is first aliased
;              to the decorated form symbol@argbytes, so every later use of
;              the plain name in the including file assembles as the
;              decorated one. Presumably the byte count of the routine's
;              stack arguments; confirm against callers.
;
%macro extrn 1-2 0
  %if STDCALL && (XOS == XOS_WIN32) && (%2 > 0)
    %define %1 %1@%2
  %endif
    extern %1
%endmacro
;
; globl symbol [, argbytes]
;
; Export a symbol from this object file.
;   symbol   - name to declare with the "global" directive
;   argbytes - optional, defaults to 0. When it is non-zero, the target is
;              Win32 and STDCALL is enabled, the plain name is first aliased
;              to the decorated form symbol@argbytes; the export and any
;              later label written with the plain name then use the
;              decorated spelling.
;
%macro globl 1-2 0
  %if STDCALL && (XOS == XOS_WIN32) && (%2 > 0)
    %define %1 %1@%2
  %endif
    global %1
%endmacro
;
; proc name [, argbytes]
;
; Open a procedure: push a preprocessor context called "proc", record in
; that context which return instruction the lower-case "ret" macro has to
; emit, export the name through globl and finally emit the entry label.
;   name     - procedure name
;   argbytes - optional, defaults to 0. When STDCALL is enabled and the
;              value is positive, the recorded return is "RET argbytes";
;              in every other case it is a plain "RET".
; Every proc is expected to be closed by a matching endp, which pops the
; context again.
;
%macro proc 1-2 0
    %push proc
  %if (STDCALL == 0) || (%2 <= 0)
    %define %$ret RET
  %else
    %define %$ret RET %2
  %endif
    globl %1, %2
%1:
%endmacro
;
; endp
;
; Close the procedure opened by the most recent proc: if the innermost
; preprocessor context is "proc" it is popped, which also discards the
; context-local return definition. Otherwise an error is reported.
;
%macro endp 0
  %ifctx proc
    %pop
  %else
    %error Mismatched `endp'/`proc'
  %endif
%endmacro
;
; ret   (written without operands)
;
; Inside a proc/endp pair this expands to the return instruction recorded
; by proc, so a bare "ret" turns into "RET argbytes" where proc asked for
; it. Outside of any proc it is an ordinary RET.
; The instruction in the body is spelled in upper case on purpose: this
; macro is defined case-sensitively as "ret", so "RET" is not expanded again.
; The macro takes no parameters, so "ret <operand>" does not match it.
;
%macro ret 0
  %ifctx proc
    %$ret
  %else
    RET
  %endif
%endmacro
;
; invoke target [, arg1, arg2, ...]
;
; Call "target" with the given arguments passed on the stack.
; The arguments are pushed last-to-first, so arg1 is pushed last and ends up
; at the lowest address, then "target" is called. When STDCALL is 0 and at
; least one argument was given, the stack pointer is moved back up by
; 4 bytes per argument after the call; with STDCALL enabled no cleanup is
; emitted. Each argument is assumed to occupy one 4-byte push.
;
%macro invoke 1-*
; rotating the parameter list right by one brings the last not-yet-pushed
; argument to the front; this is repeated once per argument
%rep %0 - 1
%rotate -1
push %1
%endrep
; one more rotation completes the full cycle, so the front parameter is
; the call target again
%rotate -1
call %1
%if (STDCALL == 0) && (%0 > 1)
add esp, 4 * (%0 - 1)
%endif
%endmacro
;---------------------------------------
; Windows
;---------------------------------------
; Settings used when XOS was resolved to XOS_WIN32.
; TEXT, DATA and CONST expand to a section name plus attributes, for use
; after a "segment"/"section" directive in the including file
; (e.g. "segment TEXT"). Here read-only data goes to .rdata and all three
; sections request 32-byte alignment.
%if XOS == XOS_WIN32
%define TEXT .text align=32
%define DATA .data align=32
%define CONST .rdata align=32
; GET_GC loads the dword at _GlideRoot + curGC into "gc".
; gc, _GlideRoot and curGC are not defined in this file; the including
; source has to provide them (gc is a register alias there).
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif
;---------------------------------------
; DJGPP
;---------------------------------------
; Settings used when XOS was resolved to XOS_DJGPP.
; TEXT, DATA and CONST expand to bare section names for use after a
; "segment"/"section" directive in the including file. Unlike the Win32 and
; Linux variants, no explicit alignment attribute is requested here.
%if XOS == XOS_DJGPP
%define TEXT .text
%define DATA .data
%define CONST .rodata
; GET_GC loads the dword at _GlideRoot + curGC into "gc".
; gc, _GlideRoot and curGC are not defined in this file; the including
; source has to provide them (gc is a register alias there).
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif
;---------------------------------------
; Linux
;---------------------------------------
; Settings used when XOS was resolved to XOS_LINUX.
; TEXT, DATA and CONST expand to a section name plus attributes, for use
; after a "segment"/"section" directive in the including file. Read-only
; data goes to .rodata and all three sections request 32-byte alignment.
%if XOS == XOS_LINUX
%define TEXT .text align=32
%define DATA .data align=32
%define CONST .rodata align=32
; GET_GC loads the dword at _GlideRoot + curGC into "gc".
; gc, _GlideRoot and curGC are not defined in this file; the including
; source has to provide them (gc is a register alias there).
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif

View File

@@ -19,8 +19,45 @@
;; $Header$
;; $Revision$
;; $Log$
;; Revision 1.1.8.7 2003/09/12 05:08:35 koolsmoky
;; preparing for graphic context checks
;;
;; Revision 1.1.8.6 2003/07/07 23:29:06 koolsmoky
;; cleaned logs
;;
;;
;; Revision 1.1 2000/06/15 00:27:43 joseph
;; Initial checkin into SourceForge.
;;
;; 1 10/08/98 11:30a Brent
;; 10 8/17/99 6:35p Atai
;; fixed amd debug mode
;;
;; 9 4/08/99 1:22p Atai
;; added context check for _grTexDownload_3DNow_MMX
;;
;; 8 3/19/99 11:26a Peter
;; expose direct fifo for gl
;;
;; 7 2/02/99 4:36p Peter
;; download through lfb rather than texture port
;;
;; 6 12/17/98 2:36p Atai
;; check in Norbert's fix for texture download width correction
;;
;; 5 12/07/98 11:33a Peter
;; norbert's re-fixes of my merge
;;
;; 4 11/02/98 5:34p Atai
;; merge direct i/o code
;;
;; 3 10/20/98 5:34p Atai
;; added #ifdefs for hwc
;;
;; 2 10/14/98 12:05p Peter
;; fixed my effed up assumption about non-volatile regs
;;
;; 1 10/09/98 6:48p Peter
;; 3DNow!(tm) version of wide texture downloads
;;
;; 3 10/07/98 9:43p Peter
;; triangle procs for 3DNow!(tm)
@@ -32,247 +69,646 @@
;; mmx stuff for 3DNow!(tm) capable processors
;;
TITLE xtexdl.asm
OPTION OLDSTRUCTS
%include "xos.inc"
.586P
.MMX
.K3D
EXTRN __FifoMakeRoom: NEAR
extrn _FifoMakeRoom
%MACRO _grCommandTransportMakeRoom 3
push %3
push %2
push %1
call _FifoMakeRoom
add esp, 12
%ENDMACRO ; _grCommandTransportMakeRoom
;;; Definitions of cvg regs and glide root structures.
INCLUDE fxgasm.h
%INCLUDE "fxgasm.h"
; Arguments (STKOFF = 12 from 3 dword pushes)
STACKOFFSET = 12
_gc$ = 4 + STACKOFFSET
_baseAddr$ = 8 + STACKOFFSET
_maxS$ = 12 + STACKOFFSET
_minT$ = 16 + STACKOFFSET
_maxT$ = 20 + STACKOFFSET
_texData$ = 24 + STACKOFFSET
; Arguments (STKOFF = 16 from 4 dword pushes)
STACKOFFSET equ 16
_gc$ equ 4 + STACKOFFSET
_baseAddr$ equ 8 + STACKOFFSET
_maxS$ equ 12 + STACKOFFSET
_minT$ equ 16 + STACKOFFSET
_maxT$ equ 20 + STACKOFFSET
_texData$ equ 24 + STACKOFFSET
;; NB: The first set of registers (eax-edx) are volatile across
;; NB: The first set of registers (eax, ecx, and edx) are volatile across
;; function calls. The remaining registers are supposedly non-volatile
;; so they only store things that are non-volatile across the call.
fifo TEXTEQU <eax> ; Current fifo ptr in inner loop
texAddr TEXTEQU <ebx> ; Physical download address of the current scanline
gc TEXTEQU <ecx> ; Current graphics context
curS TEXTEQU <edx> ; Current texture scanline
maxT TEXTEQU <esi> ; Max scanline line value (inclusive)
dataPtr TEXTEQU <edi> ; Current user texture data ptr
curT TEXTEQU <ebp> ; Current s coordinate in inner loop
temp1 TEXTEQU curS
temp2 TEXTEQU texAddr
temp3 TEXTEQU gc
GR_FIFO_WRITE MACRO __addr, __offset, __data
mov [__addr + __offset], __data
ENDM ; GR_FIFO_WRITE
%define fifo ebp ; fifo ptr in inner loop
%define gc esi ; graphics context
%define dataPtr edi ; pointer to texture data to be downloaded
%define curT ebx ; counter for texture scan lines (t-coordinate)
%define curS ecx ; texture s-coordinate
%define fRoom edx ; room available in fifo (in bytes)
;--------------------------------------------------------------------------
_TEXT SEGMENT PAGE PUBLIC USE32 'CODE'
ASSUME DS: FLAT, SS: FLAT
ALIGN 32
%IFNDEF GL_SSE2
PUBLIC __grTexDownload_3DNow_MMX@24
__grTexDownload_3DNow_MMX@24 PROC NEAR
;--------------------------------------------------------------------------
;
; GL_AMD3D, GL_MMX
;
;--------------------------------------------------------------------------
;; Function prologue type things
;; NB: We are not bothering to preserve the contents
;; of eax, ebx, ecx, edx because they are volatile
;; by convention.
segment TEXT
ALIGN 32
%IFDEF GL_AMD3D
proc _grTexDownload_3DNow_MMX, 24
%ENDIF
%IFDEF GL_MMX
proc _grTexDownload_MMX, 24
%ENDIF
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; minT
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
%IFDEF GLIDE_ALT_TAB
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
%IFDEF GL_AMD3D
femms ; we'll use MMX/3DNow!, make sure FPU register cleared
%ENDIF
%IFDEF GL_MMX
emms ; we'll use MMX
%ENDIF
mov edx, curS ; curS = maxS = scanline width in DWORDs
movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; 0 | minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
movd mm2, curS ; 0 | TEX_ROW_ADDR_INCR(1)
or edx, 00000005h ; packetHdr<31:30> = lfb port
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd mm1, edx ; 0 | packetHdr
movd mm4, eax ; 0 | TEX_ROW_ADDR_INCR(minT)
psllq mm2, 32 ; TEX_ROW_ADDR_INCR(1) | 0
paddd mm3, mm4 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq mm1, mm3 ; hdr2 = texAddr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .mmxAlignFifo ; yes, write NULL packet to align fifo
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
;; Enter 3DNow!(tm) state for the duration of the function
;; because we don't use or call anything that uses fp.
femms
mov gc, [esp + _gc$ - STACKOFFSET + 0]
push esi
mov maxT, [esp + _maxT$ - STACKOFFSET + 4]
push edi
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
shl maxT, 9 ; Convert maxT to rowAddr format
push ebp
mov dataPtr, [esp + _texData$]
mov curT, [esp + _minT$]
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
;; Pad out fifo so that we can use mmx writes the whole way w/o
;; any intermediate tests in the inner loop for fifo alignment.
;; Conveniently, the packet header is 2 dwords which matches
;; the size of the mmx write.
mov fifo, [gc + fifoPtr]; Cache fifo ptr
mov texAddr, [esp + _baseAddr$]; Texture physical address
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
mov temp1, [esp + _maxS$]; Pre-convert maxS into packet 5 field format
sub texAddr, [gc + tex_ptr]; Convert to hw base relative address
.mmxAlignFifo:
shl temp1, 2 ; Write size dwords -> bytes
mov [esp + _baseAddr$], texAddr
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
shl curT, 9 ; curT = TEX_ROW_ADDR_INCR(curT)
mov [esp + _maxS$], temp1; Write back converted s coordinate
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
shl temp1, 1 ; Write size to packet 5 field format
test fifo, 4h ; Aligned fifo ptr?
mov temp2, [gc + fifoRoom]; temp2 = gc->fifoRoom
mov [esp + _maxT$], temp1; Write back converted field format size
jz __loopT
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
;; Check to make sure there's room in the fifo. If not then
;; we'll wrap and then it should be aligned for the remainder of
;; this function invocation.
cmp temp2, 4h
jg __mmxAlignFifo
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
push @Line ; Line # inside this function
push 0h ; NULL file name
align 32
push 4h ; fifo space required
call __FifoMakeRoom ; Get fifo room
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
;; Calling out to external code means that our registers can get
;; trashed in the same way that we trash things. Plus we need to
;; re-cache the fifoPtr since we may have wrapped.
.loopT:
add esp, 12 ; Pop the 3 DWORDs for the fifoWrap parameters
mov gc, [esp + _gc$]
%IFDEF GLIDE_DEBUG
;; Setup the regs to do the alignment
mov fifo, [gc + fifoPtr]
test fifo, 4h
mov temp2, [gc + fifoRoom]
jz __loopT
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
__mmxAlignFifo:
add fifo, 4h ; packetPtr++
xor temp1, temp1 ; Clear the nop packet
mov [gc + fifoPtr], fifo; gc->fifoPtr = packetPtr
sub temp2, 4h ; fifoRoom -= 4
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
mov [gc + fifoRoom], temp2; gc->fifoRoom = fifoRoom
GR_FIFO_WRITE fifo, -4, temp1; NOP packet(0)
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
align 4
__loopT:
;; Check for room to write the current texture scanline
mov temp1, [esp + _maxS$]; temp1 = width of scanline (bytes)
mov temp2, [gc + fifoRoom]; temp2 = gc->fifoRoom (bytes)
add temp1, 0Ch ; scanline width + sizeof(packet hdr) (bytes) + nop packet to mmx align
cmp temp2, temp1 ; fifo space required >= space availible ?
jge __dlBegin ; Yes, start download now w/ no more checking
push @Line ; Line # inside this function
push 0h ; NULL file name
push temp1 ; fifo space required
call __FifoMakeRoom ; Get fifo room
add esp, 12 ; Pop the 3 DWORDs for the fifoWrap parameters
nop
;; Calling out to external code means that our registers can get
;; trashed in the same way that we trash things. Plus we need to
;; re-cache the fifoPtr since we may have wrapped.
mov gc, [esp + _gc$]
mov fifo, [gc + fifoPtr]
IFDEF GLIDE_DEBUG
;; Make sure that we have an mmx happy aligned fifoPtr
test fifo, 4
jz @1
;; Fault right away because this would be a huge suck
xor eax, eax
mov [eax], eax
@1:
ENDIF ; GLIDE_DEBUG
align 4
__dlBegin:
IFDEF GLIDE_DEBUG
;; Make sure that we have an mmx happy aligned fifoPtr
test fifo, 4
jz @2
;; Fault right away because this would be a huge suck
xor eax, eax
mov [eax], eax
@2:
ENDIF ; GLIDE_DEBUG
;; Compute packet header words
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
mov texAddr, [esp + _baseAddr$]; Download base address
mov temp1, [esp + _maxT$]; Pre-Converted # of words per packet/scanline
mov temp3, 0C0000005h ; Base packet header (texture port | packet type 5)
add fifo, 8 ; Pre-increment fifo ptr (hdr1)
or temp3, temp1 ; Base packet hdr | # of words
add texAddr, curT ; texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(curT)
GR_FIFO_WRITE fifo, -8, temp3; Write hdr1
add curT, 200h ; curT += TEX_ROW_ADDR_INCR(1)
GR_FIFO_WRITE fifo, -4, texAddr; write hdr2
mov curS, [esp + _maxS$]; curS = maxS
align 4
movq [fifo], mm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop unrolled for 8 texels a write
__loopS:
movq mm0, [dataPtr] ; load (mmx) 64 bit data (8 texels)
add fifo, 8h ; pre-increment fifoPtr += 2 * sizeof(FxU32)
add dataPtr, 8h ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8h ; curS -= 2 * sizeof(FxU32)
.loopS:
movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits]
jnz __loopS ; if curS > 0
movq mm0, [dataPtr] ; load 64 bit data (8 texels)
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
mov gc, [esp + _gc$] ; Re-cache gc which was trashed in the dl loop
mov temp1, fifo
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8 ; curS -= 2 * sizeof(FxU32)
;; Update gc->fifoPtr and gc->fifoRoom for the wrap/stall check
mov temp2, [gc + fifoPtr]
sub temp1, temp2 ; # of bytes written to the fifo
movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits]
jnz .loopS ; loop while curS > 0
mov [gc + fifoPtr], fifo; gc->fifoPtr = packetPtr
mov temp2, [gc + fifoRoom]
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
sub curT, 1 ; curT--
mov [gc + fifoRoom], fRoom ; save new fifo space available
jz .dlDone ; loop while curT > 0
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
paddd mm1, mm2 ; texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before
%endif
sub temp2, temp1 ; # of bytes left in fifo
cmp curT, maxT ; if (curT <= maxT) ?
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov [gc + fifoRoom], temp2
jle __loopT
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
__dlDone:
align 4
.dlDone:
%IFDEF GL_AMD3D
femms ; exit 3DNow!(tm) state
%ENDIF
%IFDEF GL_MMX
emms ; exit MMX state
%ENDIF
femms ; Exit 3DNow!(tm) state
pop ebp
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop edi
pop esi
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
ret 18h ; Pop 6 parameters and return
ret ; pop 6 DWORD parameters and return
endp
__grTexDownload_3DNow_MMX@24 ENDP
%ELSE ; !GL_SSE2
_TEXT ENDS
;--------------------------------------------------------------------------
;
; GL_SSE2
;
;--------------------------------------------------------------------------
END
segment TEXT
ALIGN 32
;--------------------------------------------------------------------------
; _grTexDownload_SSE2_64
;
; Streams texture scanlines into the command fifo.  Each scanline is sent
; as one packet: an 8-byte header (hdr1 = packet header, hdr2 = texture
; address) followed by the scanline data, copied 8 bytes per iteration
; through xmm0.
;
; Register roles (taken from the register-map comments in the body):
;   ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS, edx = fRoom
;   xmm1 = texAddr | packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1) | 0
;
; The maxS stack slot is overwritten with the scanline width in bytes and
; is re-read from there after anything that may clobber ecx.
;
; NOTE(review): .loopS exits only when curS reaches exactly 0 after
; subtracting 8, so the scanline width in bytes is assumed to be a non-zero
; multiple of 8 -- confirm against the callers.
; NOTE(review): 24 is the value handed to the proc macro; the plain `ret`
; presumably pops the 6 DWORD arguments through that macro -- confirm in
; xos.inc.
;--------------------------------------------------------------------------
proc _grTexDownload_SSE2_64, 24
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT (offset adjusted: only 1 of 4 pushes done)
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; eax = minT (offset adjusted: 2 of 4 pushes done)
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc (offset adjusted: 3 of 4 pushes done)
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
%IFDEF GLIDE_ALT_TAB
;; Bail out when there is no context, or bit 0 of the dword that
;; gc->lostContext points to is set
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
mov edx, curS ; edx = maxS = scanline width in DWORDs
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; eax = minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
movd xmm2,curS ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(1)
or edx, 00000005h ; set packetType; this constant leaves packetHdr<31:30> clear
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd xmm1,edx ; 0 | 0 | 0 | packetHdr
movd xmm4,eax ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(minT)
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
paddd xmm3,xmm4 ; 0 | 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
.xmmAlignFifo:
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
align 32
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Write the packet header words (precomputed in xmm1)
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo],xmm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop: copies 8 bytes of scanline data per iteration
.loopS:
movq xmm0,[dataPtr] ; load 64 bits of texel data
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8 ; curS -= 2 * sizeof(FxU32); sets ZF for the jnz below
movq [fifo - 8],xmm0 ; *fifoPtr = texelData[64 bits] (movq does not touch flags)
jnz .loopS ; loop while curS != 0
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up
sub curT, 1 ; curT--; sets ZF for the jz below
mov [gc + fifoRoom], fRoom ; save new fifo space available (mov does not touch flags)
jz .dlDone ; curT reached 0: all scanlines written
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room for header + scanline
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
ret ; return; presumably the proc macro makes this pop the 6 DWORD parameters -- TODO confirm
endp
segment TEXT
ALIGN 32
;--------------------------------------------------------------------------
; _grTexDownload_SSE2_128
;
; Same scanline-per-packet texture download as the 64-bit variant, but the
; inner loop copies 16 bytes per iteration with unaligned 128-bit moves.
;
; Register roles (taken from the register-map comments in the body):
;   ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS, edx = fRoom
;   xmm1 = texAddr | packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1) | 0
;
; The maxS stack slot is overwritten with the scanline width in bytes and
; is re-read from there after anything that may clobber ecx.
;
; NOTE(review): .loopS exits only when curS reaches exactly 0 after
; subtracting 16, so the scanline width in bytes is assumed to be a
; non-zero multiple of 16 -- confirm against the callers.
; NOTE(review): 24 is the value handed to the proc macro; the plain `ret`
; presumably pops the 6 DWORD arguments through that macro -- confirm in
; xos.inc.
;--------------------------------------------------------------------------
proc _grTexDownload_SSE2_128, 24
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT (offset adjusted: only 1 of 4 pushes done)
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; eax = minT (offset adjusted: 2 of 4 pushes done)
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc (offset adjusted: 3 of 4 pushes done)
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
%IFDEF GLIDE_ALT_TAB
;; Bail out when there is no context, or bit 0 of the dword that
;; gc->lostContext points to is set
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
mov edx, curS ; edx = maxS = scanline width in DWORDs
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; eax = minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
movd xmm2,curS ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(1)
or edx, 00000005h ; set packetType; this constant leaves packetHdr<31:30> clear
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd xmm1,edx ; 0 | 0 | 0 | packetHdr
movd xmm4,eax ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(minT)
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
paddd xmm3,xmm4 ; 0 | 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
.xmmAlignFifo:
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
align 32
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Write the packet header words (precomputed in xmm1)
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo],xmm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop: copies 16 bytes of scanline data per iteration
.loopS:
movdqu xmm0, [dataPtr] ; load 128 bits of texel data (unaligned form: 16-byte alignment is not guaranteed)
add fifo, 16 ; pre-increment fifoPtr += 4 * sizeof(FxU32)
add dataPtr, 16 ; dataPtr += 4 * sizeof(FxU32)
sub curS, 16 ; curS -= 4 * sizeof(FxU32); sets ZF for the jnz below
movdqu [fifo - 16], xmm0 ; *fifoPtr = texelData[128 bits] (unaligned form; does not touch flags)
jnz .loopS ; loop while curS != 0
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up
sub curT, 1 ; curT--; sets ZF for the jz below
mov [gc + fifoRoom], fRoom ; save new fifo space available (mov does not touch flags)
jz .dlDone ; curT reached 0: all scanlines written
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room for header + scanline
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
ret ; return; presumably the proc macro makes this pop the 6 DWORD parameters -- TODO confirm
endp
%ENDIF ; GL_SSE2

View File

@@ -0,0 +1,54 @@
# DOS/DJGPP tests makefile for Glide3
#
# Copyright (c) 2002 - Borca Daniel
# Email : dborca@users.sourceforge.net
# Web : http://www.geocities.com/dborca
#
# $Header$
#
#
# Available options:
#
# Environment variables:
# CPU optimize for the given processor.
# default = pentium
# DXE=1 use DXE modules.
# default = no
#
# Targets:
# <file.exe> build a specific file
#
.PHONY: all
.SUFFIXES: .c .o .exe
.SECONDARY: tlib.o
FX_GLIDE_HW = cvg
TOP = ../../..
CPU ?= pentium
CC = gcc
CFLAGS = -Wall -O2 -ffast-math -mcpu=$(CPU)
CFLAGS += -I$(TOP)/$(FX_GLIDE_HW)/glide3/src -I$(TOP)/$(FX_GLIDE_HW)/incsrc
CFLAGS += -I$(TOP)/swlibs/fxmisc
CFLAGS += -D__DOS__ -DCVG
CFLAGS += -D__DOS32__
LDFLAGS = -s -L$(TOP)/$(FX_GLIDE_HW)/lib
ifdef DXE
LDLIBS = -lgld3i
else
LDLIBS = -lgld3x
endif
.c.o:
$(CC) -o $@ $(CFLAGS) -c $<
%.exe: tlib.o %.o
$(CC) -o $@ $(LDFLAGS) $^ $(LDLIBS)
all:
$(error Must specify <filename.exe> to build)

View File

@@ -1,78 +1,48 @@
# Linux tests makefile for Glide3
#
# Insert new header here
# Copyright (c) 2002 - Borca Daniel
# Email : dborca@users.sourceforge.net
# Web : http://www.geocities.com/dborca
#
# $Header$
#
LDIRT= $(wildcard *.exe *.map *.sys *.o *.a)
# Special case rush because its built off of the sst1 tree
ifeq ($(FX_GLIDE_HW),SST96)
GLIDE_ROOT = $(BUILD_ROOT)/sst1
else
GLIDE_ROOT = $(BUILD_ROOT)/$(FX_GLIDE_HW)
endif
#
# Available options:
#
# Environment variables:
# CPU optimize for the given processor.
# default = pentium
#
# Targets:
# <file.exe> build a specific file
#
LCINCS += -I$(BUILD_ROOT)/$(FX_GLIDE_HW)/include
.PHONY: all
.SUFFIXES: .c .o .exe
.SECONDARY: tlib.o
LIBOBJS = tlib.o
FX_GLIDE_HW = cvg
TOP = ../../..
CPU ?= pentium
GLIDELIB = -L$(GLIDE_ROOT)/lib -lglide3
CC = gcc
CFLAGS = -Wall -O2 -ffast-math -mcpu=$(CPU)
CFLAGS += -I$(TOP)/$(FX_GLIDE_HW)/glide3/src -I$(TOP)/$(FX_GLIDE_HW)/incsrc
CFLAGS += -I$(TOP)/swlibs/fxmisc
CFLAGS += -DCVG
LLDLIBS = $(LIBOBJS) $(GLIDELIB)
LDFLAGS = -s -L$(TOP)/$(FX_GLIDE_HW)/lib
ifeq ($(HAL_CSIM),1)
LLDLIBS += $(BUILD_ROOT)/$(FX_GLIDE_HW)/lib/lib$(FX_GLIDE_HW)hal.a
endif
LDLIBS = -lglide3
LDLIBS += -lm
PRIVATE_HEADERS = tlib.h tlib.c tldata.inc
.c.o:
$(CC) -o $@ $(CFLAGS) -c $<
%.exe: tlib.o %.o
$(CC) -o $@ $(LDFLAGS) $^ $(LDLIBS)
CFILES = test00.c \
test01.c \
test02.c \
test03.c \
test04.c \
test05.c \
test06.c \
test07.c \
test08.c \
test09.c \
test10.c \
test11.c \
test12.c \
test13.c \
test14.c \
test15.c \
test16.c \
test17.c \
test18.c \
test19.c \
test20.c \
test21.c \
test22.c \
test23.c \
test24.c \
test25.c \
test26.c \
test27.c \
test28.c \
test29.c \
test30.c \
test31.c \
test32.c \
test33.c \
test34.c \
test35.c \
test36.c \
display.c \
sbench.c
PROGRAMS = $(CFILES:.c=)
DATAFILES = alpha.3df decal1.3df lava.3df light.3df matt1.3df miro.3df
include $(BUILD_ROOT)/swlibs/include/make/3dfx.linux.mak
$(PROGRAMS): $(LIBOBJS)
all:
$(error Must specify <filename.exe> to build)

View File

@@ -1636,6 +1636,12 @@ tlErrorMessage( char *err) {
fprintf(stderr, err);
} /* tlErrorMessage */
/* Stub for this platform branch: always returns -1, which converts to the
 * all-ones FxU32 value. */
FxU32
tlGethWnd(void)
{
return -1;
}
#elif __WIN32__

884
glide3x/cvg/init/fxremap.c Normal file
View File

@@ -0,0 +1,884 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef __linux__
#include <conio.h>
#endif
#include <3dfx.h>
#include <fxpci.h>
#define null 0
#define SIZE_SST1_NEEDED 0x100000
#define END_ADDRESS 0x10000000
#define S3_SHIFT 0x400000
/* One node of the address-sorted, doubly linked list of PCI memory ranges
 * built by AddMapEntry. */
struct RangeSTRUCT
{
FxU32 address; /* base address; AddMapEntry stores the PCI address shifted right by 4 */
FxU32 range; /* size of the region, stored by AddMapEntry as ((~mask)>>4)+1 */
FxU32 id; /* device number handed to the pci*ConfigData calls */
FxU32 is_voodoo; /* 1 = is voodoo, 2 = hidden SLI */
FxU32 is_S3; /* non-zero when IsCardS3 matched the device */
struct RangeSTRUCT *next; /* next higher address, or null at the tail */
struct RangeSTRUCT *prev; /* next lower address, or null at the head */
};
int silent = 1;
typedef struct RangeSTRUCT RangeStruct;
void InitRemap(void);
void CloseRemap(void);
void GetMemoryMap(void);
void RemapVoodoo(RangeStruct *conflict);
void AdjustMapForS3(void);
RangeStruct *TestForConflicts(void);
void RemoveEntry(RangeStruct *del);
void InsertEntry(RangeStruct *ins);
FxBool FindHole(RangeStruct *conflict);
FxU32 SnapToDecentAddress(FxU32 address,RangeStruct *conflict);
FxBool fits_in_hole(RangeStruct *begin,FxU32 end,RangeStruct *hole,RangeStruct *conflict);
FxBool fits_under(RangeStruct *first,FxU32 minimum,RangeStruct *hole,RangeStruct *conflict);
FxU32 pciGetType(long i);
void pciGetRange(PciRegister reg,FxU32 device_number,FxU32 *data);
FxBool pciGetAddress(PciRegister reg,FxU32 device_number,FxU32 *data);
void ForceCleanUp(void);
FxBool FindNecessaryCards(void);
void ProcessCommandLine(char **argv,int argc);
FxBool IsCardVoodoo(long i);
FxBool IsCardS3(long i);
FxBool ReadHex(char *string,FxU32 *num);
void AddMapEntry(FxU32 address,FxU32 range,FxU32 id,FxBool VoodooCard,FxBool S3Card);
void HandleMemoryOverlap(void);
FxBool overlap_map(RangeStruct *begin,FxU32 end);
FxBool switch_S3_flag_ignore=FXFALSE;
//FxBool switch_force=FXFALSE;
FxBool switch_C0_bias=FXTRUE;
int switch_voodoo_loc = 0;
FxU32 num_voodoos=0;
//#define TESTING 1
#ifdef TESTING
RangeStruct test_data[6]= {{0xF0000000,0x100000,1,0,0,0,0},
{0xF3000000,0x200000,4,1,0,0,0},
{0xF3000000,0x200000,8,0,0,0,0},
{0xF5000000,0x200000,2,0,0,0,0},
{0xE6000000,0x200000,5,0,0,0,0},
{0xD3001000,0x200000,3,0,0,0,0}};
#endif
RangeStruct map[80];
RangeStruct hole[80];
long num_holes=0;
RangeStruct *first_entry;
RangeStruct *last_entry;
long entries=0;
RangeStruct master_hole;
long voodoo_loc;
FxU32 conflicts_found=0;
void fxremap_dowork(int argc,char **argv,int doit_silently)
{
RangeStruct *conflict;
silent = doit_silently;
ProcessCommandLine(argv,argc);
InitRemap();
if (!FindNecessaryCards())
{
if (!silent) {
printf("This program was only meant to be used with the 3dfx Voodoo chipset\n");
printf("to correct possible pci address conflicts.\n");
printf("No Voodoo chipset was detected\n");
}
ForceCleanUp();
}
GetMemoryMap();
/* expand region of mapping for S3 card */
AdjustMapForS3();
/* see if we find any conflicts with any voodoo card */
while (conflict=TestForConflicts())
{
conflicts_found++;
/* since it is going to move */
/* remove entry, so we can possibly use it as a hole */
RemoveEntry(conflict);
if (FindHole(conflict))
{
conflict->address=master_hole.address;
RemapVoodoo(conflict);
}
else
{
if (!silent) {
printf("Unable to find region to map conflicting board\n");
}
ForceCleanUp();
return;
}
}
if (!conflicts_found) {
if (!silent) {
printf("No conflict with the Voodoo cards was found\n");
}
}
CloseRemap();
}
/* Silent entry point: runs the remap pass with no command-line arguments
 * (argc 0, argv NULL) and with doit_silently set to 1. */
void fxremap() {
fxremap_dowork(0,NULL,1);
}
/* Command-line entry point: forwards argc/argv to the remap pass with
 * doit_silently set to 0, so messages are printed. */
void fxremap_main(int argc,char **argv) {
fxremap_dowork(argc,argv,0);
}
/* Opens the PCI library through pciOpen(); its return value is not checked. */
void InitRemap(void)
{
pciOpen();
}
/* Currently a no-op: the pciClose() call below is commented out. */
void CloseRemap(void)
{
// pciClose();
}
/*
 * pciGetConfigData_R - value-returning wrapper around pciGetConfigData.
 * Returns the register contents when the read reports FXTRUE, and
 * 0xFFFFFFFF for any other result.
 */
FxU32 pciGetConfigData_R(PciRegister reg, FxU32 devNum) {
  FxU32 value;

  if (pciGetConfigData(reg,devNum,&value) != FXTRUE)
    return 0xFFFFFFFF;
  return value;
}
#define PCI_NORMAL_TYPE 0
#define PCI_BRIDGE_TYPE 1
void GetMemoryMap(void)
{
FxU32 temp,temp2;
FxU32 type;
long devNum;
int fn; /* function number iterator */
int maxFnNumber;
int multi_fn = 0;
#ifdef TESTING
for (i=0;i<6;i++)
{
temp=test_data[i].address;
temp2=~(test_data[i].range - 0x1);
AddMapEntry(temp,temp2,test_data[i].id,test_data[i].is_voodoo,test_data[i].is_S3);
}
#else
for (devNum=0;devNum<MAX_PCI_DEVICES;devNum++)
{
if (pciDeviceExists(devNum))
{
if (pciGetConfigData_R(PCI_HEADER_TYPE,devNum) & (1<<7)) {
maxFnNumber = 8; /* multifunction! */
multi_fn = 1;
} else {
multi_fn = 0;
if ((pciGetConfigData_R(PCI_VENDOR_ID,devNum) == 0x121a) &&
(pciGetConfigData_R(PCI_DEVICE_ID,devNum) == 0x02)) {
maxFnNumber = 8; /* single board SLI! */
} else {
maxFnNumber = 1;
}
}
for(fn=0;fn<maxFnNumber;fn++) {
int i = devNum | (fn << 13); /* add function number */
if (pciGetConfigData_R(PCI_VENDOR_ID,i) != 0xFFFF) {
/* two header types */
/* one for bridges and one for everything else */
type=pciGetType(i);
if (type==PCI_NORMAL_TYPE) {
if (pciGetAddress(PCI_BASE_ADDRESS_0,i,&temp)) {
pciGetRange(PCI_BASE_ADDRESS_0,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
if (pciGetAddress(PCI_BASE_ADDRESS_1,i,&temp)) {
pciGetRange(PCI_BASE_ADDRESS_1,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
#if 0
/* Legacy address which is not needed for sst1 type
* things w/o 2d.
*/
if (pciGetAddress(PCI_IO_BASE_ADDRESS,i,&temp)) {
pciGetRange(PCI_IO_BASE_ADDRESS,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
#endif
if (pciGetAddress(PCI_ROM_BASE_ADDRESS,i,&temp)) {
pciGetRange(PCI_ROM_BASE_ADDRESS,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
} else if (type==PCI_BRIDGE_TYPE) {
if (pciGetAddress(PCI_BASE_ADDRESS_0,i,&temp)) {
pciGetRange(PCI_BASE_ADDRESS_0,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
if (pciGetAddress(PCI_BASE_ADDRESS_1,i,&temp)) {
pciGetRange(PCI_BASE_ADDRESS_1,i,&temp2);
AddMapEntry(temp,temp2,i,IsCardVoodoo(i),IsCardS3(i));
}
}
} /* if function number exists */
} /* for all function numbers */
}
}
#endif
}
/*
 * AdjustMapForS3 - widen every map entry flagged is_S3.
 * Each such entry has its base moved down by S3_SHIFT and its range set
 * to twice S3_SHIFT.
 */
void AdjustMapForS3(void)
{
  RangeStruct *entry;

  for (entry=first_entry; entry; entry=entry->next)
  {
    if (!entry->is_S3)
      continue;
    entry->address-=S3_SHIFT;
    entry->range=S3_SHIFT<<1;
  }
}
/*
 * TestForConflicts - scan the sorted map for an entry that must be moved.
 *
 * Returns, in list order, the first entry that is:
 *   - a hidden-SLI Voodoo2 (is_voodoo == 2) sitting at 0xFF00000, or
 *   - part of an overlap with its successor where at least one of the two
 *     is a Voodoo (the Voodoo entry is returned, preferring the lower one),
 *   - the last entry, if it extends past END_ADDRESS.
 * Overlaps between two non-Voodoo devices are only reported (when not
 * silent).  Returns null when nothing needs to move.
 */
RangeStruct *TestForConflicts(void)
{
  RangeStruct *cur,*next;

  for (cur=first_entry; cur; cur=cur->next)
  {
    /* if this is a poorly mapped voodoo2 single board SLI, then remap */
    if ((cur->is_voodoo == 2) && (cur->address == 0xFF00000)) {
      return (cur);
    }

    next=cur->next;
    if (!next)
    {
      if ((cur->address + cur->range) > END_ADDRESS)
        return cur;
      continue;
    }

    if ((cur->address + cur->range) <= next->address)
      continue;                       /* no overlap with the successor */

    if (cur->is_voodoo)
      return cur;
    if (next->is_voodoo)
      return next;

    if (!silent) {
      printf("FxRemap: Possible PCI conflict not with Voodoo device\n");
      /* The old format string had five conversions for four arguments,
       * which is undefined behaviour; print id (address) for each side. */
      printf("%lX (%lX) <-> %lX (%lX)\n",
             (unsigned long)cur->id, (unsigned long)cur->address,
             (unsigned long)next->id, (unsigned long)next->address);
    }
  }
  return null;
}
/*
 * AddMapEntry - record one PCI memory range in the sorted map.
 *
 *   address     base address as read from the BAR (stored shifted right by 4)
 *   range       size mask as read back by pciGetRange (stored as
 *               ((~range)>>4)+1)
 *   id          device number used for later config-space access
 *   VoodooCard  IsCardVoodoo() result for the device
 *   S3Card      IsCardS3() result for the device
 *
 * A zero address, or an address already present in the table, is ignored.
 * The entry is also dropped when the fixed-size map[] table is full; the
 * table was previously written without any bounds check.
 */
void AddMapEntry(FxU32 address,FxU32 range,FxU32 id,FxBool VoodooCard,FxBool S3Card)
{
  RangeStruct *fresh;
  //jcochrane@3dfx.com
  long entry;
  FxU32 tmp_address;
  //END

#if 0
  static long test_entry=0;
  address=test_data[test_entry].address;
  range=~(test_data[test_entry++].range - 0x1);
#endif

  /* only if address != 0 */
  if (!address)
    return;

  //jcochrane@3dfx.com
  //check for duplicate entries in the map table,ignore if there is
  tmp_address=address>>4;
  for(entry=0;entry<entries;entry++)
  {
    if (tmp_address == map[entry].address)
      return;
  }
  //END

  if (entries >= (long)(sizeof(map)/sizeof(map[0])))
  {
    if (!silent) {
      printf("FxRemap: memory map table full, entry ignored\n");
    }
    return;
  }

  fresh=&map[entries++];
  fresh->address=tmp_address;
  fresh->range=((~range)>>4)+0x1;
  fresh->id=id;
  fresh->is_voodoo=VoodooCard;
  fresh->is_S3=S3Card;

  /* InsertEntry performs the same address-ordered insertion that used to
   * be duplicated here, including the empty-list and append cases. */
  InsertEntry(fresh);
}
/*
 * RemoveEntry - unlink `del` from the map list.
 * The sole remaining entry is never removed: in that case a message is
 * printed (when not silent), ForceCleanUp() is called and the list is
 * left untouched.  Otherwise the neighbours are joined, first_entry /
 * last_entry are updated as needed, and del's links are cleared.
 */
void RemoveEntry(RangeStruct *del)
{
  RangeStruct *before=del->prev;
  RangeStruct *after=del->next;

  if (!after && !before)
  {
    if (!silent) {
      printf("FxRemap: No entries mapped\n");
    }
    ForceCleanUp();
    return;
  }

  if (after)
    after->prev=before;
  else
    last_entry=before;    /* del was the tail */

  if (before)
    before->next=after;
  else
    first_entry=after;    /* del was the head */

  del->next=null;
  del->prev=null;
}
/*
 * InsertEntry - link `ins` into the map list, keeping it sorted by
 * ascending address.  The entry goes in front of the first node whose
 * address is strictly greater; if there is none it becomes the new tail.
 */
void InsertEntry(RangeStruct *ins)
{
  RangeStruct *node;

  ins->next=null;
  ins->prev=null;

  if (!first_entry)
  {
    first_entry=ins;
    last_entry=ins;
    return;
  }

  for (node=first_entry; node; node=node->next)
  {
    if (ins->address >= node->address)
      continue;

    /* splice in ahead of node */
    ins->next=node;
    ins->prev=node->prev;
    node->prev=ins;
    if (ins->prev)
      (ins->prev)->next=ins;
    else
      first_entry=ins;
    return;
  }

  /* if it got this far it needs to go at the end */
  ins->prev=last_entry;
  last_entry->next=ins;
  last_entry=ins;
}
/*
 * SnapToDecentAddress - round `address` up to a boundary of the
 * conflicting board's range (at least 0x10000).
 * An address whose bits below the boundary are already clear is returned
 * unchanged.  The mask arithmetic presumably expects the range to be a
 * power of two -- TODO confirm.
 */
FxU32 SnapToDecentAddress(FxU32 address,RangeStruct *conflict)
{
  FxU32 span=(conflict->range<0x10000) ? 0x10000 : conflict->range;
  FxU32 low_bits=span-1;

  if (!(address & low_bits))
    return address;

  return (address & ~low_bits) + span;
}
/*
 * fits_in_hole - test whether `conflict` fits between the end of `begin`
 * and `end`.
 * The candidate start is the end of `begin`, snapped up with
 * SnapToDecentAddress.  On success the hole's address and remaining size
 * are written to *hole and FXTRUE is returned; otherwise *hole is left
 * untouched and FXFALSE is returned.
 */
FxBool fits_in_hole(RangeStruct *begin,FxU32 end,RangeStruct *hole,RangeStruct *conflict)
{
  FxU32 start=SnapToDecentAddress(begin->address+begin->range,conflict);

  /* note could be <= */
  /* this is safer but more inefficient memory wise */
  if ((start+conflict->range)>=end)
    return FXFALSE;

  hole->address=start;
  hole->range=end-start;
  return FXTRUE;
}
/*
 * fits_under - test whether `conflict` fits below the first mapped entry.
 * The candidate start is `minimum`, snapped up with SnapToDecentAddress.
 * On success the hole's address and size are written to *hole and FXTRUE
 * is returned; otherwise *hole is left untouched and FXFALSE is returned.
 */
FxBool fits_under(RangeStruct *first,FxU32 minimum,RangeStruct *hole,RangeStruct *conflict)
{
  FxU32 start=SnapToDecentAddress(minimum,conflict);

  if ((start+conflict->range) >= first->address)
    return FXFALSE;

  hole->address=start;
  hole->range=first->address - start;
  return FXTRUE;
}
/*
 * FindHole - look for free space large enough for `conflict`.
 * Tries the gap after each mapped entry (up to the next entry, or up to
 * END_ADDRESS after the last one), then the space between 0xA000000 and
 * the first entry.  On success master_hole describes the hole and FXTRUE
 * is returned.
 */
FxBool FindHole(RangeStruct *conflict)
{
  RangeStruct *node;

  for (node=first_entry; node; node=node->next)
  {
    FxU32 limit=node->next ? node->next->address : END_ADDRESS;

    if (fits_in_hole(node,limit,&master_hole,conflict))
      return FXTRUE;
  }

  /* see if we can find a whole located below addressed boards */
  /* don't want to go below 0xA000000 for addressing our boards */
  if ((first_entry->address > 0xA000000) &&
      fits_under(first_entry,0xA000000,&master_hole,conflict))
    return FXTRUE;

  return FXFALSE;
}
/*
 * RemapVoodoo - move a board to the address already stored in
 * conflict->address.
 * Re-links the entry into the sorted map, then (unless TESTING) writes the
 * address, shifted back left by 4, to the device's config space.
 * NOTE(review): the write always targets PCI_BASE_ADDRESS_0, although
 * GetMemoryMap also creates entries from BASE_ADDRESS_1 and the ROM base
 * register -- confirm this is intended.
 */
void RemapVoodoo(RangeStruct *conflict)
{
FxU32 address;
/* put conflict back into memory map */
InsertEntry(conflict);
#ifndef TESTING
address=(conflict->address)<<4; /* undo the >>4 applied when the entry was stored */
pciSetConfigData(PCI_BASE_ADDRESS_0,conflict->id,&address);
#endif
if (!silent) {
printf("Remapped Voodoo Board to avoid a conflict\n");
}
}
/*
 * pciGetRange - probe the size mask of a base address register.
 * Saves the current register value, writes all ones, reads the register
 * back, then restores the saved value.  The raw read-back value is stored
 * in *data; AddMapEntry later turns it into a size with ((~range)>>4)+1.
 * The results of the individual config-space calls are not checked.
 */
void pciGetRange(PciRegister reg,FxU32 device_number,FxU32 *data)
{
FxU32 temp=0xFFFFFFFF;
FxU32 size,save;
pciGetConfigData(reg,device_number,&save); /* remember the live value */
pciSetConfigData(reg,device_number,&temp); /* write all ones */
pciGetConfigData(reg,device_number,&size); /* read back the mask */
pciSetConfigData(reg,device_number,&save); /* restore the live value */
#ifdef TESTING
printf("PciGetRange: save %08x \n",save);
printf("PciGetRange: temp %08x \n",temp);
printf("PciGetRange: size %08x \n",size);
printf("PciGetRange: save %08x \n",save);
#endif
*data=size;
}
/*
 * pciGetAddress - read a base address register into *data.
 * Returns FXTRUE only when the value is non-zero and has bit 0 clear;
 * a zero value or one with bit 0 set yields FXFALSE.  The result of the
 * config-space read itself is not checked.
 */
FxBool pciGetAddress(PciRegister reg,FxU32 device_number,FxU32 *data)
{
  FxU32 raw;

  pciGetConfigData(reg,device_number,data);
  raw=*data;

  return ((raw!=0) && !(raw & 0x01)) ? FXTRUE : FXFALSE;
}
/* Error-path hook.  Currently a no-op: both pciClose() and exit(1) are
 * commented out, so execution continues in the caller after this returns. */
void ForceCleanUp(void)
{
// pciClose();
// exit(1);
}
/*
 * FindNecessaryCards - count the Voodoo boards present on the PCI bus.
 * Increments num_voodoos once for every existing device that IsCardVoodoo
 * accepts.  Returns FXTRUE when at least one was seen; otherwise prints a
 * warning (when not silent) and returns FXFALSE.
 */
FxBool FindNecessaryCards(void)
{
  FxBool seen_voodoo=FXFALSE;
  long dev;

  for (dev=0;dev<MAX_PCI_DEVICES;dev++)
  {
    if (!pciDeviceExists(dev))
      continue;
    if (!IsCardVoodoo(dev))
      continue;
    seen_voodoo=FXTRUE;
    num_voodoos++;
  }

  if (seen_voodoo)
    return FXTRUE;

  if (!silent) {
    printf("Warning no known voodoo card was found\n");
  }
  return FXFALSE;
}
/*
 * ProcessCommandLine - parse the fxremap switches.
 *
 *   /dS3            sets switch_S3_flag_ignore
 *   /f 0xADDR       parsed but ignored in this version
 *   /x 0xLO-0xHI    adds an excluded region to the memory map
 *   /nb             clears switch_C0_bias
 *   /i N            sets switch_voodoo_loc
 *
 * argv[0] is skipped; with argc <= 1 nothing is read from argv.
 *
 * ForceCleanUp() no longer exits, so malformed input must not be used
 * after the error is reported: "/x" without a '-' used to go on to parse
 * from a NULL-derived pointer, and a trailing "/i" read argv[argc].
 */
void ProcessCommandLine(char **argv,int argc)
{
  long i;
  FxU32 temp,temp2;
  FxU32 address,range;
  char *hex_ptr;

  for (i=1;i<argc;i++)
  {
    if (strcmp(argv[i],"/dS3")==0)
    {
      switch_S3_flag_ignore=FXTRUE;
    }
    else if(strcmp(argv[i],"/f")==0)
    {
      if (((i+1)<argc) && ReadHex(argv[i+1],&temp))
      {
        i++;    /* consume the address argument */
      }
      else
      {
        if (!silent) {
          printf("Command line: improper format\n");
          printf("ex: fxremap.exe /f 0xC0000000\n");
        }
        ForceCleanUp();
      }
      if (!silent) {
        printf("Command line option /f ignored in this version\n");
      }
      /* this stuff was from the interactive test version */
#if 0
      while(!kbhit())
      {
        ;
      }
      getch();
#endif
    }
    else if(strcmp(argv[i],"/x")==0)
    {
      /* only look for the separator when there is an argument at all */
      hex_ptr=((i+1)<argc) ? strchr(argv[i+1],'-') : NULL;

      if (hex_ptr &&
          ReadHex(argv[i+1],&temp) && ReadHex(hex_ptr+1,&temp2))
      {
        address=temp;
        range=temp2-temp;
        range=~(range - 0x1);
        i++;    /* consume the range argument */
        AddMapEntry(address,range,0x500,FXFALSE,FXFALSE);
      }
      else
      {
        if (!silent) {
          printf("Command line: improper format\n");
          printf("ex: fxremap.exe /x 0xE0000000-0xF0000000\n");
        }
        ForceCleanUp();
      }
    }
    else if (strcmp(argv[i],"/nb")==0)
    {
      switch_C0_bias=FXFALSE;
    }
    else if (strcmp(argv[i],"/i")==0)
    {
      if ((i+1)<argc)
      {
        switch_voodoo_loc = atoi(argv[++i]);
      }
      else
      {
        if (!silent) {
          printf("Command line: improper format\n");
        }
        ForceCleanUp();
      }
    }
    else
    {
      if (!silent) {
        printf("Command line: improper options specified\n");
        printf("Valid options are /dS3 /f /x /i\n");
      }
    }
  }
}
/*
 * pciGetType - read the PCI_HEADER_TYPE config register of device i.
 * The value is returned unmasked; GetMemoryMap tests bit 7 of this same
 * register as its multifunction flag.
 * NOTE(review): the result of pciGetConfigData is not checked, so
 * header_type may be returned uninitialized if the read does not store a
 * value -- confirm against the fxpci implementation.
 */
FxU32 pciGetType(long i)
{
FxU32 header_type;
pciGetConfigData(PCI_HEADER_TYPE,i,&header_type);
return header_type;
}
/*
 * IsCardVoodoo - classify device i by its vendor/device id.
 * Returns FXTRUE for vendor 0x121a with device id 1, 3 or 4.  For device
 * id 2 (voodoo2) it returns 1 when the function number (bits 15:13 of i)
 * is zero and 2 when it is non-zero (hidden SLI), printing a note in the
 * latter case when not silent.  Anything else yields FXFALSE.
 */
FxBool IsCardVoodoo(long i)
{
  FxU32 vendor,dev_id;
  int on_extra_function=(((i >> 13) & 0x7) != 0);

  pciGetConfigData(PCI_VENDOR_ID,i,&vendor);
  pciGetConfigData(PCI_DEVICE_ID,i,&dev_id);

  if (vendor!=0x121a)
    return FXFALSE;

  switch (dev_id)
  {
  case 0x0001:              /* sst1 */
    return FXTRUE;

  case 0x0002:              /* voodoo2 */
    if (!on_extra_function)
      return 1;
    if (!silent) { printf("found voodoo2 hidden sli\n"); }
    return 2;

  case 0x0003:              /* banshee */
  case 0x0004:              /* h4? or whatever is next */
    return FXTRUE;

  default:
    return FXFALSE;
  }
}
/*
 * IsCardS3 - test whether a PCI device matches the S3 IDs this tool knows.
 *
 * i: device identifier, passed unchanged to pciGetConfigData().
 *
 * Returns FXTRUE only for vendor ID 0x5333 combined with device ID 0x88f0
 * or 0x8880; FXFALSE otherwise.
 */
FxBool IsCardS3(long i)
{
    FxU32 vendor_id, device_id;

    pciGetConfigData(PCI_VENDOR_ID, i, &vendor_id);
    pciGetConfigData(PCI_DEVICE_ID, i, &device_id);

    if (vendor_id != 0x5333)
        return FXFALSE;

    switch (device_id)
    {
    case 0x88f0:
    case 0x8880:
        return FXTRUE;
    default:
        return FXFALSE;
    }
}
/*
 * ReadHex - parse a "0x"-prefixed hexadecimal number of 1 to 8 digits.
 *
 * string: text to parse.  Leading spaces are skipped.  Parsing stops at the
 *         first character that is not a hex digit, so trailing text (such
 *         as the "-0x..." half of a range argument) is tolerated.
 * num:    receives the parsed value on success; not written on failure.
 *
 * Returns FXTRUE on success.  Returns FXFALSE when the lowercase "0x"
 * prefix is missing, when no hex digit follows it, or when more than 8
 * digits are present (leading zeros count towards the limit).
 *
 * The previous implementation intended the 8-digit limit but never enforced
 * it, so longer inputs silently overflowed; it also accepted a bare "0x".
 */
FxBool ReadHex(char *string,FxU32 *num)
{
    long i=0;
    FxU32 value=0;
    FxU32 digit;
    long num_count=0;

    /* bypass leading spaces */
    while (string[i]==' ')
        i++;

    /* verify leading 0x */
    if (string[i]!='0')
        return FXFALSE;
    i++;
    if (string[i]!='x')
        return FXFALSE;
    i++;

    /* read in number */
    for (;;i++)
    {
        char c=string[i];

        if ((c>='0')&&(c<='9'))
            digit=(FxU32)(c-'0');
        else if ((c>='A')&&(c<='F'))
            digit=(FxU32)(c-'A')+10;
        else if ((c>='a')&&(c<='f'))
            digit=(FxU32)(c-'a')+10;
        else
            break;          /* first non-hex character ends the number */

        /* a ninth digit exceeds the 8-digit limit */
        if (num_count>=8)
            return FXFALSE;

        value=(value<<4)+digit;
        num_count++;
    }

    /* "0x" with nothing after it is not a number */
    if (num_count==0)
        return FXFALSE;

    *num=value;
    return FXTRUE;
}
/*
 * HandleMemoryOverlap - walk the range list from first_entry and fold each
 * entry's successor into it whenever overlap_map() reports that the two
 * overlap.
 *
 * On a merge the surviving entry's range is widened (never shrunk) so that
 * it reaches the end of the absorbed successor, and the successor is
 * unlinked from the list.  The same entry is then re-examined against its
 * new successor.  last_entry is updated when the absorbed node was the tail.
 */
void HandleMemoryOverlap(void)
{
    RangeStruct *node;
    RangeStruct *succ;

    node=first_entry;
    while (node)
    {
        /* the tail has no successor to compare against */
        if (node==last_entry)
        {
            node=node->next;
            continue;
        }

        succ=node->next;

        /* no overlap: move on to the next entry */
        if (!overlap_map(node,succ->address))
        {
            node=succ;
            continue;
        }

        /* widen node so it covers up to the end of succ */
        if (node->range<(succ->address+succ->range-node->address))
            node->range=succ->address+succ->range-node->address;

        /* unlink succ; node stays current and is checked again */
        if (succ==last_entry)
        {
            last_entry=node;
            node->next=null;
        }
        else
        {
            node->next=succ->next;
            node->next->prev=node;
        }
    }
}
/*
 * overlap_map - report whether the range described by begin extends past
 * the address end.
 *
 * Returns FXTRUE when begin->address + begin->range is strictly greater
 * than end, FXFALSE otherwise.
 */
FxBool overlap_map(RangeStruct *begin,FxU32 end)
{
    return ((begin->address + begin->range) > end) ? FXTRUE : FXFALSE;
}

View File

@@ -0,0 +1,14 @@
/*
 * fxremap.h
 *
 * PCI remapper, used to remap the single-board SLI slave to a valid
 * PCI address.
 */
#ifndef _FXREMAP_H_
#define _FXREMAP_H_
/* Remapper entry point that takes no arguments and returns nothing.
 * NOTE(review): the definition is not visible from this header -- confirm
 * its exact behavior against the implementation. */
void fxremap(void);
/* Remapper entry point taking a main()-style (argc, argv) pair.
 * NOTE(review): the meaning of the int result is not visible here --
 * confirm against the implementation. */
int fxremap_main(int argc, char **argv);
#endif /* _FXREMAP_H_ */