converted to nasm syntax

added x86 asm, 3dnow! triangle and mmx, 3dnow! texture download optimizations
This commit is contained in:
koolsmoky
2004-12-23 20:45:56 +00:00
parent 471935edbb
commit 5e0365a93a
15 changed files with 2734 additions and 329 deletions

View File

@@ -23,10 +23,16 @@
# default = 6 (PentiumPro)
# DEBUG=1 disable optimizations and build for debug.
# default = no
# USE_X86=1 use assembler triangle specializations
# USE_X86=1 use assembler triangle specializations; req by CVG
# default = yes
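# USE_MMX=1 allow MMX specializations (texture download). As with
# 3DNow!, the true CPU capabilities are still checked at
# run-time.
# USE_3DNOW=1 allow 3DNow! specializations. However, the true CPU
# capabilities are still checked at run-time to avoid
# crashes.
# default = no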
# TEXUS2=1 embed Texus2 functions into Glide2.
# default = no
# FXOEM2X=1 build fxoem2x.dll
# default = no
#
# Targets:
# all: build everything
@@ -97,13 +103,24 @@ CDEFS += -DGDBG_INFO_ON -DGLIDE_DEBUG -DGLIDE_SANITY_ASSERT -DGLIDE_SANITY_SIZE
endif
override USE_FIFO = 1
override USE_X86 = 1
# cpu optimized triangle
ifeq ($(USE_MMX),1)
CFLAGS += -DGL_MMX
override USE_X86 = 1
endif
ifeq ($(USE_3DNOW),1)
CFLAGS += -DGL_AMD3D
override USE_X86 = 1
endif
ifeq ($(USE_X86),1)
CDEFS += -DGLIDE_DISPATCH_SETUP
CDEFS += -DGLIDE_DISPATCH_SETUP=1 -DGLIDE_DISPATCH_DOWNLOAD=1
override USE_FIFO = 1
else
CDEFS += -DGLIDE_USE_C_TRISETUP
CDEFS += -DGLIDE_USE_C_TRISETUP=1
endif
# fifo
@@ -155,14 +172,19 @@ CFLAGS += $(CDEFS)
GLIDE_OBJECTS = \
fifo.obj \
gsplash.obj \
g3df.obj \
g3df.obj \
gu.obj \
guclip.obj \
gpci.obj \
gump.obj \
diglide.obj \
disst.obj \
ditex.obj \
gbanner.obj \
gerror.obj \
gmovie.obj \
digutex.obj \
ddgump.obj \
gaa.obj \
gdraw.obj \
gglide.obj \
@@ -170,18 +192,28 @@ GLIDE_OBJECTS = \
gsst.obj \
gtex.obj \
gtexdl.obj \
gutex.obj \
cpuid.obj
ifeq ($(USE_X86),1)
GLIDE_OBJECTS += \
xdraw2_def.obj
xdraw2_def.obj \
xtexdl_def.obj
ifeq ($(USE_MMX),1)
GLIDE_OBJECTS += \
xtexdl_mmx.obj
endif
ifeq ($(USE_3DNOW),1)
GLIDE_OBJECTS += \
xdraw2_3dnow.obj \
xtexdl_3dnow.obj
endif
else
GLIDE_OBJECTS += \
gxdraw.obj
endif
GLIDE_OBJECTS += \
digutex.obj \
$(FX_GLIDE_SW)/newpci/pcilib/fxpci.obj \
$(FX_GLIDE_SW)/newpci/pcilib/fxw32.obj \
$(FX_GLIDE_SW)/newpci/pcilib/fxvxd.obj \
@@ -241,7 +273,7 @@ endif
###############################################################################
# main
###############################################################################
all: glide2x $(TEXUS_EXEDIR)/$(TEXUS_EXE)
all: glide2x $(TEXUS_EXEDIR)/$(TEXUS_EXE) fxoem2x
glide2x: $(GLIDE_LIBDIR)/$(GLIDE_DLL)
@@ -249,10 +281,20 @@ $(GLIDE_LIBDIR)/$(GLIDE_DLL): $(GLIDE_OBJECTS) $(GLIDE_RES)
$(LD) -out:$@ $(LDFLAGS) $(GLIDE_OBJECTS) $(LDLIBS) $(GLIDE_RES)
$(TEXUS_EXEDIR)/$(TEXUS_EXE): $(FX_GLIDE_SW)/texus2/cmd/cmd.c $(GLIDE_LIBDIR)/$(GLIDE_IMP)
ifeq ($(TEXUS2),1)
$(CC) -o $@ $(CFLAGS) $^
#ifeq ($(TEXUS2),1)
# $(CC) -o $@ $(CFLAGS) $^
#else
# $(warning Texus2 not enabled... Skipping $(TEXUS_EXE))
#endif
fxoem2x: $(GLIDE_LIBDIR)/fxoem2x.dll
ifeq ($(FXOEM2X),1)
$(GLIDE_LIBDIR)/fxoem2x.dll: ../oem/oeminit.obj ../oem/oeminit.res
# $(LD) -out:$@ $(LDFLAGS) ../oem/oeminit.obj $(LDLIBS) ../oem/oeminit.res
else
$(warning Texus2 not enabled... Skipping $(TEXUS_EXE))
$(GLIDE_LIBDIR)/fxoem2x.dll:
# $(warning FxOem2x not enabled... Skipping fxoem2x.dll)
endif
###############################################################################
@@ -263,6 +305,14 @@ endif
# $(AS) -o $@ $(ASFLAGS) $<
# CPU-variant objects. The same source is built several times; the variant is
# chosen by the extra -D define on the command line and is reflected in the
# object name suffix (_def, _mmx, _3dnow).
#
# Default triangle setup: xdraw2.asm assembled with no variant define.
xdraw2_def.obj: xdraw2.asm
$(AS) -o $@ $(ASFLAGS) $<
# Default texture download procs are compiled from C (xtexdl.c).
xtexdl_def.obj: xtexdl.c
$(CC) -Fo$@ $(CFLAGS) -c $<
# MMX texture download: xtexdl.asm assembled with GL_MMX=1.
xtexdl_mmx.obj: xtexdl.asm
$(AS) -o $@ $(ASFLAGS) -DGL_MMX=1 $<
# 3DNow! triangle setup: xdraw2.asm assembled with GL_AMD3D=1.
xdraw2_3dnow.obj: xdraw2.asm
$(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $<
# 3DNow! texture download: xtexdl.asm assembled with GL_AMD3D=1.
xtexdl_3dnow.obj: xtexdl.asm
$(AS) -o $@ $(ASFLAGS) -DGL_AMD3D=1 $<
$(GLIDE_OBJECTS): fxinline.h fxgasm.h
@@ -286,10 +336,14 @@ clean:
-$(call UNLINK,fxinline.h)
-$(call UNLINK,fxgasm.h)
-$(call UNLINK,$(FX_GLIDE_SW)/texus2/lib/*.obj)
-$(call UNLINK,$(GLIDE_RES))
-$(call UNLINK,../oem/oeminit.obj)
-$(call UNLINK,../oem/oeminit.res)
realclean: clean
-$(call UNLINK,$(GLIDE_LIBDIR)/glide2x.exp)
-$(call UNLINK,$(GLIDE_LIBDIR)/$(GLIDE_DLL))
-$(call UNLINK,$(GLIDE_LIBDIR)/$(GLIDE_IMP))
-$(call UNLINK,$(TEXUS_EXEDIR)/$(TEXUS_EXE))
-$(call UNLINK,$(GLIDE_LIBDIR)/fxoem2x.exp)
-$(call UNLINK,$(GLIDE_LIBDIR)/fxoem2x.dll)
-$(call UNLINK,$(GLIDE_LIBDIR)/fxoem2x.lib)

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1.2.1 2004/12/12 15:18:58 koolsmoky
** changes to support new cpuid; moved single_precision_asm(), double_precision_asm() from cpudetect.asm
**
** Revision 1.1.1.1 1999/12/07 21:49:08 joseph
** Initial checkin into SourceForge.
**
@@ -134,6 +137,7 @@ struct _GlideRoot_s GR_CDECL _GlideRoot;
*/
void single_precision_asm()
{
#if __MSC__
__asm {
push eax ; make room
fnclex ; clear pending exceptions
@@ -144,6 +148,9 @@ void single_precision_asm()
fldcw WORD PTR [esp]
pop eax
}
#else
#error "Need to implement single_precision_asm() for this compiler"
#endif
}
/*;------------------------------------------------------------------------------
@@ -152,6 +159,7 @@ void single_precision_asm()
*/
void double_precision_asm()
{
#if __MSC__
__asm {
push eax ; make room
fnclex ; clear pending exceptions
@@ -163,6 +171,9 @@ void double_precision_asm()
fldcw WORD PTR [esp]
pop eax
}
#else
#error "Need to implement double_precision_asm() for this compiler"
#endif
}
/*---------------------------------------------------------------------------

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:49:08 joseph
** Initial checkin into SourceForge.
**
**
** 22 4/21/98 10:05a Peter
** grSstSelect cleanup (Thanks John/Rufus)
@@ -109,7 +112,7 @@ GR_DIENTRY(grSstQueryBoards, FxBool, ( GrHwConfiguration *hwc ))
GDBG_INIT();
GDBG_INFO(80,"grSstQueryBoards(0x%x)\n",hwc);
#if GLIDE_INIT_HAL
#if 1 /*GLIDE_INIT_HAL*/
hwc->num_sst = (_grSstDetectResources()
? _GlideRoot.hwConfig.num_sst
: 0);

View File

@@ -46,16 +46,16 @@
COMMENT; NEWLINE
#define OFFSET(p,o,pname) if (hex) \
printf("%s\t= %08xh\n",pname,((int)&p.o)-(int)&p); \
else printf("%s\t= %10d\n",pname,((int)&p.o)-(int)&p)
printf("%s\tequ %08xh\n",pname,((int)&p.o)-(int)&p); \
else printf("%s\tequ %10d\n",pname,((int)&p.o)-(int)&p)
#define OFFSET2(p,o,pname) if (hex) \
printf("%s\t= %08xh\n",pname,((int)&o)-(int)&p); \
else printf("%s\t= %10d\n",pname,((int)&o)-(int)&p)
printf("%s\tequ %08xh\n",pname,((int)&o)-(int)&p); \
else printf("%s\tequ %10d\n",pname,((int)&o)-(int)&p)
#define SIZEOF(p,pname) if (hex) \
printf("SIZEOF_%s\t= %08xh\n",pname,sizeof(p)); \
else printf("SIZEOF_%s\t= %10d\n",pname,sizeof(p))
printf("SIZEOF_%s\tequ %08lxh\n",pname,sizeof(p)); \
else printf("SIZEOF_%s\tequ %10ld\n",pname,sizeof(p))
#else
@@ -110,9 +110,9 @@ main (int argc, char **argv)
offsetof(struct _GlideRoot_s, curGC));
printf("#define kTriProcOffset 0x%XUL\n",
offsetof(struct GrGC_s, cmdTransportInfo.triSetupProc));
offsetof(struct GrGC_s, curArchProcs.triSetupProc));
printf("#define kTriProcOffsetClean %d\n",
offsetof(struct GrGC_s, cmdTransportInfo.triSetupProc));
offsetof(struct GrGC_s, curArchProcs.triSetupProc));
#endif /* GLIDE_DISPATCH_SETUP */
printf("/* The # of 2-byte entries in the hw fog table */\n");
@@ -135,6 +135,7 @@ main (int argc, char **argv)
OFFSET (gc,base_ptr,"base_ptr\t");
OFFSET (gc,reg_ptr,"reg_ptr\t\t");
OFFSET (gc,lfb_ptr,"lfb_ptr\t\t");
OFFSET (gc,tex_ptr,"tex_ptr\t\t");
OFFSET (gc,state.cull_mode,"cull_mode\t");
OFFSET (gc, regDataList,"regDataList\t");
OFFSET (gc, tsuDataList,"tsuDataList\t");

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.2.2.1 2004/12/12 15:17:18 koolsmoky
** support new cpuid
**
** Revision 1.2 2003/06/28 19:31:06 guillemj
** Fix compilation warnings.
** Removed some trailing spaces.
@@ -508,6 +511,102 @@ struct _GrState_s
a, r, g, b; /* Constant color values for Delta0 mode */
};
#if GLIDE_DISPATCH_SETUP
/* gpci.c
*
* Set of procs for the current cpu type. These are selected out of
* the _archXXXX proc list that is selected at grGlideInit time.
*
* GrTriSetupProc is the common signature of every dispatchable triangle
* setup routine: three vertex pointers in, FxI32 out.
*
* A GrTriSetupProcVector holds the procs for one cpu flavour. The
* TRISETUP_* macros index it as [0] = culling disabled, [1] = culling
* enabled, and, in the 6-entry form, [2]/[3] = rgb and [4]/[5] = argb
* with the same no-cull/cull ordering.
*/
typedef FxI32 (FX_CALL* GrTriSetupProc)(const void*, const void*, const void*);
/* NOTE(review): this guard is spelled GLIDE_PACKED_RBG while the extern
* declarations below are guarded by GLIDE_PACKED_RGB. If only the RGB
* spelling is ever defined, the vector always has 2 entries. Confirm
* whether that is intended before changing either spelling; the table
* in gpci.c and the TRISETUP_RGB/TRISETUP_ARGB macros use the RBG
* spelling as well and would have to change together.
*/
#if GLIDE_PACKED_RBG
typedef GrTriSetupProc GrTriSetupProcVector[6];
#else
typedef GrTriSetupProc GrTriSetupProcVector[2];
#endif
typedef GrTriSetupProcVector GrTriSetupProcArchVector;
/* Declarations of the dispatchable procs found in xdraw2.asm and
* xtexdl.c for the triangle and texture download procs respectively.
*/
extern FxI32 FX_CALL _trisetup(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull(const void*, const void*, const void*);
#if GLIDE_PACKED_RGB
extern FxI32 FX_CALL _trisetup_rgb(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull_rgb(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_argb(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull_argb(const void*, const void*, const void*);
#endif /* GLIDE_PACKED_RGB */
#if GL_AMD3D
/* 3DNow! variants of the same procs (names carry the _3DNow suffix). */
extern FxI32 FX_CALL _trisetup_3DNow(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull_3DNow(const void*, const void*, const void*);
#if GLIDE_PACKED_RGB
extern FxI32 FX_CALL _trisetup_rgb_3DNow(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull_rgb_3DNow(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_argb_3DNow(const void*, const void*, const void*);
extern FxI32 FX_CALL _trisetup_cull_argb_3DNow(const void*, const void*, const void*);
#endif /* GLIDE_PACKED_RGB */
#endif /* GL_AMD3D */
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
/* _GlideRoot.deviceArchProcs.curTexProcs points at an array of
* (possibly specialized) function pointers indexed by texture format
* size (8/16 bits) and texture line width (1/2/4/>4).
*
* xtexdl.c
*/
struct GrGC_s;
/* Common signature of every texture download proc: the proc receives the
* context, a base address, maxS, the minT..maxT row range and the source
* texel data.
*/
typedef void (FX_CALL* GrTexDownloadProc)(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
/* [format size: 8/16 bits][line width: 1/2/4/>4] */
typedef GrTexDownloadProc GrTexDownloadProcVector[2][4];
/* Default procs, named _grTexDownload_Default_<bits>_<width>. */
extern void FX_CALL _grTexDownload_Default_8_1(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_8_2(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_8_4(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_8_WideS(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_16_1(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_16_2(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_16_4(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
extern void FX_CALL _grTexDownload_Default_16_WideS(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
#if GL_AMD3D
/* xtexdl.asm */
extern void FX_CALL _grTexDownload_3DNow_MMX(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
#endif /* GL_AMD3D */
#if GL_MMX
/* xtexdl.asm */
extern void FX_CALL _grTexDownload_MMX(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
void* texData);
#endif /* GL_MMX */
#endif /* GLIDE_DISPATCH_DOWNLOAD */
typedef struct GrGC_s
{
FxU32
@@ -532,6 +631,30 @@ typedef struct GrGC_s
GrState
state; /* state of Glide/SST */
#if GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD
struct {
#if GLIDE_DISPATCH_SETUP
/* Current triangle rendering proc specialized for culling/no
* culling and viewport/window coordinates.
*/
GrTriSetupProc triSetupProc;
/* Vector to choose triangle rendering proc from based
* on culling or no-cull this vector should be specialized
* on viewport vs window coordinates.
*/
GrTriSetupProcVector* coorTriSetupVector;
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
/* Vector of texture download procs specialized by size
* and processor vendor type.
*/
GrTexDownloadProcVector* texDownloadProcs;
#endif /* GLIDE_DISPATCH_DOWNLOAD */
} curArchProcs;
#endif /* GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD */
struct cmdTransportInfo {
FxU32 triPacketHdr; /* Pre-computed packet header for
* independent triangles.
@@ -573,10 +696,6 @@ typedef struct GrGC_s
FxBool fifoLfbP; /* Do we expect lfb writes to go through the fifo? */
FxBool lfbLockCount; /* Have we done an lfb lock? Count of the locks. */
#if GLIDE_DISPATCH_SETUP
FxI32 (FX_CALL *triSetupProc)(const GrVertex* a, const GrVertex* b, const GrVertex* c);
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_USE_SHADOW_FIFO
FxU32* fifoShadowBase; /* Buffer that shadows the hw fifo for debugging */
@@ -820,6 +939,17 @@ struct _GlideRoot_s {
* sst's and actual boards.
*/
GrGC GCs[MAX_NUM_SST]; /* one GC per board */
#if GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD
struct {
#if GLIDE_DISPATCH_SETUP
GrTriSetupProcArchVector* curTriProcs;
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
GrTexDownloadProcVector* curTexProcs;
#endif /* GLIDE_DISPATCH_DOWNLOAD */
} deviceArchProcs;
#endif /* GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD */
};
extern struct _GlideRoot_s GR_CDECL _GlideRoot;
@@ -887,37 +1017,22 @@ extern GrGCFuncs _curGCFuncs;
void _grMipMapInit(void);
#if GLIDE_DISPATCH_SETUP
FxI32 FX_CSTYLE
_trisetup_cull(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
FxI32 FX_CSTYLE
_trisetup(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
#define TRISETUP_NORGB(__cullMode) (((__cullMode) == GR_CULL_DISABLE) \
? _trisetup \
: _trisetup_cull)
#if GLIDE_PACKED_RGB
FxI32 FX_CSTYLE
_trisetup_cull_rgb(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
FxI32 FX_CSTYLE
_trisetup_cull_argb(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
FxI32 FX_CSTYLE
_trisetup_rgb(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
FxI32 FX_CSTYLE
_trisetup_argb(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
? (*gc->curArchProcs.coorTriSetupVector)[0] \
: (*gc->curArchProcs.coorTriSetupVector)[1])
#if GLIDE_PACKED_RBG
#define TRISETUP_RGB(__cullMode) (((__cullMode) == GR_CULL_DISABLE) \
? _trisetup_rgb \
: _trisetup_cull_rgb)
? (*gc->curArchProcs.coorTriSetupVector)[2] \
: (*gc->curArchProcs.coorTriSetupVector)[3])
#define TRISETUP_ARGB(__cullMode) (((__cullMode) == GR_CULL_DISABLE) \
? _trisetup_argb \
: _trisetup_cull_argb)
? (*gc->curArchProcs.coorTriSetupVector)[4] \
: (*gc->curArchProcs.coorTriSetupVector)[5])
#else /* !GLIDE_PACKED_RGB */
#define TRISETUP_RGB(__cullMode) TRISETUP_NORGB(__cullMode)
#define TRISETUP_ARGB(__cullMode) TRISETUP_NORGB(__cullMode)
#endif /* !GLIDE_PACKED_RGB */
#define TRISETUP (*gc->cmdTransportInfo.triSetupProc)
#define TRISETUP (*gc->curArchProcs.triSetupProc)
#else /* !GLIDE_DISPATCH_SETUP */
FxI32 FX_CSTYLE
_trisetup_asm(const GrVertex *va, const GrVertex *vb, const GrVertex *vc);
@@ -2086,7 +2201,7 @@ enum {
};
#if (GLIDE_PLATFORM & GLIDE_HW_CVG)
#define TEX_ROW_ADDR_INCR(__t, __lod) ((__t) << 9)
#define TEX_ROW_ADDR_INCR(__t) ((__t) << 9)
#elif (GLIDE_PLATFORM & GLIDE_HW_H3)
#define TEX_ROW_ADDR_INCR(__t, __lod) ((__t) << 7)
#else
@@ -2380,8 +2495,8 @@ _grErrorCallback(const char* const procName,
va_list args);
#endif
extern FxU32 GR_CDECL
_cpu_detect_asm(void);
/*extern FxU32 GR_CDECL
_cpu_detect_asm(void);*/
extern void GR_CDECL
single_precision_asm(void);

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.4 2000/01/28 20:52:17 joseph
** Changes to support building shared libraries with PIC support.
**
** Revision 1.3 2000/01/17 22:18:41 joseph
** A nicer, cleaner fix than the evil hack.
**
@@ -680,13 +683,19 @@ all_done:
#else
#if defined(__MSC__)
{
/* XXX [koolsmoky] fix this. for now we just call TRISETUP
extern struct _GlideRoot_s _GlideRoot;
_asm {
mov eax, [_GlideRoot + kCurGCOffset];
mov eax, [eax + kTriProcOffset];
jmp eax;
}
}*/
GR_BEGIN_NOFIFOCHECK("grDrawTriangle",92);
GR_CHECK_F(myName, !a || !b || !c, "NULL pointer passed");
TRISETUP(a, b, c);
GR_END();
}
#endif
#if defined( __linux__ )
@@ -723,7 +732,7 @@ all_done:
#endif
"jmp *%0"
: /* no outputs */
: "m" (_GlideRoot.curGC->cmdTransportInfo.triSetupProc)
: "m" (_GlideRoot.curGC->curArchProcs.triSetupProc)
#if defined (PIC) || !defined (BIG_OPT)
:
#endif

View File

@@ -19,6 +19,10 @@
**
** $Header$
** $Log$
** Revision 1.2.2.1 2004/12/12 15:24:40 koolsmoky
** grDitherMode(): force 2x2 dithering for 4x1 dither
** grBufferSwap(): enable vsync and set swapbufferinterval to 0 for triple buffering.
**
** Revision 1.2 2000/10/03 18:28:33 mercury
** 003-clean_up_cvg-000, cvg tree cleanup.
**
@@ -767,9 +771,13 @@ GR_ENTRY(grBufferSwap, void, (int swapInterval))
* any better. If, however, the user has not chosen, but the app
* wants something other than 0 then we need to honor their choice.
*/
swapInterval = ((_GlideRoot.environment.swapInterval >= 0)
/*swapInterval = ((_GlideRoot.environment.swapInterval >= 0)
? _GlideRoot.environment.swapInterval
: (gc->scanline_interleaved ? MAX(swapInterval, 1) : swapInterval));
: (gc->scanline_interleaved ? MAX(swapInterval, 1) : swapInterval));*/
/* always allow user override */
if (_GlideRoot.environment.swapInterval >= 0) {
swapInterval = _GlideRoot.environment.swapInterval;
}
GR_CHECK_F(myName,
(swapInterval > 255) || (swapInterval < 0),
@@ -1713,9 +1721,9 @@ GR_ENTRY(grGlideShutdown, void, (void))
*/
for(i = 0; i < _GlideRoot.hwConfig.num_sst; i++) {
if (_GlideRoot.GCs[i].hwInitP) {
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(_GlideRoot.GCs[i].base_ptr, FXFALSE);
}
/*}*/
sst1InitShutdown(_GlideRoot.GCs[i].base_ptr);
_GlideRoot.GCs[i].hwInitP = FXFALSE;
@@ -2141,11 +2149,11 @@ _grUpdateTriPacketHdr(FxU32 paramMask,
switch(colorComp) {
case COLOR_COMP_ARGB:
gc->cmdTransportInfo.triSetupProc = TRISETUP_ARGB(cullMode);
gc->curArchProcs.triSetupProc = TRISETUP_ARGB(cullMode);
break;
case COLOR_COMP_RGB:
gc->cmdTransportInfo.triSetupProc = TRISETUP_RGB(cullMode);
gc->curArchProcs.triSetupProc = TRISETUP_RGB(cullMode);
break;
/* If no rgb data then it is not worthwhile to pack
@@ -2153,12 +2161,12 @@ _grUpdateTriPacketHdr(FxU32 paramMask,
* and just use the looping proc.
*/
default:
gc->cmdTransportInfo.triSetupProc = TRISETUP_NORGB(cullMode);
gc->curArchProcs.triSetupProc = TRISETUP_NORGB(cullMode);
paramMask &= ~SSTCP_PKT3_PACKEDCOLOR;
break;
}
} else {
gc->cmdTransportInfo.triSetupProc = TRISETUP_NORGB(cullMode);
gc->curArchProcs.triSetupProc = TRISETUP_NORGB(cullMode);
}
#endif /* GLIDE_DISPATCH_SETUP */

View File

@@ -19,6 +19,10 @@
**
** $Header$
** $Log$
** Revision 1.1.1.1.2.1 2004/12/12 15:27:47 koolsmoky
** changes to support new cpuid
** set default to disable alpha dither subtraction
**
** Revision 1.1.1.1 1999/12/07 21:49:10 joseph
** Initial checkin into SourceForge.
**
@@ -229,6 +233,84 @@
#include <glide.h>
#include "fxglide.h"
#if GLIDE_DISPATCH_SETUP
/* Collection of all of the known procs for a given system */
/* One row per cpu flavour; _GlideInitEnvironment() selects a row by
* literal index (row 0 = default, row 1 = 3DNow!).
* Columns within a row: no-cull, cull, and in the 6-wide form
* rgb no-cull, rgb cull, argb no-cull, argb cull.
*
* NOTE(review): the width guard is spelled GLIDE_PACKED_RBG, whereas the
* prototypes of the _rgb/_argb procs in fxglide.h are guarded by
* GLIDE_PACKED_RGB. Confirm which spelling the build actually defines.
*/
#if GLIDE_PACKED_RBG
static GrTriSetupProc _triSetupProcs[][6] =
#else /* !GLIDE_PACKED_RBG */
static GrTriSetupProc _triSetupProcs[][2] =
#endif /* !GLIDE_PACKED_RBG */
{
/* Default Procs */
{ _trisetup, _trisetup_cull
#if GLIDE_PACKED_RBG
, _trisetup_rgb, _trisetup_cull_rgb, _trisetup_argb, _trisetup_cull_argb
#endif /* GLIDE_PACKED_RBG */
}
#if GL_AMD3D
/* 3DNow!(tm) Procs */
,{ _trisetup_3DNow, _trisetup_cull_3DNow
#if GLIDE_PACKED_RBG
, _trisetup_rgb_3DNow, _trisetup_cull_rgb_3DNow, _trisetup_argb_3DNow, _trisetup_cull_argb_3DNow
#endif /* GLIDE_PACKED_RBG */
}
#endif /* GL_AMD3D */
};
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
/* Texture download procs, one row per cpu flavour.
 *
 * _GlideInitEnvironment() selects a row by literal index, so the row
 * positions are fixed:
 *   [0] default procs
 *   [1] 3DNow! procs
 *   [2] MMX procs
 * Within a row the procs are indexed [format size: 8/16 bits]
 * [line width: 1/2/4/>4].
 *
 * Every optional row carries its own leading comma so that any
 * combination of GL_AMD3D / GL_MMX yields a valid initializer.  When
 * only GL_MMX is enabled, row [1] is filled with the default procs so
 * that the MMX row still lands at index 2.
 *
 * NOTE(review): the 16-bit rows use the wide proc for line width 4;
 * _grTexDownload_Default_16_4 is declared but not referenced here.
 * This is kept exactly as it was -- confirm that it is intended.
 */
static GrTexDownloadProc _texDownloadProcs[][2][4] =
{
  /* [0] Default Procs */
  {
    {
      _grTexDownload_Default_8_1,
      _grTexDownload_Default_8_2,
      _grTexDownload_Default_8_4,
      _grTexDownload_Default_8_WideS
    },
    {
      _grTexDownload_Default_16_1,
      _grTexDownload_Default_16_2,
      _grTexDownload_Default_16_WideS,
      _grTexDownload_Default_16_WideS
    }
  }
#if GL_AMD3D
  /* [1] 3DNow!(tm) Procs */
  ,{
    {
      _grTexDownload_Default_8_1,
      _grTexDownload_Default_8_2,
      _grTexDownload_Default_8_4,
      _grTexDownload_3DNow_MMX
    },
    {
      _grTexDownload_Default_16_1,
      _grTexDownload_Default_16_2,
      _grTexDownload_3DNow_MMX,
      _grTexDownload_3DNow_MMX
    }
  }
#elif GL_MMX
  /* [1] Placeholder (default procs) keeping the MMX row at index 2 */
  ,{
    {
      _grTexDownload_Default_8_1,
      _grTexDownload_Default_8_2,
      _grTexDownload_Default_8_4,
      _grTexDownload_Default_8_WideS
    },
    {
      _grTexDownload_Default_16_1,
      _grTexDownload_Default_16_2,
      _grTexDownload_Default_16_WideS,
      _grTexDownload_Default_16_WideS
    }
  }
#endif /* GL_AMD3D / GL_MMX placeholder */
#if GL_MMX
  /* [2] MMX Procs */
  ,{
    {
      _grTexDownload_Default_8_1,
      _grTexDownload_Default_8_2,
      _grTexDownload_Default_8_4,
      _grTexDownload_MMX
    },
    {
      _grTexDownload_Default_16_1,
      _grTexDownload_Default_16_2,
      _grTexDownload_MMX,
      _grTexDownload_MMX
    }
  }
#endif /* GL_MMX */
};
#endif /* GLIDE_DISPATCH_DOWNLOAD */
/*-------------------------------------------------------------------
Function: _grSstDetectResources
Date: --
@@ -327,9 +409,9 @@ _grSstDetectResources(void)
* pair then we don't want to waste mtrr's that we're never
* really going to write to.
*/
if (!inSliPairP && (_GlideRoot.CPUType.family >= 6)) {
/*if (!inSliPairP && (_GlideRoot.CPUType.family >= 6)) {*/
sst1InitCaching((FxU32*)devRegs, FXTRUE);
}
/*}*/
if (!sst1InitRegisters((FxU32*)devRegs) ||
!sst1InitGetDeviceInfo((FxU32*)devRegs, &dummyDevInfo)) goto __errRegFailure;
@@ -445,9 +527,9 @@ _grSstDetectResources(void)
#if GLIDE_INIT_HAL
fxHalShutdown(devRegs);
#else /* !GLIDE_INIT_HAL */
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching((FxU32*)devRegs, FXFALSE);
}
/*}*/
pciUnmapPhysical((FxU32)devRegs, 0x1000000UL);
#endif /* !GLIDE_INIT_HAL */
}
@@ -565,6 +647,37 @@ _GlideInitEnvironment(void)
GDBG_INFO(0,"CPU Extensions disabled\n");
}
#if GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD
  /* Select the proc tables for the detected cpu.  The literal row
   * indices used below must stay in step with the row order of
   * _triSetupProcs and _texDownloadProcs (0 = default, 1 = 3DNow!,
   * 2 = MMX).
   */

  /* Default case */
#if GLIDE_DISPATCH_SETUP
  _GlideRoot.deviceArchProcs.curTriProcs = _triSetupProcs + 0;
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
  _GlideRoot.deviceArchProcs.curTexProcs = _texDownloadProcs + 0;
#endif /* GLIDE_DISPATCH_DOWNLOAD */

  /* Check for vendor specific optimization cases.
   *
   * These guards use #if rather than #ifdef so that they agree with the
   * #if GL_MMX / #if GL_AMD3D guards around the proc tables and their
   * prototypes: a build that defines the macro as 0 must not select a
   * row that was never compiled in.
   */
#if GL_MMX
  /* NOTE(review): row 2 is the MMX row only if a row 1 is also compiled
   * into _texDownloadProcs -- verify this for MMX-only builds.
   */
  if (_GlideRoot.CPUType.os_support & _CPU_FEATURE_MMX) {
#if GLIDE_DISPATCH_DOWNLOAD
    _GlideRoot.deviceArchProcs.curTexProcs = _texDownloadProcs + 2;
#endif /* GLIDE_DISPATCH_DOWNLOAD */
  }
#endif /* GL_MMX */

#if GL_AMD3D
  /* Tested after MMX, so the 3DNow! procs win when both are usable. */
  if (_GlideRoot.CPUType.os_support & _CPU_FEATURE_3DNOW) {
#if GLIDE_DISPATCH_SETUP
    _GlideRoot.deviceArchProcs.curTriProcs = _triSetupProcs + 1;
#endif /* GLIDE_DISPATCH_SETUP */
#if GLIDE_DISPATCH_DOWNLOAD
    _GlideRoot.deviceArchProcs.curTexProcs = _texDownloadProcs + 1;
#endif /* GLIDE_DISPATCH_DOWNLOAD */
  }
#endif /* GL_AMD3D */
#endif /* GLIDE_DISPATCH_SETUP || GLIDE_DISPATCH_DOWNLOAD */
/* Check for user environment tweaks */
{
const char* envStr;
@@ -602,6 +715,7 @@ _GlideInitEnvironment(void)
GDBG_INFO(80," swapInterval: %d\n",_GlideRoot.environment.swapInterval);
GDBG_INFO(80," noSplash: %d\n",_GlideRoot.environment.noSplash);
GDBG_INFO(80," shamelessPlug: %d\n",_GlideRoot.environment.shamelessPlug);
/*GDBG_INFO(80," cpu: %d\n",_GlideRoot.CPUType);*/
GDBG_INFO(80," snapshot: %d\n",_GlideRoot.environment.snapshot);
GDBG_INFO(80," disableDitherSub: %d\n",_GlideRoot.environment.disableDitherSub);
}

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.2.2.1 2004/12/12 15:26:04 koolsmoky
** changes to support new cpuid
**
** Revision 1.2 2000/10/03 18:28:33 mercury
** 003-clean_up_cvg-000, cvg tree cleanup.
**
@@ -333,6 +336,7 @@
#include <fxdll.h>
#include <glide.h>
#include "fxglide.h"
#include "fxsplash.h"
#if (GLIDE_PLATFORM & GLIDE_OS_WIN32)
#define WIN32_LEAN_AND_MEAN
@@ -487,9 +491,9 @@ __tryReOpen:
}
/* Re-enabled write combining */
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(sstRegs, FXTRUE);
}
/*}*/
rv = sst1InitRegisters(sstRegs);
if (!rv) {
@@ -880,10 +884,10 @@ __tryReOpen:
* NB: The order of the caching changes is relevant here since
* we don't want to waste mtrr's, even briefly.
*/
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(gc->base_ptr, FXFALSE);
sst1InitCaching((gc + 1)->base_ptr, FXTRUE);
}
/*}*/
sst1InitShutdown(gc->base_ptr);
@@ -977,10 +981,10 @@ __tryReOpen:
* NB: The order of the caching changes is relevant here since
* we don't want to waste mtrr's, even briefly.
*/
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(gc->slave_ptr, FXFALSE);
sst1InitCaching(gc->base_ptr, FXTRUE);
}
/*}*/
}
/* Save that we swapped the master sense so that we can
@@ -1338,9 +1342,9 @@ __errSliExit:
* This is currently being done in _grDetectResources so that we
* can match the nt driver semantics in win95.
*/
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(gc->reg_ptr, FXTRUE);
}
/*}*/
#endif /* !GLIDE_INIT_HAL */
#else
#error "Need to write command transport init for glide for this hw"
@@ -1350,6 +1354,12 @@ __errSliExit:
/* We're effectively open now */
gc->open = FXTRUE;
/* Setup the procs that we can do w/o any mode knowledge */
#if GLIDE_DISPATCH_DOWNLOAD
  /* Copy the cpu-selected texture download table into this context.
   * curArchProcs.texDownloadProcs and deviceArchProcs.curTexProcs are
   * only declared when GLIDE_DISPATCH_DOWNLOAD is set, so the copy is
   * guarded by that macro alone.
   */
  gc->curArchProcs.texDownloadProcs = _GlideRoot.deviceArchProcs.curTexProcs;
#endif /* GLIDE_DISPATCH_DOWNLOAD */
#if GLIDE_DISPATCH_SETUP
  /* Likewise for the triangle setup vector, whose members are only
   * declared when GLIDE_DISPATCH_SETUP is set.
   */
  gc->curArchProcs.coorTriSetupVector = _GlideRoot.deviceArchProcs.curTriProcs;
#endif /* GLIDE_DISPATCH_SETUP */
/*------------------------------------------------------
GC Init
------------------------------------------------------*/
@@ -1558,13 +1568,53 @@ __errSliExit:
if (!_GlideRoot.environment.noSplash) {
HMODULE newSplash;
if (newSplash = LoadLibrary("3dfxsplash2.dll")) {
FARPROC fxSplash;
if (newSplash = LoadLibrary("3dfxspl2.dll")) {
GrState glideState;
FxBool didLoad;
GrSplashProc fxSplash;
GrSplashInitProc fxSplashInit;
GrSplashPlugProc fxSplashPlug;
GrSplashShutdownProc fxSplashShutdown;
if (fxSplash = GetProcAddress(newSplash, "_fxSplash@16")) {
fxSplash(hWnd, gc->state.screen_width, gc->state.screen_height, nAuxBuffers);
_GlideRoot.environment.noSplash = 1;
}
fxSplash = (GrSplashProc)GetProcAddress(newSplash, "_fxSplash@20");
fxSplashInit = (GrSplashInitProc)GetProcAddress(newSplash, "_fxSplashInit@24");
fxSplashPlug = (GrSplashPlugProc)GetProcAddress(newSplash, "_fxSplashPlug@16");
fxSplashShutdown = (GrSplashShutdownProc)GetProcAddress(newSplash, "_fxSplashShutdown@0");
didLoad = ((fxSplash != NULL) &&
(fxSplashInit != NULL) &&
(fxSplashPlug != NULL) &&
(fxSplashShutdown != NULL));
if (didLoad & 0/* [dBorca] i am evil! harr-harr */) {
/* new style DLL */
grGlideGetState(&glideState);
didLoad = fxSplashInit(hWnd,
gc->state.screen_width, gc->state.screen_height,
nColBuffers, nAuxBuffers,
format);
if (didLoad) {
fxSplash(0.0f, 0.0f,
(float)gc->state.screen_width,
(float)gc->state.screen_height,
0);
fxSplashShutdown();
_GlideRoot.environment.noSplash = 1;
}
grGlideSetState((const void*)&glideState);
} else {
/* old style DLL */
typedef int (FX_CALL *GrSplashOld) (FxU32 hWind, FxU32 scrWidth, FxU32 scrHeight, FxU32 nAuxBuffers);
GrSplashOld fxSplashOld = (GrSplashOld)GetProcAddress(newSplash, "_fxSplash@16");
if (fxSplashOld) {
grGlideGetState(&glideState);
fxSplashOld(hWnd, gc->state.screen_width, gc->state.screen_height, nAuxBuffers);
_GlideRoot.environment.noSplash = 1;
grGlideSetState((const void*)&glideState);
}
}
FreeLibrary(newSplash);
}
}
#endif /* (GLIDE_PLATFORM & GLIDE_OS_WIN32) */
@@ -1682,9 +1732,9 @@ GR_ENTRY(grSstWinClose, void, (void))
* track of the mtrr's.
*/
sst1InitIdle(gc->reg_ptr);
if (_GlideRoot.CPUType.family >= 6) {
/*if (_GlideRoot.CPUType.family >= 6) {*/
sst1InitCaching(gc->base_ptr, FXFALSE);
}
/*}*/
sst1InitShutdown(gc->reg_ptr);
#endif /* !GLIDE_INIT_HAL */

View File

@@ -19,6 +19,9 @@
**
** $Header$
** $Log$
** Revision 1.2 2000/10/03 18:28:33 mercury
** 003-clean_up_cvg-000, cvg tree cleanup.
**
** Revision 1.1.1.1 1999/12/07 21:49:11 joseph
** Initial checkin into SourceForge.
**
@@ -391,7 +394,9 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
{
const FifoChipField chipId = (FifoChipField)(0x02UL << tmu);
FxI32 sh;
#if !GLIDE_DISPATCH_DOWNLOAD
FxU32 max_s, width, tmu_baseaddress;
#endif
#define FN_NAME "grTexDownloadMipMapLevelPartial"
GR_BEGIN_NOFIFOCHECK(FN_NAME, 89);
@@ -477,6 +482,35 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
REG_GROUP_END();
}
#if GLIDE_DISPATCH_DOWNLOAD /* cpu optimized texture downloads */
/* XXX [koolsmoky] need to implement work around for 8bit-wide downloads
* for old revision TMUs.
*/
/* Do the download */
{
/* Work out which specialized download proc to call and the arguments
* it needs, then dispatch through the per-context proc table.
*/
const FxU32
/* Width of this lod, looked up by aspect ratio and lod. */
width = _grMipMapHostWH[aspectRatio][thisLod][0],
/* First table index: 0 for formats below GR_TEXFMT_16BIT, else 1. */
formatSel = (format >= GR_TEXFMT_16BIT),
/* Texture base pointer plus the tmu (<< 21) and lod (<< 17) offsets. */
tmuBaseAddr = ((FxU32)gc->tex_ptr +
(((FxU32)tmu) << 21UL) +
(((FxU32)thisLod) << 17UL));
FxU32
/* Second table index: widths 1, 2, 4 give 0, 1, 2; clamped to 3 below. */
widthSel = (width >> 0x01UL),
/* width / 2 for 16-bit, width / 4 for 8-bit; presumably the number of
* 32-bit words per row, given the "* 4" in the byte count below --
* TODO confirm.
*/
max_s = width >> (formatSel
? 1 /* 16-bit texture */
: 2); /* 8-bit texture */
/* max_s is unsigned, so this only catches 0 (narrow rows). */
if (max_s <= 0) max_s = 1;
/* Every width above 4 shares the last table slot. */
if (widthSel > 2) widthSel = 3;
/* Account for rows t..max_t inclusive. */
_GlideRoot.stats.texBytes += max_s * (max_t - t + 1) * 4;
(*((*gc->curArchProcs.texDownloadProcs)[formatSel][widthSel]))(gc,
tmuBaseAddr,
max_s, t, max_t,
data);
}
#else
/*------------------------------------------------------------
Determine max_s
------------------------------------------------------------*/
@@ -499,7 +533,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
------------------------------------------------------------*/
if (format < GR_TEXFMT_16BIT) { /* 8 bit textures */
/* Hoisted initialization */
FxU32 tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
FxU32 tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
const FxU8* src8 = (const FxU8*)data;
switch(width) {
@@ -525,7 +559,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
LINEAR_WRITE_END();
src8 += width;
tex_address += TEX_ROW_ADDR_INCR(1, thisLod);
tex_address += TEX_ROW_ADDR_INCR(1);
}
break;
@@ -538,7 +572,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
LINEAR_WRITE_END();
src8 += width;
tex_address += TEX_ROW_ADDR_INCR(1, thisLod);
tex_address += TEX_ROW_ADDR_INCR(1);
}
break;
@@ -550,7 +584,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
for (; t <= max_t; t++) {
FxU32 s;
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
LINEAR_WRITE_BEGIN(max_s, kLinearWriteTex,
(FxU32)tex_address - (FxU32)gc->tex_ptr,
@@ -572,7 +606,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
}
} else { /* 16-bit Textures */
/* Hoisted initialization */
FxU32 tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
FxU32 tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
const FxU16* src16 = (const FxU16*)data;
/* Cases 1, 2 don't need inner loops for s */
@@ -586,7 +620,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
LINEAR_WRITE_END();
src16 += width;
tex_address += TEX_ROW_ADDR_INCR(1, thisLod);
tex_address += TEX_ROW_ADDR_INCR(1);
}
break;
@@ -599,7 +633,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
LINEAR_WRITE_END();
src16 += width;
tex_address += TEX_ROW_ADDR_INCR(1, thisLod);
tex_address += TEX_ROW_ADDR_INCR(1);
}
break;
@@ -607,7 +641,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
for (; t <= max_t; t++) {
FxU32 s;
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
/* Loop unrolled to process 2 dwords per iteration */
LINEAR_WRITE_BEGIN(max_s, kLinearWriteTex,
@@ -632,6 +666,7 @@ GR_ENTRY(grTexDownloadMipMapLevelPartial,
break;
}
}
#endif
/*------------------------------------------------------------
Restore TLOD, texMode, baseAddress
@@ -771,7 +806,7 @@ GR_ENTRY(ConvertAndDownloadRle,
rle_line_end=rle_line+width+u0;
for(t=0;t<max_t;t++) {
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
src = rle_line + u0;
rle_decode_line_asm(tlut,&bm_data[offset],rle_line);
@@ -789,7 +824,7 @@ GR_ENTRY(ConvertAndDownloadRle,
}
if (dest_height>height) {
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t, thisLod);
tex_address = tmu_baseaddress + TEX_ROW_ADDR_INCR(t);
src=rle_line+u0;
LINEAR_WRITE_BEGIN(max_s, kLinearWriteTex,

View File

@@ -19,6 +19,9 @@
;; $Header$
;; $Revision$
;; $Log$
;; Revision 1.1.1.1 1999/12/07 21:49:13 joseph
;; Initial checkin into SourceForge.
;;
;;
;; 7 5/18/98 12:16p Peter
;; culling enabling
@@ -47,174 +50,164 @@
; B4 Chip field fix.
;;
TITLE xdraw2.asm
OPTION OLDSTRUCTS
.586P
%include "xos.inc"
.model FLAT,C ; Flat memory, mangle publics with leading '_'
; some useful floating load and store macros <ala gmt>
flds TEXTEQU <fld DWORD PTR>
fsubs TEXTEQU <fsub DWORD PTR>
fmuls TEXTEQU <fmul DWORD PTR>
extrn _GlideRoot
extrn _FifoMakeRoom
_DATA SEGMENT
One DD 03f800000r
;; GR_FIFO_WRITE base, offset, value
;; Stores `value` (%3) to memory at [base + offset] (%1 + %2) with a
;; single mov; no pointer or room bookkeeping is done here.
%MACRO GR_FIFO_WRITE 3
mov [%1 + %2], %3
%ENDMACRO ; GR_FIFO_WRITE
;; Build-variant selection: with GL_AMD3D defined the MMX store helpers are
;; available and every procedure opened through PROC_TYPE gets a `_3DNow`
;; suffix; otherwise PROC_TYPE opens the procedure under its plain name.
;; In both variants the procedure is declared with 12 bytes of arguments.
%ifdef GL_AMD3D
;; 3dnow!
;; WRITE_MM1_FIFO_ALIGNED offset
;; 64-bit store of mm1 to [fifo + offset].
;; NOTE(review): `fifo` is not defined in the visible part of this file -
;; presumably a register alias set up by the including code; confirm.
%MACRO WRITE_MM1_FIFO_ALIGNED 1
movq [fifo+%1], mm1 ; store current param | previous param
%ENDMACRO ; WRITE_MM1_FIFO_ALIGNED
;; WRITE_MM1LOW_FIFO
;; 32-bit store: only the low dword of mm1 is written to [fifo].
%MACRO WRITE_MM1LOW_FIFO 0
movd [fifo], mm1 ; store low dword of mm1 only
%ENDMACRO ; WRITE_MM1LOW_FIFO
;; PROC_TYPE name - open procedure `name_3DNow` with 12 bytes of arguments
%MACRO PROC_TYPE 1
proc %1_3DNow, 12
%ENDM
%else
;; original code
;; PROC_TYPE name - open procedure `name` with 12 bytes of arguments
%MACRO PROC_TYPE 1
proc %1, 12
%ENDM
%endif
segment DATA
One DD 1.0
Area DD 0
dxAB DD 0
dxBC DD 0
dyAB DD 0
dyBC DD 0
culltest DD 0
;; Extra junk to pad out to the next cache line.
bias0 DD 0
pad1 DD 0
pad2 DD 0
pad3 DD 0
pad4 DD 0
pad5 DD 0
pad6 DD 0
pad7 DD 0
bias1 DD 0
_DATA ENDS
; Ugly, but seems to workaround the problem with locally defined
; data segment globals not getting relocated properly when using
; djgpp.
zArea TEXTEQU <One+04h>
zdxAB TEXTEQU <One+08h>
zdxBC TEXTEQU <One+0ch>
zdyAB TEXTEQU <One+10h>
zdyBC TEXTEQU <One+14h>
zculltest TEXTEQU <One+18h>
;;; Definitions of cvg regs and glide root structures.
INCLUDE fxgasm.h
%INCLUDE "fxgasm.h"
;; enables/disables trisProcessed and trisDrawn counters
STATS = 1
%define STATS 1
;--------------------------------------------------------------------------
; Arguments (STKOFF = 16 from 4 pushes)
STKOFF = 16
_va$ = 4 + STKOFF
_vb$ = 8 + STKOFF
_vc$ = 12 + STKOFF
;;; Arguments (STKOFF = 16 from 4 pushes)
STKOFF equ 16
_va$ equ 4 + STKOFF
_vb$ equ 8 + STKOFF
_vc$ equ 12 + STKOFF
X = 0
Y = 4
; edx is used as index, loading from *src
gc TEXTEQU <esi> ; points to graphics context
;; coordinate offsets into vertex.
;; NB: These are constants and are not
;; user settable like the rest of the
;; parameter offset. Weird.
X equ 0
Y equ 4
_TEXT SEGMENT
segment CONST
T2003 DD 12288.0 ; 12288
T2005 DD 1.0 ; 1
T2006 DD 256.0 ; 256
align 4
PUBLIC _trisetup_cull@12
_trisetup_cull@12 PROC NEAR
.code
segment TEXT
GLIDE_CULLING textequ <1>
GLIDE_PACK_RGB textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
ALIGN 32
_trisetup_cull@12 ENDP
PROC_TYPE _trisetup_cull
IF GLIDE_PACKED_RGB
align 4
PUBLIC _trisetup_cull_rgb@12
_trisetup_cull_rgb@12 PROC NEAR
.code
%define GLIDE_CULLING 1
%define GLIDE_PACK_RGB 0
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
GLIDE_CULLING textequ <1>
GLIDE_PACK_RGB textequ <1>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
endp
_trisetup_cull_rgb@12 ENDP
%IF GLIDE_PACKED_RGB
align 4
PUBLIC _trisetup_cull_argb@12
_trisetup_cull_argb@12 PROC NEAR
.code
ALIGN 32
GLIDE_CULLING textequ <1>
GLIDE_PACK_RGB textequ <1>
GLIDE_PACK_ALPHA textequ <1>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
PROC_TYPE _trisetup_cull_rgb
_trisetup_cull_argb@12 ENDP
ENDIF ; GLIDE_PACKED_RGB
%define GLIDE_CULLING 1
%define GLIDE_PACK_RGB 1
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
endp
ALIGN 32
PROC_TYPE _trisetup_cull_argb
%define GLIDE_CULLING 1
%define GLIDE_PACK_RGB 1
%define GLIDE_PACK_ALPHA 1
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
endp
%ENDIF ; GLIDE_PACKED_RGB
align 4
PUBLIC _trisetup@12
_trisetup@12 PROC NEAR
.code
ALIGN 32
GLIDE_CULLING textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
PROC_TYPE _trisetup
_trisetup@12 ENDP
%define GLIDE_CULLING 0
%define GLIDE_PACK_RGB 0
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
IF GLIDE_PACKED_RGB
align 4
PUBLIC _trisetup_rgb@12
_trisetup_rgb@12 PROC NEAR
.code
endp
GLIDE_CULLING textequ <0>
GLIDE_PACK_RGB textequ <1>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
%IF GLIDE_PACKED_RGB
_trisetup_rgb@12 ENDP
ALIGN 32
align 4
PUBLIC _trisetup_argb@12
_trisetup_argb@12 PROC NEAR
.code
PROC_TYPE _trisetup_rgb
GLIDE_CULLING textequ <0>
GLIDE_PACK_RGB textequ <1>
GLIDE_PACK_ALPHA textequ <1>
GLIDE_GENERIC_SETUP textequ <0>
INCLUDE xdraw2.inc
GLIDE_GENERIC_SETUP textequ <0>
GLIDE_PACK_ALPHA textequ <0>
GLIDE_PACK_RGB textequ <0>
GLIDE_CULLING textequ <0>
%define GLIDE_CULLING 0
%define GLIDE_PACK_RGB 1
%define GLIDE_PACK_ALPHA 0
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
_trisetup_argb@12 ENDP
ENDIF ; GLIDE_PACKED_RGB
endp
_TEXT ENDS
END
ALIGN 32
PROC_TYPE _trisetup_argb
%define GLIDE_CULLING 0
%define GLIDE_PACK_RGB 1
%define GLIDE_PACK_ALPHA 1
%define GLIDE_GENERIC_SETUP 0
%INCLUDE "xdraw2.inc"
%undef GLIDE_GENERIC_SETUP
%undef GLIDE_PACK_ALPHA
%undef GLIDE_PACK_RGB
%undef GLIDE_CULLING
endp
%ENDIF ; GLIDE_PACKED_RGB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,161 @@
;
; compulsory header for glide3/xdraw* assembly specializations (NASM)
;
; $Header$
; $Log$
; Revision 1.1.2.3 2004/10/04 08:57:52 dborca
; supporting DOS/OpenWatcom in Assembly files
;
;
; Revision 1.1.2.1 2003/06/07 09:53:25 dborca
; initial checkin for NASM sources
;
;---------------------------------------
; platform defines
;---------------------------------------
; Identifiers for the supported targets; XOS is set to exactly one of
; them by the selection chain below.
%define XOS_DJGPP 1
%define XOS_LINUX 2
%define XOS_WIN32 4
%define XOS_WATCD 8
; Defaults, overridden per target below:
;   STDCALL - non-zero: names get an @size suffix and procedures pop
;             their own arguments (see extrn/globl/proc/invoke)
;   ELFTYPE - non-zero: exported procedures are tagged `:function`
%define STDCALL 0
%define ELFTYPE 0
;---------------------------------------
; pick up the right OS
;---------------------------------------
; The first matching selector wins; assembly is aborted when none is set.
; NOTE(review): the selector symbols (__DJGPP__, __linux__, __WIN32__,
; __WATCOMD__) are not defined in this file - presumably passed on the
; assembler command line; confirm in the makefiles.
%ifdef __DJGPP__
%define XOS XOS_DJGPP
%elifdef __linux__
%define XOS XOS_LINUX
%define ELFTYPE 1
%elifdef __WIN32__
%define XOS XOS_WIN32
%define STDCALL 1
%elifdef __WATCOMD__
%define XOS XOS_WATCD
%define STDCALL 1
%else
%error Unknown OS
%endif
;---------------------------------------
; general purpose macros
;---------------------------------------
; extrn name [, argbytes]
; Declares `name` as an external symbol.
;   %1 - symbol name
;   %2 - total size of the arguments in bytes (optional, default 0)
; When STDCALL is set and argbytes > 0 the name is first redefined as
; name@argbytes, so the decorated form is what gets declared and what
; later uses of the bare name assemble to.
%macro extrn 1-2 0
%if STDCALL && (%2 > 0)
%define %1 %1@%2
%endif
extern %1
%endmacro
; globl name [, argbytes]
; Exports `name` as a global symbol.
;   %1 - symbol name
;   %2 - total size of the arguments in bytes (optional, default 0)
; Same decoration rule as extrn: with STDCALL set and argbytes > 0 the
; name is redefined as name@argbytes before it is exported.
%macro globl 1-2 0
%if STDCALL && (%2 > 0)
%define %1 %1@%2
%endif
global %1
%endmacro
; proc name [, argbytes]
; Opens a procedure: pushes a preprocessor context named `proc`, records
; how the procedure must return, exports the symbol and emits its label.
;   %1 - procedure name
;   %2 - total size of the arguments in bytes (optional, default 0)
; Must be closed with `endp`.
%macro proc 1-2 0
%push proc
; context-local %$ret is what the `ret` macro expands to inside this
; procedure: callee-pops form only for STDCALL with a non-zero size
%if STDCALL && (%2 > 0)
%define %$ret RET %2
%else
%define %$ret RET
%endif
; ELF builds additionally tag the exported symbol as a function
%if ELFTYPE
globl %1:function, %2
%else
globl %1, %2
%endif
%1:
%endmacro
; endp
; Closes the procedure opened by `proc` by popping its preprocessor
; context; raises an assembly error when the current context is not `proc`.
%macro endp 0
%ifnctx proc
%error Mismatched `endp'/`proc'
%else
%pop
%endif
%endmacro
; ret
; Operand-less lower-case `ret` becomes context aware: inside a
; proc/endp pair it expands to the return form recorded by `proc`
; (%$ret), elsewhere to a plain RET. The upper-case RET used in the
; expansion is the real instruction, not this macro.
%macro ret 0
%ifnctx proc
RET
%else
%$ret
%endif
%endmacro
; invoke func [, arg1, arg2, ...]
; Calls `func` with the given arguments.
;   %1     - call target
;   %2...  - arguments, each pushed as one stack slot
; Arguments are pushed last-to-first, so arg1 ends up on top of the stack.
%macro invoke 1-*
; each pass rotates the last remaining parameter into %1 and pushes it
%rep %0 - 1
%rotate -1
push %1
%endrep
; one more rotation brings the call target back into %1
%rotate -1
call %1
; without STDCALL the caller removes the arguments (4 bytes each)
%if (STDCALL == 0) && (%0 > 1)
add esp, 4 * (%0 - 1)
%endif
%endmacro
;---------------------------------------
; Windows
;---------------------------------------
; Win32: section names used with `segment TEXT/DATA/CONST` (32-byte
; aligned) and the GET_GC helper.
; GET_GC loads `gc` from [_GlideRoot + curGC].
; NOTE(review): `gc`, `_GlideRoot` and `curGC` are not defined in this
; file - presumably supplied by the including source; confirm.
%if XOS == XOS_WIN32
%define TEXT .text align=32
%define DATA .data align=32
%define CONST .rdata align=32
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif
;---------------------------------------
; DJGPP
;---------------------------------------
; DJGPP: section names used with `segment TEXT/DATA/CONST` (no explicit
; alignment attribute) and the GET_GC helper.
; GET_GC loads `gc` from [_GlideRoot + curGC].
; NOTE(review): `gc`, `_GlideRoot` and `curGC` are not defined in this
; file - presumably supplied by the including source; confirm.
%if XOS == XOS_DJGPP
%define TEXT .text
%define DATA .data
%define CONST .rodata
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif
;---------------------------------------
; Linux
;---------------------------------------
; Linux: section names used with `segment TEXT/DATA/CONST` (32-byte
; aligned) and the GET_GC helper.
; GET_GC loads `gc` from [_GlideRoot + curGC].
; NOTE(review): `gc`, `_GlideRoot` and `curGC` are not defined in this
; file - presumably supplied by the including source; confirm.
%if XOS == XOS_LINUX
%define TEXT .text align=32
%define DATA .data align=32
%define CONST .rodata align=32
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif
;---------------------------------------
; WATCOM/DOS
;---------------------------------------
; Watcom/DOS: full segment declarations (name, alignment, use32, class,
; FLAT group) used with `segment TEXT/DATA/CONST`, and the GET_GC helper.
; GET_GC loads `gc` from [_GlideRoot + curGC].
; NOTE(review): `gc`, `_GlideRoot` and `curGC` are not defined in this
; file - presumably supplied by the including source; confirm.
%if XOS == XOS_WATCD
%define TEXT _TEXT align=1 public use32 class=CODE FLAT
%define DATA _DATA align=4 public use32 class=DATA FLAT
%define CONST CONST2 align=4 public use32 class=DATA FLAT
%macro GET_GC 0
mov gc, [_GlideRoot + curGC]
%endmacro
%endif

View File

@@ -0,0 +1,720 @@
;; THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
;; PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
;; TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
;; INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
;; DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
;; THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
;; EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
;; FULL TEXT OF THE NON-WARRANTY PROVISIONS.
;;
;; USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
;; RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
;; TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
;; AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
;; SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
;; THE UNITED STATES.
;;
;; COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
;;
;; $Header$
;; $Revision$
;; $Log$
;; Revision 1.1.1.1.8.1 2003/11/03 13:34:30 dborca
;; Voodoo2 happiness (DJGPP & Linux)
;;
;; Revision 1.1.8.7 2003/09/12 05:08:35 koolsmoky
;; preparing for graphic context checks
;;
;; Revision 1.1.8.6 2003/07/07 23:29:06 koolsmoky
;; cleaned logs
;;
;;
;; Revision 1.1 2000/06/15 00:27:43 joseph
;; Initial checkin into SourceForge.
;;
;; 10 8/17/99 6:35p Atai
;; fixed amd debug mode
;;
;; 9 4/08/99 1:22p Atai
;; added contect check for _grTexDownload_3DNow_MMX
;;
;; 8 3/19/99 11:26a Peter
;; expose direct fifo for gl
;;
;; 7 2/02/99 4:36p Peter
;; download through lfb rather than texture port
;;
;; 6 12/17/98 2:36p Atai
;; check in Norbert's fix for texture download width correction
;;
;; 5 12/07/98 11:33a Peter
;; norbert's re-fixes of my merge
;;
;; 4 11/02/98 5:34p Atai
;; merge direct i/o code
;;
;; 3 10/20/98 5:34p Atai
;; added #ifdefs for hwc
;;
;; 2 10/14/98 12:05p Peter
;; fixed my effed up assumption about non-volatile regs
;;
;; 1 10/09/98 6:48p Peter
;; 3DNow!(tm) version of wide texture downloads
;;
;; 3 10/07/98 9:43p Peter
;; triangle procs for 3DNow!(tm)
;;
;; 2 10/05/98 7:43p Peter
;; 3DNow!(tm) happiness everywhere
;;
;; 1 10/05/98 6:01p Peter
;; mmx stuff for 3DNow!(tm) capable processors
;;
%include "xos.inc"
; External routine called by the macro below; declared without an
; argument size, so the name is never @-decorated.
extrn _FifoMakeRoom
; _grCommandTransportMakeRoom a, b, c
; Calls _FifoMakeRoom(a, b, c): the operands are pushed in reverse order
; so that `a` is on top of the stack, and the 12 bytes of arguments are
; removed by the caller after the call returns.
; The procedures in this file reload eax/ecx/edx-held state and the fifo
; pointer after using it.
%MACRO _grCommandTransportMakeRoom 3
push %3
push %2
push %1
call _FifoMakeRoom
add esp, 12
%ENDMACRO ; _grCommandTransportMakeRoom
;;; Definitions of cvg regs and glide root structures.
%INCLUDE "fxgasm.h"
; Arguments (STACKOFFSET = 16 from 4 dword pushes)
; Each offset is relative to esp once all four callee-saved registers
; have been pushed: 4 bytes of return address + STACKOFFSET, then the
; six DWORD arguments in declaration order.
STACKOFFSET equ 16
_gc$ equ 4 + STACKOFFSET
_baseAddr$ equ 8 + STACKOFFSET
_maxS$ equ 12 + STACKOFFSET
_minT$ equ 16 + STACKOFFSET
_maxT$ equ 20 + STACKOFFSET
_texData$ equ 24 + STACKOFFSET
;; NB: The first set of registers (eax, ecx, and edx) are volatile across
;; function calls. The remaining registers are supposedly non-volatile
;; so they only store things that are non-volatile across the call.
;; Consequently curS (ecx) and fRoom (edx) are reloaded after every call
;; to _FifoMakeRoom in the procedures below.
%define fifo ebp ; fifo ptr in inner loop
%define gc esi ; graphics context
%define dataPtr edi ; pointer to texture data to be downloaded
%define curT ebx ; counter for texture scan lines (t-coordinate)
%define curS ecx ; texture s-coordinate
%define fRoom edx ; room available in fifo (in bytes)
;--------------------------------------------------------------------------
%IFNDEF GL_SSE2
;--------------------------------------------------------------------------
;
; GL_AMD3D, GL_MMX
;
;--------------------------------------------------------------------------
;--------------------------------------------------------------------------
; _grTexDownload_3DNow_MMX (GL_AMD3D) / _grTexDownload_MMX (GL_MMX)
;
; Copies texture scanlines minT..maxT (inclusive) from texData into the
; command fifo. Every scanline becomes one packet: a two-DWORD header
; (packet header word, texture address - gc->tex_ptr) followed by the
; scanline data, copied 64 bits at a time through mm0.
;
; Stack arguments (6 DWORDs, declared to `proc` as 24 bytes):
;   gc, baseAddr, maxS (scanline width in DWORDs), minT, maxT, texData
; The maxS argument slot is overwritten with the width in bytes.
; The address step between scanlines is the constant 0x200 (1 << 9).
; gc->fifoPtr and gc->fifoRoom are written back after every scanline.
;
; NOTE(review): the inner loop counts the scanline down in steps of 8
; bytes and leaves only on exactly zero, so maxS is presumably required
; to be a non-zero even DWORD count - confirm against the caller.
; NOTE(review): the GL_AMD3D and GL_MMX guards are independent: with
; neither defined there is no `proc` for the closing `endp`, with both
; defined two `proc` headers are emitted. Presumably the build defines
; exactly one per object - confirm in the makefiles.
;--------------------------------------------------------------------------
segment TEXT
ALIGN 32
%IFDEF GL_AMD3D
proc _grTexDownload_3DNow_MMX, 24
%ENDIF
%IFDEF GL_MMX
proc _grTexDownload_MMX, 24
%ENDIF
;; Prologue: argument loads are interleaved with the register saves, so
;; each offset is corrected by the pushes still outstanding (-12, -8, -4).
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; minT
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
;; Optional guard: leave without downloading when gc is NULL or when
;; bit 0 of the DWORD that gc->lostContext points to is set.
%IFDEF GLIDE_ALT_TAB
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
%IFDEF GL_AMD3D
femms ; we'll use MMX/3DNow!, make sure FPU register cleared
%ENDIF
%IFDEF GL_MMX
emms ; we'll use MMX
%ENDIF
;; Build the two header words once: mm1 = address word | packet header.
;; Only the address half changes from scanline to scanline.
mov edx, curS ; curS = maxS = scanline width in DWORDs
movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; 0 | minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
shl eax, 9 ; TEX_ROW_ADDR_INCR(minT) = minT << 9
or edx, 0xc0000005 ; packetHdr<31:30> = texture port
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd mm1, edx ; 0 | packetHdr
movd mm2, eax ; 0 | TEX_ROW_ADDR_INCR(minT)
paddd mm3, mm2 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
movd mm2, [gc + tex_ptr] ; 0 | gc->tex_ptr
psubd mm3, mm2 ; 0 | texAddr - gc->tex_ptr
mov eax, 0x200 ; TEX_ROW_ADDR_INCR(1) = 1 << 9
movd mm2, eax ; 0 | TEX_ROW_ADDR_INCR(1)
psllq mm2, 32 ; TEX_ROW_ADDR_INCR(1) | 0
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq mm1, mm3 ; hdr2 = texAddr - gc->tex_ptr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
;; Align the fifo pointer to 8 bytes by emitting one zero DWORD if needed.
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .mmxAlignFifo ; yes, write NULL packet to align fifo
;; Without USE_PACKET_FIFO no room is requested here; the reloads and the
;; alignment re-test below are still executed.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
.mmxAlignFifo:
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
align 32
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Compute packet header words
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo], mm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop unrolled for 8 texels a write
;; movq leaves EFLAGS untouched, so the jnz below still tests the result
;; of `sub curS, 8`.
.loopS:
movq mm0, [dataPtr] ; load 64 bit data (8 texels)
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8 ; curS -= 2 * sizeof(FxU32)
movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits]
jnz .loopS ; loop while curS > 0
;; Scanline done: ecx (curS) is zero and free to use as a scratch until
;; it is reloaded from the stack below. The room consumed is derived
;; from the pointer difference against the value still stored in gc.
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
sub curT, 1 ; curT--
mov [gc + fifoRoom], fRoom ; save new fifo space available
jz .dlDone ; loop while curT > 0
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
paddd mm1, mm2 ; texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
;; Without USE_PACKET_FIFO no room is requested; execution still reloads
;; the fifo state and proceeds to .loopT.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
%IFDEF GL_AMD3D
femms ; exit 3DNow!(tm) state
%ENDIF
%IFDEF GL_MMX
emms ; exit MMX state
%ENDIF
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
;; `ret` is the xos.inc macro: inside proc/endp it pops the 24 argument
;; bytes only when STDCALL is set, otherwise it is a plain return.
ret ; pop 6 DWORD parameters and return
endp
%ELSE ; !GL_SSE2
;--------------------------------------------------------------------------
;
; GL_SSE2
;
;--------------------------------------------------------------------------
;--------------------------------------------------------------------------
; _grTexDownload_SSE2_64
;
; Copies texture scanlines minT..maxT (inclusive) from texData into the
; command fifo. Every scanline becomes one packet: a two-DWORD header
; followed by the scanline data, copied 64 bits at a time through xmm0.
;
; Stack arguments (6 DWORDs, declared to `proc` as 24 bytes):
;   gc, baseAddr, maxS (scanline width in DWORDs), minT, maxT, texData
; The maxS argument slot is overwritten with the width in bytes.
; gc->fifoPtr and gc->fifoRoom are written back after every scanline.
;
; Unlike the MMX variant in this file, the header word is or'ed with 5
; only, the address step between scanlines is the scanline width in
; bytes (not 1 << 9), and gc->tex_ptr is not subtracted from the address.
;
; NOTE(review): the inner loop counts the scanline down in steps of 8
; bytes and leaves only on exactly zero, so maxS is presumably required
; to be a non-zero even DWORD count - confirm against the caller.
;--------------------------------------------------------------------------
segment TEXT
ALIGN 32
proc _grTexDownload_SSE2_64, 24
;; Prologue: argument loads are interleaved with the register saves, so
;; each offset is corrected by the pushes still outstanding (-12, -8, -4).
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; minT
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
;; Optional guard: leave without downloading when gc is NULL or when
;; bit 0 of the DWORD that gc->lostContext points to is set.
%IFDEF GLIDE_ALT_TAB
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
;; Build the two header words once: low qword of xmm1 = address word |
;; packet header. Only the address half changes between scanlines.
mov edx, curS ; curS = maxS = scanline width in DWORDs
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; 0 | 0 | 0 | minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
movd xmm2,curS ; 0 | 0 | TEX_ROW_ADDR_INCR(1)
or edx, 00000005h ; packetHdr<31:30> = lfb port
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd xmm1,edx ; 0 | 0 | packetHdr
movd xmm4,eax ; 0 | 0 | TEX_ROW_ADDR_INCR(minT)
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
paddd xmm3,xmm4 ; 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
;; Align the fifo pointer to 8 bytes by emitting one zero DWORD if needed.
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
;; Without USE_PACKET_FIFO no room is requested here; the reloads and the
;; alignment re-test below are still executed.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
.xmmAlignFifo:
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
align 32
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Compute packet header words
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo],xmm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop unrolled for 8 texels a write
;; movq leaves EFLAGS untouched, so the jnz below still tests the result
;; of `sub curS, 8`.
.loopS:
movq xmm0,[dataPtr] ; load 64 bit data (8 texels)
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8 ; curS -= 2 * sizeof(FxU32)
movq [fifo - 8],xmm0 ; *fifoPtr = texelData[64 bits]
jnz .loopS ; loop while curS > 0
;; Scanline done: ecx (curS) is zero and free to use as a scratch until
;; it is reloaded from the stack below. The room consumed is derived
;; from the pointer difference against the value still stored in gc.
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
sub curT, 1 ; curT--
mov [gc + fifoRoom], fRoom ; save new fifo space available
jz .dlDone ; loop while curT > 0
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
;; Without USE_PACKET_FIFO no room is requested; execution still reloads
;; the fifo state and proceeds to .loopT.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
;; `ret` is the xos.inc macro: inside proc/endp it pops the 24 argument
;; bytes only when STDCALL is set, otherwise it is a plain return.
ret ; pop 6 DWORD parameters and return
endp
;--------------------------------------------------------------------------
; _grTexDownload_SSE2_128
;
; Copies texture scanlines minT..maxT (inclusive) from texData into the
; command fifo. Every scanline becomes one packet: a two-DWORD header
; followed by the scanline data, copied 128 bits at a time through xmm0
; with unaligned loads and stores (movdqu).
;
; Stack arguments (6 DWORDs, declared to `proc` as 24 bytes):
;   gc, baseAddr, maxS (scanline width in DWORDs), minT, maxT, texData
; The maxS argument slot is overwritten with the width in bytes.
; gc->fifoPtr and gc->fifoRoom are written back after every scanline.
;
; Header construction and row stepping are the same as in
; _grTexDownload_SSE2_64; only the inner copy loop differs.
;
; NOTE(review): the inner loop counts the scanline down in steps of 16
; bytes and leaves only on exactly zero, so maxS is presumably required
; to be a non-zero multiple of 4 DWORDs - confirm against the caller.
;--------------------------------------------------------------------------
segment TEXT
ALIGN 32
proc _grTexDownload_SSE2_128, 24
;; Prologue: argument loads are interleaved with the register saves, so
;; each offset is corrected by the pushes still outstanding (-12, -8, -4).
push ebx ; save caller's register variable
mov curT, [esp + _maxT$ - 12] ; curT = maxT
push esi ; save caller's register variable
mov eax, [esp + _minT$ - 8] ; minT
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
;; Optional guard: leave without downloading when gc is NULL or when
;; bit 0 of the DWORD that gc->lostContext points to is set.
%IFDEF GLIDE_ALT_TAB
test gc, gc
je .dlDone
; mov edx, [gc + windowed]
; test edx, 1
; jnz .pastContextTest
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone
;.pastContextTest:
%ENDIF
sub curT, eax ; curT = maxT - minT
mov fifo, [gc + fifoPtr] ; fifoPtr
mov curS, [esp + _maxS$] ; curS = maxS
add curT, 1 ; curT = maxT - minT + 1
;; Build the two header words once: low qword of xmm1 = address word |
;; packet header. Only the address half changes between scanlines.
mov edx, curS ; curS = maxS = scanline width in DWORDs
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
shl curS, 2 ; scan line width (in bytes)
mov eax, [esp + _minT$] ; 0 | minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
movd xmm2,curS ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(1)
or edx, 00000005h ; packetHdr<31:30> = lfb port
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd xmm1,edx ; 0 | 0 | 0 | packetHdr
movd xmm4,eax ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(minT)
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
paddd xmm3,xmm4 ; 0 | 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
;; Align the fifo pointer to 8 bytes by emitting one zero DWORD if needed.
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
;; Without USE_PACKET_FIFO no room is requested here; the reloads and the
;; alignment re-test below are still executed.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
test fifo, 4 ; new fifoPtr QWORD aligned ?
jz .startDownload ; yup, start texture download
.xmmAlignFifo:
mov DWORD [fifo], 0 ; write NULL packet
sub fRoom, 4 ; fifoRoom -= 4
mov [gc + fifoRoom], fRoom ; store new fifoRoom
add fifo, 4 ; fifoPtr += 4
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr
jmp .startDownload ; fifo aligned, download texture now
align 32
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Compute packet header words
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo],xmm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop unrolled for 8 texels a write
;; Each pass moves 16 bytes. movdqu leaves EFLAGS untouched, so the jnz
;; below still tests the result of `sub curS, 16`.
.loopS:
movdqu xmm0, [dataPtr] ; load 128 bit data (8 texels) ; isn't 16 bytes aligned?
add fifo, 16 ; pre-increment fifoPtr += 4 * sizeof(FxU32)
add dataPtr, 16 ; dataPtr += 4 * sizeof(FxU32)
sub curS, 16 ; curS -= 4 * sizeof(FxU32)
movdqu [fifo - 16], xmm0 ; *fifoPtr = texelData[128 bits] ; isn't 16 bytes aligned?
jnz .loopS ; loop while curS > 0
;; Scanline done: ecx (curS) is zero and free to use as a scratch until
;; it is reloaded from the stack below. The room consumed is derived
;; from the pointer difference against the value still stored in gc.
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
sub curT, 1 ; curT--
mov [gc + fifoRoom], fRoom ; save new fifo space available
jz .dlDone ; loop while curT > 0
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
;; Without USE_PACKET_FIFO no room is requested; execution still reloads
;; the fifo state and proceeds to .loopT.
%ifdef USE_PACKET_FIFO
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room (if fifoPtr QWORD aligned before
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
pop ebp ; restore caller's register variable
pop edi ; restore caller's register variable
pop esi ; restore caller's register variable
pop ebx ; restore caller's register variable
;; `ret` is the xos.inc macro: inside proc/endp it pops the 24 argument
;; bytes only when STDCALL is set, otherwise it is a plain return.
ret ; pop 6 DWORD parameters and return
endp
%ENDIF ; GL_SSE2

View File

@@ -0,0 +1,242 @@
/*
** THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
** PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
** TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
** INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
** DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
** THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
** EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
** FULL TEXT OF THE NON-WARRANTY PROVISIONS.
**
** USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
** RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
** TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
** AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
** SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
** THE UNITED STATES.
**
** COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
**
** $Header$
** $Log$
** Revision 1.1.1.1 1999/12/07 21:42:38 joseph
** Initial checkin into SourceForge.
**
**
** 1 10/08/98 11:30a Brent
**
** 2 10/05/98 7:43p Peter
** 3DNow!(tm) happiness everywhere
**
** 1 10/03/98 3:37p Peter
** Texture download vectors
**
*/
#include <3dfx.h>
#define FX_DLL_DEFINITION
#include <fxdll.h>
#include <glide.h>
#include "fxglide.h"
/*
** _grTexDownload_Default_8_1
**
** Issues one single-word linear texture write per row for the rows
** minT..maxT (inclusive). Each row consumes one byte of texData and
** the destination address advances by TEX_ROW_ADDR_INCR(1) per row.
**
**   gc          - graphics context; gc->tex_ptr is subtracted from the
**                 destination address handed to LINEAR_WRITE_BEGIN
**   tmuBaseAddr - base address the row offsets are added to
**   maxS        - not used by this variant
**   minT, maxT  - first and last row to write
**   texData     - source texels
*/
void FX_CSTYLE
_grTexDownload_Default_8_1(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                           const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                           void* texData)
{
#define FN_NAME "_grTexDownload_Default_8_1"
  const FxU8* src8 = (const FxU8*)texData;
  FxU32 tex_address = tmuBaseAddr + TEX_ROW_ADDR_INCR(minT);
  FxU32 curT = minT;

  while (curT <= maxT) {
    LINEAR_WRITE_BEGIN(1, kLinearWriteTex,
                       (FxU32)tex_address - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    /* one source byte, widened to a full word */
    LINEAR_WRITE_SET_8(tex_address, (const FxU32)*(const FxU8*)src8);
    LINEAR_WRITE_END();

    src8++;
    tex_address += TEX_ROW_ADDR_INCR(1);
    curT++;
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_8_2
**
** Issues one single-word linear texture write per row for the rows
** minT..maxT (inclusive). Each row consumes two bytes of texData (read
** as one FxU16) and the destination address advances by
** TEX_ROW_ADDR_INCR(1) per row.
**
**   gc          - graphics context; gc->tex_ptr is subtracted from the
**                 destination address handed to LINEAR_WRITE_BEGIN
**   tmuBaseAddr - base address the row offsets are added to
**   maxS        - not used by this variant
**   minT, maxT  - first and last row to write
**   texData     - source texels
*/
void FX_CSTYLE
_grTexDownload_Default_8_2(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                           const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                           void* texData)
{
#define FN_NAME "_grTexDownload_Default_8_2"
  const FxU8* src8 = (const FxU8*)texData;
  FxU32 tex_address = tmuBaseAddr + TEX_ROW_ADDR_INCR(minT);
  FxU32 curT = minT;

  while (curT <= maxT) {
    LINEAR_WRITE_BEGIN(1, kLinearWriteTex,
                       (FxU32)tex_address - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    /* two source bytes fetched as one 16-bit value, widened to a word */
    LINEAR_WRITE_SET_8(tex_address, (const FxU32)*(const FxU16*)src8);
    LINEAR_WRITE_END();

    src8 += 2;
    tex_address += TEX_ROW_ADDR_INCR(1);
    curT++;
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_8_4
**
** Default (C) texture download for rows that carry four source bytes.
**
** gc          - graphics context; gc->tex_ptr is subtracted from the
**               destination to form the linear-write address.
** tmuBaseAddr - base address the row offset is added to.
** maxS        - not referenced by this variant.
** minT, maxT  - first and last row to download (inclusive).
** texData     - source data, consumed four bytes per row.
**
** Each row's four bytes are fetched with one 32-bit load and sent as
** its own LINEAR_WRITE_BEGIN(1, ...) / SET / END sequence.
*/
void FX_CSTYLE
_grTexDownload_Default_8_4(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                           const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                           void* texData)
{
#define FN_NAME "_grTexDownload_Default_8_4"
  const FxU8* srcBytes = (const FxU8*)texData;
  FxU32 row = minT;
  FxU32 rowAddr = tmuBaseAddr + TEX_ROW_ADDR_INCR(row);

  while (row <= maxT) {
    LINEAR_WRITE_BEGIN(1, kLinearWriteTex,
                       (FxU32)rowAddr - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    /* The whole row fits in one 32-bit word. */
    LINEAR_WRITE_SET_8(rowAddr, *(const FxU32*)srcBytes);
    LINEAR_WRITE_END();

    /* Step past the four bytes just sent and on to the next row. */
    srcBytes += 4;
    rowAddr += TEX_ROW_ADDR_INCR(1);
    row++;
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_8_WideS
**
** Default (C) texture download for rows that span several 32-bit words.
**
** gc          - graphics context; gc->tex_ptr is subtracted from the
**               destination to form the linear-write address.
** tmuBaseAddr - base address the row offset is added to.
** maxS        - passed to LINEAR_WRITE_BEGIN as the count for a row and
**               used as the bound of the per-row word loop.
** minT, maxT  - first and last row to download (inclusive).
** texData     - source data, consumed eight bytes per inner iteration.
**
** One LINEAR_WRITE_BEGIN / END pair brackets each row. The inner loop
** handles two words per pass, so it always sends an even number of
** words (presumably maxS is even for every caller - TODO confirm).
*/
void FX_CSTYLE
_grTexDownload_Default_8_WideS(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                               const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                               void* texData)
{
#define FN_NAME "_grTexDownload_Default_8_WideS"
  const FxU8* srcBytes = (const FxU8*)texData;
  FxU32 row;

  for (row = minT; row <= maxT; row++) {
    /* The destination is recomputed from the row index for every row. */
    FxU32 dstAddr = tmuBaseAddr + TEX_ROW_ADDR_INCR(row);

    LINEAR_WRITE_BEGIN(maxS, kLinearWriteTex,
                       (FxU32)dstAddr - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    {
      FxU32 word = 0;

      while (word < maxS) {
        /* Fetch both words of the pair before issuing either write. */
        const FxU32 first  = ((const FxU32*)srcBytes)[0];
        const FxU32 second = ((const FxU32*)srcBytes)[1];

        LINEAR_WRITE_SET_8(dstAddr, first);
        LINEAR_WRITE_SET_8(dstAddr + sizeof(FxU32), second);

        srcBytes += 8;
        dstAddr += 2 * sizeof(FxU32);
        word += 2;
      }
    }
    LINEAR_WRITE_END();
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_16_1
**
** Default (C) texture download for rows that carry one 16-bit source
** value.
**
** gc          - graphics context; gc->tex_ptr is subtracted from the
**               destination to form the linear-write address.
** tmuBaseAddr - base address the row offset is added to.
** maxS        - not referenced by this variant.
** minT, maxT  - first and last row to download (inclusive).
** texData     - source data, consumed one FxU16 per row.
**
** Every row is sent as its own LINEAR_WRITE_BEGIN(1, ...) / SET / END
** sequence; the destination then moves on by TEX_ROW_ADDR_INCR(1).
*/
extern void FX_CSTYLE
_grTexDownload_Default_16_1(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                            const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                            void* texData)
{
#define FN_NAME "_grTexDownload_Default_16_1"
  const FxU16* srcHalf = (const FxU16*)texData;
  FxU32 row = minT;
  FxU32 rowAddr = tmuBaseAddr + TEX_ROW_ADDR_INCR(row);

  while (row <= maxT) {
    LINEAR_WRITE_BEGIN(1, kLinearWriteTex,
                       (FxU32)rowAddr - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    /* The 16-bit source value is zero-extended to a 32-bit word. */
    LINEAR_WRITE_SET_16(rowAddr, (FxU32)*srcHalf);
    LINEAR_WRITE_END();

    /* Step to the next source value and the next destination row. */
    srcHalf++;
    rowAddr += TEX_ROW_ADDR_INCR(1);
    row++;
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_16_2
**
** Default (C) texture download for rows that carry two 16-bit source
** values.
**
** gc          - graphics context; gc->tex_ptr is subtracted from the
**               destination to form the linear-write address.
** tmuBaseAddr - base address the row offset is added to.
** maxS        - not referenced by this variant.
** minT, maxT  - first and last row to download (inclusive).
** texData     - source data, consumed two FxU16 values per row.
**
** Each row's pair is fetched with one 32-bit load and sent as its own
** LINEAR_WRITE_BEGIN(1, ...) / SET / END sequence.
*/
extern void FX_CSTYLE
_grTexDownload_Default_16_2(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                            const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                            void* texData)
{
#define FN_NAME "_grTexDownload_Default_16_2"
  const FxU16* srcHalves = (const FxU16*)texData;
  FxU32 row = minT;
  FxU32 rowAddr = tmuBaseAddr + TEX_ROW_ADDR_INCR(row);

  while (row <= maxT) {
    LINEAR_WRITE_BEGIN(1, kLinearWriteTex,
                       (FxU32)rowAddr - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    /* Both 16-bit values of the row travel in one 32-bit word. */
    LINEAR_WRITE_SET_16(rowAddr, *(const FxU32*)srcHalves);
    LINEAR_WRITE_END();

    /* Step past the pair just sent and on to the next row. */
    srcHalves += 2;
    rowAddr += TEX_ROW_ADDR_INCR(1);
    row++;
  }
#undef FN_NAME
}
/*
** _grTexDownload_Default_16_WideS
**
** Default (C) texture download of 16-bit source data for rows that span
** several 32-bit words.
**
** gc          - graphics context; gc->tex_ptr is subtracted from the
**               destination to form the linear-write address.
** tmuBaseAddr - base address the row offset is added to.
** maxS        - passed to LINEAR_WRITE_BEGIN as the count for a row and
**               used as the bound of the per-row word loop.
** minT, maxT  - first and last row to download (inclusive).
** texData     - source data, consumed four FxU16 values per inner
**               iteration.
**
** One LINEAR_WRITE_BEGIN / END pair brackets each row. The inner loop
** is unrolled by two words, so it always sends an even number of words
** (presumably maxS is even for every caller - TODO confirm).
*/
extern void FX_CSTYLE
_grTexDownload_Default_16_WideS(struct GrGC_s* gc, const FxU32 tmuBaseAddr,
                                const FxU32 maxS, const FxU32 minT, const FxU32 maxT,
                                void* texData)
{
#define FN_NAME "_grTexDownload_Default_16_WideS"
  const FxU16* srcHalves = (const FxU16*)texData;
  FxU32 row;

  for (row = minT; row <= maxT; row++) {
    /* The destination is recomputed from the row index for every row. */
    FxU32 dstAddr = tmuBaseAddr + TEX_ROW_ADDR_INCR(row);

    LINEAR_WRITE_BEGIN(maxS, kLinearWriteTex,
                       (FxU32)dstAddr - (FxU32)gc->tex_ptr,
                       0x00UL, 0x00UL);
    {
      FxU32 word = 0;

      while (word < maxS) {
        /* Fetch both words of the pair before issuing either write. */
        const FxU32 first  = *(const FxU32*)(srcHalves);
        const FxU32 second = *(const FxU32*)(srcHalves + 2);

        GDBG_INFO(195, "s = %d, t= %d, address = 0x%x\n",
                  word, row, (FxU32) dstAddr - (FxU32) gc->tex_ptr + 0x200000);

        LINEAR_WRITE_SET_16(dstAddr, first);
        LINEAR_WRITE_SET_16(dstAddr + 4, second);

        dstAddr += 8;
        srcHalves += 4;
        word += 2;
      }
    }
    LINEAR_WRITE_END();
  }
#undef FN_NAME
}