fixed 3dnow! and mmx optimizations
This commit is contained in:
@@ -29,6 +29,8 @@
|
||||
# capabilities are still checked at run-time to avoid
|
||||
# crashes.
|
||||
# default = no
|
||||
# USE_MMX=1 allow MMX specializations.
|
||||
# default = no
|
||||
# TEXUS2=1 embed Texus2 functions into Glide2.
|
||||
# default = no
|
||||
# FXOEM2X=1 build fxoem2x.dll
|
||||
@@ -103,18 +105,7 @@ CDEFS += -DGDBG_INFO_ON -DGLIDE_DEBUG -DGLIDE_SANITY_ASSERT -DGLIDE_SANITY_SIZE
|
||||
endif
|
||||
|
||||
override USE_FIFO = 1
|
||||
override USE_X86 = 1
|
||||
|
||||
# cpu optimized triangle
|
||||
ifeq ($(USE_MMX),1)
|
||||
CFLAGS += -DGL_MMX
|
||||
override USE_X86 = 1
|
||||
endif
|
||||
|
||||
ifeq ($(USE_3DNOW),1)
|
||||
CFLAGS += -DGL_AMD3D
|
||||
override USE_X86 = 1
|
||||
endif
|
||||
#override USE_X86 = 1
|
||||
|
||||
ifeq ($(USE_X86),1)
|
||||
CDEFS += -DGLIDE_DISPATCH_SETUP=1 -DGLIDE_DISPATCH_DOWNLOAD=1
|
||||
@@ -165,6 +156,17 @@ CFLAGS += -I$(FX_GLIDE_SW)/fxmisc -I$(FX_GLIDE_SW)/newpci/pcilib -I$(FX_GLIDE_SW
|
||||
CFLAGS += -I$(FX_GLIDE_SW)/texus2/lib
|
||||
CFLAGS += $(CDEFS)
|
||||
|
||||
# cpu optimized triangle
|
||||
ifeq ($(USE_MMX),1)
|
||||
CFLAGS += -DGL_MMX
|
||||
override USE_X86 = 1
|
||||
endif
|
||||
|
||||
ifeq ($(USE_3DNOW),1)
|
||||
CFLAGS += -DGL_AMD3D
|
||||
override USE_X86 = 1
|
||||
endif
|
||||
|
||||
###############################################################################
|
||||
# objects
|
||||
###############################################################################
|
||||
|
||||
@@ -19,6 +19,9 @@
|
||||
;; $Header$
|
||||
;; $Revision$
|
||||
;; $Log$
|
||||
;; Revision 1.1.1.1.2.3 2005/01/22 14:52:02 koolsmoky
|
||||
;; enabled packed argb for cmd packet type 3
|
||||
;;
|
||||
;; Revision 1.1.1.1.2.2 2005/01/13 16:11:39 koolsmoky
|
||||
;; prepare for packed rgb
|
||||
;;
|
||||
@@ -59,33 +62,32 @@
|
||||
|
||||
%include "xos.inc"
|
||||
|
||||
;;; Definitions of cvg regs and glide root structures.
|
||||
%include "fxgasm.h"
|
||||
|
||||
extrn _GlideRoot
|
||||
extrn _FifoMakeRoom
|
||||
extrn _FifoMakeRoom, 12
|
||||
|
||||
;; GR_FIFO_WRITE base, offset, value
;;   Stores value (%3) to the memory location [base + offset] (%1 + %2).
;;   The macro does not modify the base register; callers in this file
;;   advance the fifo pointer themselves with a separate add.
%MACRO GR_FIFO_WRITE 3
|
||||
mov [%1 + %2], %3
|
||||
%ENDMACRO ; GR_FIFO_WRITE
|
||||
|
||||
%ifdef GL_AMD3D
|
||||
;; 3dnow!
|
||||
;; WRITE_MM1_FIFO_ALIGNED (no arguments)
;;   When GL_AMD3D is defined: stores all 64 bits of mm1 at [fifo].
;;   Otherwise the macro expands to nothing.
;;   The fifo pointer is not advanced here; call sites follow the macro
;;   with "add fifo, 8".
%MACRO WRITE_MM1_FIFO_ALIGNED 0
|
||||
%ifdef GL_AMD3D
|
||||
movq [fifo], mm1 ; store current param | previous param
|
||||
%else
|
||||
;;
|
||||
%endif
|
||||
%ENDMACRO ; WRITE_MM1_FIFO_ALIGNED
|
||||
|
||||
;; WRITE_MM1LOW_FIFO (no arguments)
;;   When GL_AMD3D is defined: stores only the low DWORD of mm1 at [fifo].
;;   Otherwise the macro expands to nothing.
;;   The fifo pointer is not advanced by this macro.
%MACRO WRITE_MM1LOW_FIFO 0
|
||||
%ifdef GL_AMD3D
|
||||
movd [fifo], mm1 ; store low DWORD of mm1 only (single parameter)
|
||||
%else
|
||||
;;
|
||||
%endif
|
||||
%ENDMACRO ; WRITE_MM1LOW_FIFO
|
||||
|
||||
%MACRO PROC_TYPE 1
|
||||
proc %1_3DNow, 12
|
||||
%ENDM
|
||||
%else
|
||||
;; original code
|
||||
%MACRO PROC_TYPE 1
|
||||
proc %1, 12
|
||||
%ENDM
|
||||
%endif
|
||||
|
||||
segment DATA
|
||||
One DD 1.0
|
||||
Area DD 0
|
||||
@@ -94,11 +96,10 @@ segment DATA
|
||||
bias1 DD 0
|
||||
%ENDIF
|
||||
|
||||
;;; Definitions of cvg regs and glide root structures.
|
||||
%INCLUDE "fxgasm.h"
|
||||
|
||||
;; enables/disables trisProcessed and trisDrawn counters
|
||||
%define STATS 1
|
||||
segment CONST
|
||||
$T2003 DD 12288.0
|
||||
$T2005 DD 1.0
|
||||
$T2006 DD 256.0
|
||||
|
||||
;;; Arguments (STKOFF = 16 from 4 pushes)
|
||||
STKOFF equ 16
|
||||
@@ -113,15 +114,20 @@ _vc$ equ 12 + STKOFF
|
||||
X equ 0
|
||||
Y equ 4
|
||||
|
||||
segment CONST
|
||||
T2003 DD 12288.0 ; 12288
|
||||
T2005 DD 1.0 ; 1
|
||||
T2006 DD 256.0 ; 256
|
||||
;; PROC_TYPE name
;;   Opens a procedure with the `proc` macro. When GL_AMD3D is defined the
;;   procedure is named <name>_3DNow, otherwise plain <name>, so one source
;;   body can be assembled into both variants.
;;   NOTE(review): the second `proc` argument (12) presumably is the size in
;;   bytes of the stack arguments (three DWORD vertex pointers) -- confirm
;;   against the `proc` definition in xos.inc.
%MACRO PROC_TYPE 1
|
||||
%ifdef GL_AMD3D
|
||||
proc %1_3DNow, 12
|
||||
%else
|
||||
proc %1, 12
|
||||
%endif
|
||||
%ENDMACRO ; PROC_TYPE
|
||||
|
||||
;; enables/disables trisProcessed and trisDrawn counters
|
||||
%define STATS 1
|
||||
|
||||
segment TEXT
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup_cull
|
||||
|
||||
%define GLIDE_CULLING 1
|
||||
@@ -139,8 +145,7 @@ endp
|
||||
%IF GLIDE_PACKED_RGB
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup_cull_rgb
|
||||
PROC_TYPE _trisetup_cull_rgb
|
||||
|
||||
%define GLIDE_CULLING 1
|
||||
%define GLIDE_PACK_RGB 1
|
||||
@@ -155,8 +160,7 @@ PROC_TYPE _trisetup_cull_rgb
|
||||
endp
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup_cull_argb
|
||||
PROC_TYPE _trisetup_cull_argb
|
||||
|
||||
%define GLIDE_CULLING 1
|
||||
%define GLIDE_PACK_RGB 1
|
||||
@@ -170,10 +174,9 @@ PROC_TYPE _trisetup_cull_argb
|
||||
|
||||
endp
|
||||
%ENDIF ; GLIDE_PACKED_RGB
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup
|
||||
ALIGN 32
|
||||
PROC_TYPE _trisetup
|
||||
|
||||
%define GLIDE_CULLING 0
|
||||
%define GLIDE_PACK_RGB 0
|
||||
@@ -187,11 +190,10 @@ PROC_TYPE _trisetup
|
||||
|
||||
endp
|
||||
|
||||
%IF GLIDE_PACKED_RGB
|
||||
%IF GLIDE_PACKED_RGB
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup_rgb
|
||||
PROC_TYPE _trisetup_rgb
|
||||
|
||||
%define GLIDE_CULLING 0
|
||||
%define GLIDE_PACK_RGB 1
|
||||
@@ -206,8 +208,7 @@ PROC_TYPE _trisetup_rgb
|
||||
endp
|
||||
|
||||
ALIGN 32
|
||||
|
||||
PROC_TYPE _trisetup_argb
|
||||
PROC_TYPE _trisetup_argb
|
||||
|
||||
%define GLIDE_CULLING 0
|
||||
%define GLIDE_PACK_RGB 1
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
;; $Header$
|
||||
;; $Revision$
|
||||
;; $Log$
|
||||
;; Revision 1.1.1.1.2.3 2005/01/22 14:52:02 koolsmoky
|
||||
;; enabled packed argb for cmd packet type 3
|
||||
;;
|
||||
;; Revision 1.1.1.1.2.2 2005/01/13 16:11:39 koolsmoky
|
||||
;; prepare for packed rgb
|
||||
;;
|
||||
@@ -61,15 +64,14 @@
|
||||
|
||||
;; Prologue stuff
|
||||
push edi ; save caller's register variable
|
||||
mov gc, [_GlideRoot+curGC]; GR_DCL_GC
|
||||
|
||||
push esi ; save caller's register variable
|
||||
push ebx ; save caller's register variable
|
||||
push ebp ; save frame pointer
|
||||
|
||||
mov gc, [_GlideRoot+curGC]; GR_DCL_GC
|
||||
mov fa, [esp + _va$] ; get base address of vertex A
|
||||
|
||||
push ebx ; save caller's register variable
|
||||
mov fb, [esp + _vb$] ; get base address of vertex B
|
||||
|
||||
push ebp ; save frame pointer
|
||||
mov cull, [gc + cull_mode]; get cull mode
|
||||
|
||||
mov fc, [esp + _vc$] ; get base address of vertex C
|
||||
@@ -79,13 +81,13 @@
|
||||
|
||||
;; Cull Check
|
||||
|
||||
movq mm2, [fc + X] ; yc | xc
|
||||
movq mm2, [fc + x] ; yc | xc
|
||||
shl cull, 31 ; culltest << 31
|
||||
|
||||
movq mm1, [fb + X] ; yb | xb
|
||||
movq mm1, [fb + x] ; yb | xb
|
||||
add tempVal, 4 ; space required in fifo
|
||||
|
||||
movq mm0, [fa + X] ; ya | xa
|
||||
movq mm0, [fa + x] ; ya | xa
|
||||
mov ebx, [gc + fifoRoom] ; space available in fifo
|
||||
|
||||
;; Area_Computation
|
||||
@@ -129,23 +131,24 @@
|
||||
push tempVal ; fifo space required
|
||||
call _FifoMakeRoom ; note: updates fifoPtr
|
||||
|
||||
add esp, 12 ; remove 3 DWORD arguments from stack
|
||||
nop ; filler
|
||||
|
||||
;add esp, 12 ; remove 3 DWORD arguments from stack
|
||||
;nop ; filler
|
||||
%ELSE ; !GLIDE_CULLING
|
||||
|
||||
;; Prologue stuff
|
||||
push edi ; save caller's register variable
|
||||
mov gc, [_GlideRoot+curGC]; GR_DCL_GC
|
||||
|
||||
push esi ; save caller's register variable
|
||||
push ebx ; save caller's register variable
|
||||
push ebp ; save frame pointer
|
||||
|
||||
mov gc, [_GlideRoot+curGC]; GR_DCL_GC
|
||||
mov tempVal, [_GlideRoot+curTriSize] ; data for whole triangle in bytes
|
||||
|
||||
push ebx ; save caller's register variable
|
||||
mov ebx, [gc + fifoRoom] ; fifo space available
|
||||
|
||||
push ebp ; save frame pointer
|
||||
add tempVal, 4 ; fifo space needed (include 4-byte header)
|
||||
|
||||
femms ; will use AMD3D, clear FPU/MMX registers
|
||||
|
||||
cmp ebx, tempVal ; fifo space available >= space needed ?
|
||||
jge .__triBegin ; yup, ready to draw triangle
|
||||
|
||||
@@ -155,8 +158,8 @@
|
||||
push tempVal ; fifo space needed
|
||||
call _FifoMakeRoom ; note: updates fifoPtr
|
||||
|
||||
add esp, 12 ; remove 3 DWORD arguments from stack
|
||||
nop ; filler
|
||||
;add esp, 12 ; remove 3 DWORD arguments from stack
|
||||
;nop ; filler
|
||||
%ENDIF ; GLIDE_CULLING
|
||||
|
||||
|
||||
@@ -164,20 +167,20 @@
|
||||
%define dlpstrt ecx ; points to begin of dataList structure
|
||||
%define vertex edx ; the current vertex
|
||||
|
||||
ALIGN 32
|
||||
ALIGN 32
|
||||
|
||||
.__triBegin:
|
||||
mov eax, [gc+triPacketHdr]; Packet 3 header
|
||||
lea dlp,[gc + tsuDataList]; Reset the dataList
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; Fetch Fifo Ptr
|
||||
|
||||
mov vertex, [esp + _va$] ; Current vertex = A
|
||||
|
||||
mov dlpstrt, dlp ; save pointer to start of dataList
|
||||
test fifo, 4 ; is fifo pointer qword aligned ?
|
||||
|
||||
test fifo, 4 ; is fifo pointer qword aligned ?
|
||||
jz .__fifo_aligned ; yes, it is qword aligned
|
||||
movq mm1, [vertex+X] ; y | x
|
||||
|
||||
mov eax, [gc+triPacketHdr]; Packet 3 header
|
||||
movq mm1, [vertex+x] ; y | x
|
||||
|
||||
GR_FIFO_WRITE fifo, 0, eax ; write header to fifo; now qword aligned
|
||||
add fifo, 4 ; advance fifo for hdr; now qword aligned
|
||||
@@ -258,24 +261,22 @@
|
||||
;; here: "write buffer" empty
|
||||
|
||||
mov eax,[dlp] ; Get first offset from the data list
|
||||
test eax, eax ; at end of list ?
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
lea dlp, [dlp+4] ; dlp++
|
||||
test eax, eax ; at end of list ?
|
||||
jz .__paramLoopDoneWBzero1; yes, "write buffer" empty
|
||||
|
||||
.__paramLoop1a:
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
|
||||
jz .__paramLoopDoneWBone1; exit, write buffer contains one DWORD
|
||||
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
add dlp, 8 ; dlp += 2
|
||||
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
@@ -284,20 +285,20 @@
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
jnz .__paramLoop1a ; nope, copy next parameter
|
||||
|
||||
nop ; filler
|
||||
nop
|
||||
jmp .__paramLoopDoneWBzero1; write buffer empty
|
||||
|
||||
%ENDIF ; GLIDE_PACK_RGB
|
||||
|
||||
.__fifo_aligned:
|
||||
movd mm2, [vertex+X] ; 0 | x of vertex A (movd loads one DWORD)
|
||||
movd mm2, [vertex+x] ; 0 | x of vertex A (movd loads one DWORD)
|
||||
movd mm1, [gc+triPacketHdr]; Packet 3 header
|
||||
|
||||
punpckldq mm1, mm2 ; x | header
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write x | header
|
||||
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
movd mm1, [vertex+Y] ; 0 | y of vertex A
|
||||
movd mm1, [vertex+y] ; 0 | y of vertex A
|
||||
|
||||
%IF GLIDE_PACK_RGB
|
||||
%IF GLIDE_PACK_ALPHA
|
||||
@@ -314,10 +315,10 @@
|
||||
punpcklwd mm2, mm4 ; 00000000 | 00rr00bb
|
||||
psrlq mm4, 24 ; 00000000 | 0000gg00
|
||||
|
||||
add dlp, 8 ; skip data list entry "a"
|
||||
psllq mm3, 24 ; 00000000 | aa000000
|
||||
|
||||
por mm4, mm2 ; 00000000 | 00rrggbb
|
||||
|
||||
add dlp, 8 ; skip data list entry "a"
|
||||
por mm4, mm3 ; 00000000 | aarrggbb
|
||||
%ELSE ; !GLIDE_PACK_ALPHA
|
||||
;; assumes color values < 256.0
|
||||
@@ -355,17 +356,15 @@
|
||||
jz .__paramLoopDoneWBone1 ; exit, write buffer contains one DWORD
|
||||
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
add dlp, 4 ; dlp++
|
||||
add dlp, 8 ; dlp += 8
|
||||
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
jnz .__paramLoop1b ; nope, copy next parameter
|
||||
|
||||
nop ; filler
|
||||
@@ -380,13 +379,10 @@
|
||||
|
||||
.__paramLoop1b:
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
add dlp, 8 ; dlp += 2
|
||||
|
||||
add dlp, 8 ; dlp += 2
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
@@ -395,9 +391,7 @@
|
||||
jz .__paramLoopDoneWBzero1; exit, "write buffer" empty
|
||||
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
jnz .__paramLoop1b ; nope, copy next parameter
|
||||
@@ -410,13 +404,13 @@
|
||||
mov dlp, dlpstrt ; reset the dataList
|
||||
mov vertex, [esp + _vb$] ; Current vertex = B
|
||||
|
||||
movd mm2, [vertex+X] ; 0 | x of vertex B
|
||||
movd mm2, [vertex+x] ; 0 | x of vertex B
|
||||
punpckldq mm1, mm2 ; x | old param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write: x | old param
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
|
||||
movd mm1, [vertex+Y] ; 0 | y of vertex B
|
||||
movd mm1, [vertex+y] ; 0 | y of vertex B
|
||||
nop ; filler
|
||||
|
||||
%IF GLIDE_PACK_RGB
|
||||
@@ -502,7 +496,7 @@
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
add dlp, 8 ; dlp += 2
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
@@ -512,9 +506,7 @@
|
||||
jz .__paramLoopDoneWBzero2; exit, "write buffer" empty
|
||||
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
jnz .__paramLoop2b ; nope, copy next parameter
|
||||
@@ -522,16 +514,16 @@
|
||||
jmp .__paramLoopDoneWBone2; write buffer contains one DWORD
|
||||
%ENDIF
|
||||
|
||||
|
||||
.__paramLoopDoneWBzero1:
|
||||
|
||||
mov vertex, [esp + _vb$] ; Current vertex = B
|
||||
mov dlp, dlpstrt ; Reset the dataList
|
||||
|
||||
movq mm1, [vertex+X] ; y | x of vertex B
|
||||
movq mm1, [vertex+x] ; y | x of vertex B
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write y | x of vertex B
|
||||
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
nop
|
||||
|
||||
%IF GLIDE_PACK_RGB
|
||||
%IF GLIDE_PACK_ALPHA
|
||||
@@ -571,7 +563,7 @@
|
||||
|
||||
;; here: one DWORD in "write buffer", RGB(A)
|
||||
|
||||
mov eax, dword [dlp] ; get first offset from the data list
|
||||
mov eax, [dlp] ; get first offset from the data list
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
test eax, eax ; end of list ?
|
||||
@@ -615,15 +607,13 @@
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
jz .__paramLoopDoneWBone2; exit, write buffer contains one DWORD
|
||||
|
||||
jz .__paramLoopDoneWBone2 ; exit, write buffer contains one DWORD
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
add dlp, 8 ; dlp += 2 (skip two DWORD entries)
|
||||
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
@@ -634,16 +624,16 @@
|
||||
|
||||
%ENDIF ; GLIDE_PACK_RGB
|
||||
|
||||
|
||||
.__paramLoopDoneWBzero2:
|
||||
|
||||
mov vertex, [esp + _vc$] ; Current vertex = C
|
||||
mov dlp, dlpstrt ; Reset the dataList
|
||||
|
||||
movq mm1, [vertex+X] ; y | x of vertex C
|
||||
movq mm1, [vertex+x] ; y | x of vertex C
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write y | x of vertex C
|
||||
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
nop
|
||||
|
||||
%IF GLIDE_PACK_RGB
|
||||
%IF GLIDE_PACK_ALPHA
|
||||
@@ -727,15 +717,13 @@
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
|
||||
jz .__paramLoopDoneWBone3; exit, write buffer contains one DWORD
|
||||
|
||||
movd mm2, [eax+vertex] ; get next parameter
|
||||
add dlp, 8 ; dlp += 2
|
||||
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
add dlp, 4 ; dlp++
|
||||
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
@@ -748,7 +736,6 @@
|
||||
|
||||
%ENDIF ; GLIDE_PACK_RGB
|
||||
|
||||
|
||||
.__paramLoopDoneWBone2:
|
||||
|
||||
;; here: "write buffer" has one DWORD left over from vertex B
|
||||
@@ -756,13 +743,14 @@
|
||||
mov vertex, [esp + _vc$] ; Current vertex = C
|
||||
mov dlp, dlpstrt ; reset the dataList
|
||||
|
||||
movd mm2, [vertex+X] ; 0 | x of vertex C
|
||||
movd mm2, [vertex+x] ; 0 | x of vertex C
|
||||
punpckldq mm1, mm2 ; x | old param
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write: x | old param
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
|
||||
movd mm1, [vertex+Y] ; 0 | y of vertex C
|
||||
movd mm1, [vertex+y] ; 0 | y of vertex C
|
||||
nop
|
||||
|
||||
%IF GLIDE_PACK_RGB
|
||||
%IF GLIDE_PACK_ALPHA
|
||||
@@ -849,7 +837,7 @@
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
punpckldq mm1, mm2 ; current param | previous param
|
||||
add dlp, 4 ; dlp++
|
||||
add dlp, 8 ; dlp += 2
|
||||
|
||||
WRITE_MM1_FIFO_ALIGNED ; PCI write current param | previous param
|
||||
add fifo, 8 ; fifoPtr += 2*sizeof(FxU32)
|
||||
@@ -858,9 +846,7 @@
|
||||
jz .__paramLoopDoneWBzero3; exit, "write buffer" empty
|
||||
|
||||
movd mm1, [eax+vertex] ; get next parameter
|
||||
mov eax, [dlp] ; offset = *(dlp + 1)
|
||||
|
||||
add dlp, 4 ; dlp++
|
||||
mov eax, [dlp-4] ; offset = *(dlp + 1)
|
||||
|
||||
test eax, eax ; at end of offset list (offset == 0) ?
|
||||
jnz .__paramLoop3b ; nope, copy next parameter
|
||||
@@ -884,7 +870,7 @@
|
||||
mov [gc + fifoPtr], fifo ; save new fifo pointer
|
||||
|
||||
mov edx, [gc + fifoRoom] ; old fifo space available
|
||||
inc ecx ; _GlideRoot.stats.trisDrawn++
|
||||
add ecx, 1 ; _GlideRoot.stats.trisDrawn++
|
||||
|
||||
mov ebp, [_GlideRoot + trisProcessed]; _GlideRoot.stats.trisProcessed
|
||||
sub eax, ebx ; new fifo ptr - old fifo ptr = additional fifo space used
|
||||
@@ -895,45 +881,43 @@
|
||||
mov eax, 1h ; return value = triangle drawn
|
||||
mov [gc + fifoRoom], edx ; new fifo space available
|
||||
|
||||
;; Restore trashed registers
|
||||
|
||||
inc ebp ; _GlideRoot.stats.trisProcessed++
|
||||
pop esi ; restore caller's register variable
|
||||
|
||||
add ebp, 1 ; _GlideRoot.stats.trisProcessed++
|
||||
mov [_GlideRoot + trisProcessed], ebp ;
|
||||
pop ebx ; restore caller's register variable
|
||||
|
||||
pop ebp ; restore frame pointer
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
femms ; no more AMD3D code, clear FPU/MMX regs
|
||||
|
||||
ret ; return to caller
|
||||
;; Restore trashed registers
|
||||
pop ebp ; restore frame pointer
|
||||
pop ebx ; restore caller's register variable
|
||||
pop esi ; restore caller's register variable
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
ret ; return to caller
|
||||
|
||||
%IF GLIDE_CULLING
|
||||
.__cullFail:
|
||||
mov ebp, [_GlideRoot + trisProcessed] ; triangles processed so far
|
||||
xor eax, eax ; return value = triangle not drawn
|
||||
|
||||
add ebp, 1 ; _GlideRoot.stats.trisProcessed++;
|
||||
mov [_GlideRoot + trisProcessed], ebp
|
||||
|
||||
femms ; no more AMD3D code, clear FPU/MMX regs
|
||||
|
||||
;; Restore trashed registers
|
||||
inc ebp ; _GlideRoot.stats.trisProcessed++;
|
||||
pop esi
|
||||
|
||||
mov [_GlideRoot + trisProcessed], ebp
|
||||
pop ebx
|
||||
|
||||
;; Restore trashed registers
|
||||
pop ebp ; restore frame pointer
|
||||
pop edi
|
||||
pop ebx ; restore caller's register variable
|
||||
pop esi ; restore caller's register variable
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
ret
|
||||
%ENDIF ; GLIDE_CULLING
|
||||
;;--------------------------------------------------------------------------
|
||||
;; end AMD3D version
|
||||
;;--------------------------------------------------------------------------
|
||||
|
||||
%else
|
||||
|
||||
;;--------------------------------------------------------------------------
|
||||
;; start original code
|
||||
;;--------------------------------------------------------------------------
|
||||
@@ -950,7 +934,7 @@
|
||||
push ebp
|
||||
nop
|
||||
|
||||
align 4
|
||||
ALIGN 32
|
||||
%IF GLIDE_CULLING
|
||||
%define fa eax ; vtx a from caller
|
||||
%define fb ebx ; vtx b from caller
|
||||
@@ -1014,7 +998,7 @@
|
||||
|
||||
%ENDIF ; GLIDE_CULLING
|
||||
|
||||
align 4
|
||||
ALIGN 32
|
||||
;; Check to make sure that we have enough room for
|
||||
;; the complete triangle packet.
|
||||
mov eax, [_GlideRoot + curTriSize]
|
||||
@@ -1031,7 +1015,7 @@
|
||||
push eax
|
||||
call _FifoMakeRoom
|
||||
|
||||
add esp, 12
|
||||
;add esp, 12
|
||||
|
||||
;; Send triangle parameters
|
||||
|
||||
@@ -1043,7 +1027,7 @@
|
||||
%define packCol edi
|
||||
%define tempVal edi
|
||||
|
||||
align 4
|
||||
ALIGN 32
|
||||
.__triBegin:
|
||||
mov fifo, [gc + fifoPtr] ; Fetch Fifo Ptr
|
||||
mov vOffset, 4 ; Starting vertex
|
||||
@@ -1054,7 +1038,7 @@
|
||||
GR_FIFO_WRITE fifo, 0, eax ; Write packet header to fifo
|
||||
add fifo, 4 ; Advance fifo for hdr & x/y coordinate
|
||||
|
||||
align 4
|
||||
ALIGN 32
|
||||
.__vertexStart:
|
||||
mov vertex, [esp + STKOFF + vOffset] ; Current vertex
|
||||
add fifo, 8
|
||||
@@ -1062,11 +1046,11 @@
|
||||
nop ; Avoid p5 agi w/ load of vertex ptr
|
||||
nop
|
||||
|
||||
mov eax, [vertex + X] ; X
|
||||
mov eax, [vertex + x] ; X
|
||||
lea dlp, [gc + tsuDataList] ; Reset the dataList
|
||||
|
||||
GR_FIFO_WRITE fifo, -8, eax ; PCI write X
|
||||
mov eax, [vertex + Y] ; Y
|
||||
mov eax, [vertex + y] ; Y
|
||||
|
||||
xor packCol, packCol ; Clear packed color
|
||||
GR_FIFO_WRITE fifo, -4, eax ; PCI write Y
|
||||
@@ -1159,7 +1143,7 @@
|
||||
|
||||
jne .__paramLoop
|
||||
|
||||
align 4
|
||||
ALIGN 32
|
||||
.__nextVertex:
|
||||
;; On to the next vertex
|
||||
add vOffset, 4
|
||||
|
||||
@@ -19,6 +19,10 @@
|
||||
;; $Header$
|
||||
;; $Revision$
|
||||
;; $Log$
|
||||
;; Revision 1.1.2.1 2004/12/23 20:45:56 koolsmoky
|
||||
;; converted to nasm syntax
|
||||
;; added x86 asm, 3dnow! triangle and mmx, 3dnow! texture download optimizations
|
||||
;;
|
||||
;; Revision 1.1.1.1.8.1 2003/11/03 13:34:30 dborca
|
||||
;; Voodoo2 happiness (DJGPP & Linux)
|
||||
;;
|
||||
@@ -74,14 +78,14 @@
|
||||
|
||||
%include "xos.inc"
|
||||
|
||||
extrn _FifoMakeRoom
|
||||
extrn _FifoMakeRoom, 12
|
||||
|
||||
%MACRO _grCommandTransportMakeRoom 3
|
||||
push %3
|
||||
push %2
|
||||
push %1
|
||||
call _FifoMakeRoom
|
||||
add esp, 12
|
||||
;add esp, 12
|
||||
%ENDMACRO ; _grCommandTransportMakeRoom
|
||||
|
||||
;;; Definitions of cvg regs and glide root structures.
|
||||
@@ -107,19 +111,15 @@ _texData$ equ 24 + STACKOFFSET
|
||||
%define curS ecx ; texture s-coordinate
|
||||
%define fRoom edx ; room available in fifo (in bytes)
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
|
||||
%IFNDEF GL_SSE2
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
;
|
||||
; GL_AMD3D, GL_MMX
|
||||
;
|
||||
;--------------------------------------------------------------------------
|
||||
|
||||
segment TEXT
|
||||
segment TEXT
|
||||
|
||||
ALIGN 32
|
||||
ALIGN 32
|
||||
|
||||
%IFDEF GL_AMD3D
|
||||
proc _grTexDownload_3DNow_MMX, 24
|
||||
@@ -156,7 +156,7 @@ proc _grTexDownload_MMX, 24
|
||||
sub curT, eax ; curT = maxT - minT
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr
|
||||
|
||||
mov curS, [esp + _maxS$] ; curS = maxS
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = scanline width in DWORDs
|
||||
add curT, 1 ; curT = maxT - minT + 1
|
||||
|
||||
%IFDEF GL_AMD3D
|
||||
@@ -166,23 +166,24 @@ proc _grTexDownload_MMX, 24
|
||||
emms ; we'll use MMX
|
||||
%ENDIF
|
||||
|
||||
mov edx, curS ; curS = maxS = scanline width in DWORDs
|
||||
movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download
|
||||
|
||||
shl curS, 2 ; scan line width (in bytes)
|
||||
add curS, curS ;
|
||||
add curS, curS ; scan line width (in bytes)
|
||||
mov eax, [esp + _minT$] ; 0 | minT
|
||||
|
||||
mov [esp + _maxS$], curS ; save scan line width (in bytes)
|
||||
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
|
||||
mov edx, curS ;
|
||||
|
||||
shl eax, 9 ; TEX_ROW_ADDR_INCR(minT) = minT << 9
|
||||
add edx, edx ; packetHdr<21:3> = maxS = scanline width in DWORDs
|
||||
|
||||
or edx, 0xc0000005 ; packetHdr<31:30> = texture port
|
||||
; packetHdr<21:3> = maxS
|
||||
; packetHdr<2:0> = packetType 5
|
||||
|
||||
movd mm1, edx ; 0 | packetHdr
|
||||
movd mm2, eax ; 0 | TEX_ROW_ADDR_INCR(minT)
|
||||
movd mm2, eax ; 0 | minT
|
||||
psllq mm2, 9 ; 0 | TEX_ROW_ADDR_INCR(minT) = minT << 9
|
||||
|
||||
paddd mm3, mm2 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
|
||||
movd mm2, [gc + tex_ptr] ; 0 | gc->tex_ptr
|
||||
@@ -230,7 +231,7 @@ proc _grTexDownload_MMX, 24
|
||||
mov [gc + fifoPtr], fifo ; store new fifoPtr
|
||||
jmp .startDownload ; fifo aligned, download texture now
|
||||
|
||||
align 32
|
||||
ALIGN 32
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
|
||||
;; edx=fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
@@ -327,394 +328,3 @@ proc _grTexDownload_MMX, 24
|
||||
|
||||
ret ; pop 6 DWORD parameters and return
|
||||
endp
|
||||
|
||||
%ELSE ; !GL_SSE2
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
;
|
||||
; GL_SSE2
|
||||
;
|
||||
;--------------------------------------------------------------------------
|
||||
|
||||
segment TEXT
|
||||
|
||||
ALIGN 32
|
||||
|
||||
;;--------------------------------------------------------------------------
;; _grTexDownload_SSE2_64
;;
;; Writes texture rows minT..maxT into the command fifo, one packet per row:
;; an 8-byte header (hdr1 = (maxS << 3) | 5, hdr2 = destination address)
;; followed by the row data, copied from texData 64 bits at a time.
;;
;; Stack arguments read: _gc$, _baseAddr$, _maxS$, _minT$, _maxT$, _texData$.
;; NOTE(review): the "24" on the proc line is presumably the argument byte
;; count (6 DWORDs) consumed by the proc/ret macros -- confirm in xos.inc.
;;
;; Register roles (as stated by the comments in the body):
;;   ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS, edx = fRoom
;;   xmm1 = hdr2 | hdr1, xmm2 = per-row address increment | 0
;;
;; Side effects visible here:
;;   - ebx/esi/edi/ebp are saved and restored; eax, ecx, edx and xmm0-xmm4
;;     are modified and not restored.
;;   - the _maxS$ stack slot is overwritten with the row width in bytes.
;;   - gc->fifoPtr and gc->fifoRoom (and gc->checkPtr under GLIDE_DEBUG)
;;     are updated after every row.
;;
;; Requirement (not checked here): the inner loop only exits when curS
;; reaches exactly zero after subtracting 8 per pass, so the row width in
;; bytes (maxS * 4) must be a non-zero multiple of 8.
;;--------------------------------------------------------------------------
proc _grTexDownload_SSE2_64, 24
|
||||
|
||||
;; The argument loads are interleaved with the pushes; the "- 12 / - 8 / - 4"
;; corrections presumably compensate for pushes not yet executed (the _xxx$
;; offsets look like they are relative to esp after all four pushes) -- confirm.
push ebx ; save caller's register variable
|
||||
mov curT, [esp + _maxT$ - 12] ; curT = maxT
|
||||
|
||||
push esi ; save caller's register variable
|
||||
mov eax, [esp + _minT$ - 8] ; minT
|
||||
|
||||
push edi ; save caller's register variable
|
||||
mov gc, [esp + _gc$ - 4] ; gc
|
||||
|
||||
push ebp ; save caller's register variable
|
||||
mov dataPtr, [esp + _texData$]; dataPtr
|
||||
|
||||
;; Optional early-out: skip the download when gc is NULL or when bit 0 of
;; the DWORD that gc->lostContext points to is set.
%IFDEF GLIDE_ALT_TAB
|
||||
test gc, gc
|
||||
je .dlDone
|
||||
; mov edx, [gc + windowed]
|
||||
; test edx, 1
|
||||
; jnz .pastContextTest
|
||||
mov edx, DWORD [gc+lostContext]
|
||||
mov ecx, [edx]
|
||||
test ecx, 1
|
||||
jnz .dlDone
|
||||
;.pastContextTest:
|
||||
%ENDIF
|
||||
|
||||
sub curT, eax ; curT = maxT - minT
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr
|
||||
|
||||
mov curS, [esp + _maxS$] ; curS = maxS
|
||||
add curT, 1 ; curT = maxT - minT + 1
|
||||
|
||||
mov edx, curS ; edx = maxS = scanline width in DWORDs (seed for packetHdr)
|
||||
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
|
||||
|
||||
shl curS, 2 ; scan line width (in bytes)
|
||||
mov eax, [esp + _minT$] ; eax = minT
|
||||
|
||||
mov [esp + _maxS$], curS ; save scan line width (in bytes); later reloads of _maxS$ return bytes
|
||||
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
|
||||
|
||||
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
|
||||
|
||||
movd xmm2,curS ; 0 | 0 | TEX_ROW_ADDR_INCR(1)
|
||||
or edx, 00000005h ; packetHdr<31:30> = lfb port
|
||||
; packetHdr<21:3> = maxS
|
||||
; packetHdr<2:0> = packetType 5
|
||||
|
||||
movd xmm1,edx ; 0 | 0 | packetHdr
|
||||
movd xmm4,eax ; 0 | 0 | TEX_ROW_ADDR_INCR(minT)
|
||||
|
||||
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
|
||||
paddd xmm3,xmm4 ; 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
|
||||
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
|
||||
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
;; Only bit 2 of the fifo pointer is tested, i.e. this distinguishes 8-byte
;; aligned from 4-byte-offset pointers.
test fifo, 4 ; is fifo QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
|
||||
cmp fRoom, 4 ; enough room for NULL packet in fifo?
|
||||
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
|
||||
|
||||
;; NOTE(review): without USE_PACKET_FIFO no room is made here and the code
;; below still falls through to the NULL-packet write -- confirm that build
;; configuration is never used with this routine.
%ifdef USE_PACKET_FIFO
|
||||
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
|
||||
|
||||
test fifo, 4 ; new fifoPtr QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
|
||||
.xmmAlignFifo:
|
||||
|
||||
mov DWORD [fifo], 0 ; write NULL packet
|
||||
sub fRoom, 4 ; fifoRoom -= 4
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; store new fifoRoom
|
||||
add fifo, 4 ; fifoPtr += 4
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
%ENDIF
|
||||
|
||||
mov [gc + fifoPtr], fifo ; store new fifoPtr
|
||||
jmp .startDownload ; fifo aligned, download texture now
|
||||
|
||||
align 32
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
|
||||
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
;; Per-row loop: entered from .startDownload once the fifo has room for one
;; header plus one row.
.loopT:
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
|
||||
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
|
||||
|
||||
test fifo, 4 ; is fifoPtr QWORD aligned ?
|
||||
jz .alignmentOK ; yup, continue
|
||||
|
||||
xor eax, eax ; create 0
|
||||
mov [eax], eax ; move to DS:[0] forces GP
|
||||
.alignmentOK:
|
||||
%ENDIF ; GLIDE_DEBUG
|
||||
|
||||
;; Compute packet header words
|
||||
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
|
||||
;; hdr2: download address[29:0]
|
||||
|
||||
movq [fifo],xmm1 ; store hdr2 | hdr1
|
||||
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
|
||||
|
||||
;; S coordinate inner loop: one 64-bit (8-byte) copy per iteration
|
||||
|
||||
.loopS:
|
||||
|
||||
movq xmm0,[dataPtr] ; load 64 bits of texel data
|
||||
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
|
||||
|
||||
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
|
||||
sub curS, 8 ; curS -= 2 * sizeof(FxU32); sets ZF for the jnz below
|
||||
|
||||
movq [fifo - 8],xmm0 ; *fifoPtr = texelData[64 bits] (movq leaves the flags from sub intact)
|
||||
jnz .loopS ; loop until curS == 0 (exits only on an exact zero)
|
||||
|
||||
;; Row done: publish the new fifo pointer and charge the bytes written
;; (header + data) against fifoRoom.
mov ecx, [gc + fifoPtr] ; old fifo ptr
|
||||
nop ; filler
|
||||
|
||||
mov eax, fifo ; new fifo ptr
|
||||
mov [gc + fifoPtr], fifo ; save new fifo ptr
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
%ENDIF
|
||||
|
||||
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
|
||||
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
|
||||
sub curT, 1 ; curT--; sets ZF for the jz below
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; save new fifo space available (mov does not change flags)
|
||||
jz .dlDone ; last row written (ZF from "sub curT, 1"), finish
|
||||
|
||||
;; Check for room to write the next texture scanline
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
|
||||
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
|
||||
mov esp, esp ; filler
|
||||
.startDownload:
|
||||
lea eax, [curS+8] ; fifo space needed = scan line width + header size
|
||||
|
||||
cmp fRoom, eax ; fifo space available >= fifo space required ?
|
||||
jge .loopT ; yup, write next scan line
|
||||
|
||||
;; NOTE(review): without USE_PACKET_FIFO no room is made and the row is
;; written anyway after the reloads below -- confirm this is intended.
%ifdef USE_PACKET_FIFO
|
||||
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room for one scanline plus its 8-byte header
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
jmp .loopT ; we now have enough fifo room, write next scanline
|
||||
|
||||
.dlDone:
|
||||
pop ebp ; restore caller's register variable
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
pop esi ; restore caller's register variable
|
||||
pop ebx ; restore caller's register variable
|
||||
|
||||
ret ; pop 6 DWORD parameters and return
|
||||
endp
|
||||
|
||||
|
||||
|
||||
segment TEXT
|
||||
|
||||
ALIGN 32
|
||||
|
||||
;;--------------------------------------------------------------------------
;; _grTexDownload_SSE2_128
;;
;; Writes texture rows minT..maxT into the command fifo, one packet per row:
;; an 8-byte header (hdr1 = (maxS << 3) | 5, hdr2 = destination address)
;; followed by the row data, copied from texData 128 bits at a time with
;; unaligned SSE2 moves.
;;
;; Stack arguments read: _gc$, _baseAddr$, _maxS$, _minT$, _maxT$, _texData$.
;; NOTE(review): the "24" on the proc line is presumably the argument byte
;; count (6 DWORDs) consumed by the proc/ret macros -- confirm in xos.inc.
;;
;; Register roles (as stated by the comments in the body):
;;   ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS, edx = fRoom
;;   xmm1 = hdr2 | hdr1, xmm2 = per-row address increment | 0
;;
;; Side effects visible here:
;;   - ebx/esi/edi/ebp are saved and restored; eax, ecx, edx and xmm0-xmm4
;;     are modified and not restored.
;;   - the _maxS$ stack slot is overwritten with the row width in bytes.
;;   - gc->fifoPtr and gc->fifoRoom (and gc->checkPtr under GLIDE_DEBUG)
;;     are updated after every row.
;;
;; Requirement (not checked here): the inner loop only exits when curS
;; reaches exactly zero after subtracting 16 per pass, so the row width in
;; bytes (maxS * 4) must be a non-zero multiple of 16.
;;--------------------------------------------------------------------------
proc _grTexDownload_SSE2_128, 24
|
||||
|
||||
;; The argument loads are interleaved with the pushes; the "- 12 / - 8 / - 4"
;; corrections presumably compensate for pushes not yet executed (the _xxx$
;; offsets look like they are relative to esp after all four pushes) -- confirm.
push ebx ; save caller's register variable
|
||||
mov curT, [esp + _maxT$ - 12] ; curT = maxT
|
||||
|
||||
push esi ; save caller's register variable
|
||||
mov eax, [esp + _minT$ - 8] ; minT
|
||||
|
||||
push edi ; save caller's register variable
|
||||
mov gc, [esp + _gc$ - 4] ; gc
|
||||
|
||||
push ebp ; save caller's register variable
|
||||
mov dataPtr, [esp + _texData$]; dataPtr
|
||||
|
||||
;; Optional early-out: skip the download when gc is NULL or when bit 0 of
;; the DWORD that gc->lostContext points to is set.
%IFDEF GLIDE_ALT_TAB
|
||||
test gc, gc
|
||||
je .dlDone
|
||||
; mov edx, [gc + windowed]
|
||||
; test edx, 1
|
||||
; jnz .pastContextTest
|
||||
mov edx, DWORD [gc+lostContext]
|
||||
mov ecx, [edx]
|
||||
test ecx, 1
|
||||
jnz .dlDone
|
||||
;.pastContextTest:
|
||||
%ENDIF
|
||||
|
||||
sub curT, eax ; curT = maxT - minT
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr
|
||||
|
||||
mov curS, [esp + _maxS$] ; curS = maxS
|
||||
add curT, 1 ; curT = maxT - minT + 1
|
||||
|
||||
mov edx, curS ; edx = maxS = scanline width in DWORDs (seed for packetHdr)
|
||||
movd xmm3,[esp + _baseAddr$] ; 0 | 0 | 0 | address of texture to download
|
||||
|
||||
shl curS, 2 ; scan line width (in bytes)
|
||||
mov eax, [esp + _minT$] ; eax = minT
|
||||
|
||||
mov [esp + _maxS$], curS ; save scan line width (in bytes); later reloads of _maxS$ return bytes
|
||||
shl edx, 3 ; packetHdr<21:3> = maxS = scanline width in DWORDs
|
||||
|
||||
imul eax, curS ; TEX_ROW_ADDR_INCR(minT) = minT * TEX_ROW_ADDR_INCR(1)
|
||||
|
||||
movd xmm2,curS ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(1)
|
||||
or edx, 00000005h ; packetHdr<31:30> = lfb port
|
||||
; packetHdr<21:3> = maxS
|
||||
; packetHdr<2:0> = packetType 5
|
||||
|
||||
movd xmm1,edx ; 0 | 0 | 0 | packetHdr
|
||||
movd xmm4,eax ; 0 | 0 | 0 | TEX_ROW_ADDR_INCR(minT)
|
||||
|
||||
psllq xmm2,32 ; 0 | 0 | TEX_ROW_ADDR_INCR(1) | 0
|
||||
paddd xmm3,xmm4 ; 0 | 0 | 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
|
||||
punpckldq xmm1,xmm3 ; 0 | 0 | hdr2 = texAddr | hdr1 = packetHdr
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
|
||||
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
;; Only bit 2 of the fifo pointer is tested, i.e. this establishes 8-byte
;; (not 16-byte) alignment of the packet start.
test fifo, 4 ; is fifo QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
|
||||
cmp fRoom, 4 ; enough room for NULL packet in fifo?
|
||||
jge .xmmAlignFifo ; yes, write NULL packet to align fifo
|
||||
|
||||
;; NOTE(review): without USE_PACKET_FIFO no room is made here and the code
;; below still falls through to the NULL-packet write -- confirm that build
;; configuration is never used with this routine.
%ifdef USE_PACKET_FIFO
|
||||
_grCommandTransportMakeRoom 4, 0, __LINE__; make fifo room
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
|
||||
|
||||
test fifo, 4 ; new fifoPtr QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
|
||||
.xmmAlignFifo:
|
||||
|
||||
mov DWORD [fifo], 0 ; write NULL packet
|
||||
sub fRoom, 4 ; fifoRoom -= 4
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; store new fifoRoom
|
||||
add fifo, 4 ; fifoPtr += 4
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
%ENDIF
|
||||
|
||||
mov [gc + fifoPtr], fifo ; store new fifoPtr
|
||||
jmp .startDownload ; fifo aligned, download texture now
|
||||
|
||||
align 32
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
|
||||
;; edx=fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
;; Per-row loop: entered from .startDownload once the fifo has room for one
;; header plus one row.
.loopT:
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
|
||||
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
|
||||
|
||||
test fifo, 4 ; is fifoPtr QWORD aligned ?
|
||||
jz .alignmentOK ; yup, continue
|
||||
|
||||
xor eax, eax ; create 0
|
||||
mov [eax], eax ; move to DS:[0] forces GP
|
||||
.alignmentOK:
|
||||
%ENDIF ; GLIDE_DEBUG
|
||||
|
||||
;; Compute packet header words
|
||||
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
|
||||
;; hdr2: download address[29:0]
|
||||
|
||||
movq [fifo],xmm1 ; store hdr2 | hdr1
|
||||
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
|
||||
|
||||
;; S coordinate inner loop: one 128-bit (16-byte) copy per iteration.
;; Unaligned moves are used: the fifo pointer is only known to be 8-byte
;; aligned here (see the "test fifo, 4" checks) and nothing in this routine
;; establishes the alignment of dataPtr.
|
||||
|
||||
.loopS:
|
||||
|
||||
movdqu xmm0, [dataPtr] ; load 128 bits of texel data (unaligned load)
|
||||
add fifo, 16 ; pre-increment fifoPtr += 4 * sizeof(FxU32)
|
||||
|
||||
add dataPtr, 16 ; dataPtr += 4 * sizeof(FxU32)
|
||||
sub curS, 16 ; curS -= 4 * sizeof(FxU32); sets ZF for the jnz below
|
||||
|
||||
movdqu [fifo - 16], xmm0 ; *fifoPtr = texelData[128 bits] (unaligned store; movdqu leaves the flags from sub intact)
|
||||
jnz .loopS ; loop until curS == 0 (exits only on an exact zero)
|
||||
|
||||
;; Row done: publish the new fifo pointer and charge the bytes written
;; (header + data) against fifoRoom.
mov ecx, [gc + fifoPtr] ; old fifo ptr
|
||||
nop ; filler
|
||||
|
||||
mov eax, fifo ; new fifo ptr
|
||||
mov [gc + fifoPtr], fifo ; save new fifo ptr
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
%ENDIF
|
||||
|
||||
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
|
||||
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
|
||||
sub curT, 1 ; curT--; sets ZF for the jz below
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; save new fifo space available (mov does not change flags)
|
||||
jz .dlDone ; last row written (ZF from "sub curT, 1"), finish
|
||||
|
||||
;; Check for room to write the next texture scanline
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
|
||||
;; edx = fifoRoom, xmm1 = texAddr|packetHdr, xmm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
paddd xmm1,xmm2 ; 0 | 0 | texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
|
||||
mov esp, esp ; filler
|
||||
.startDownload:
|
||||
lea eax, [curS+8] ; fifo space needed = scan line width + header size
|
||||
|
||||
cmp fRoom, eax ; fifo space available >= fifo space required ?
|
||||
jge .loopT ; yup, write next scan line
|
||||
|
||||
;; NOTE(review): without USE_PACKET_FIFO no room is made and the row is
;; written anyway after the reloads below -- confirm this is intended.
%ifdef USE_PACKET_FIFO
|
||||
_grCommandTransportMakeRoom eax, 0, __LINE__; make fifo room for one scanline plus its 8-byte header
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
jmp .loopT ; we now have enough fifo room, write next scanline
|
||||
|
||||
.dlDone:
|
||||
pop ebp ; restore caller's register variable
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
pop esi ; restore caller's register variable
|
||||
pop ebx ; restore caller's register variable
|
||||
|
||||
ret ; pop 6 DWORD parameters and return
|
||||
endp
|
||||
|
||||
|
||||
%ENDIF ; GL_SSE2
|
||||
|
||||
Reference in New Issue
Block a user