test
This commit is contained in:
@@ -1,83 +1,14 @@
|
||||
;; THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
|
||||
;; PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
|
||||
;; TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
|
||||
;; INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
|
||||
;; DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
|
||||
;; THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
|
||||
;; EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
|
||||
;; FULL TEXT OF THE NON-WARRANTY PROVISIONS.
|
||||
;;
|
||||
;; USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
|
||||
;; RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
|
||||
;; TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
|
||||
;; AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
|
||||
;; SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
|
||||
;; THE UNITED STATES.
|
||||
;;
|
||||
;; COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
|
||||
;; Linux-safe MMX/3DNow texture download path for cvg Glide 2
|
||||
;; Rewritten to avoid:
|
||||
;; - MMX state surviving across C calls to _FifoMakeRoom()
|
||||
;; - 64-bit MMX stores directly into FIFO/MMIO
|
||||
;; - QWORD-alignment dependency for the FIFO path
|
||||
;;
|
||||
;; $Header$
|
||||
;; $Revision$
|
||||
;; $Log$
|
||||
;; Revision 1.1.2.2 2005/04/23 18:36:36 koolsmoky
|
||||
;; fixed 3dnow! and mmx optimizations
|
||||
;;
|
||||
;; Revision 1.1.2.1 2004/12/23 20:45:56 koolsmoky
|
||||
;; converted to nasm syntax
|
||||
;; added x86 asm, 3dnow! triangle and mmx, 3dnow! texture download optimizations
|
||||
;;
|
||||
;; Revision 1.1.1.1.8.1 2003/11/03 13:34:30 dborca
|
||||
;; Voodoo2 happiness (DJGPP & Linux)
|
||||
;;
|
||||
;; Revision 1.1.8.7 2003/09/12 05:08:35 koolsmoky
|
||||
;; preparing for graphic context checks
|
||||
;;
|
||||
;; Revision 1.1.8.6 2003/07/07 23:29:06 koolsmoky
|
||||
;; cleaned logs
|
||||
;;
|
||||
;;
|
||||
;; Revision 1.1 2000/06/15 00:27:43 joseph
|
||||
;; Initial checkin into SourceForge.
|
||||
;;
|
||||
;; 10 8/17/99 6:35p Atai
|
||||
;; fixed amd debug mode
|
||||
;;
|
||||
;; 9 4/08/99 1:22p Atai
|
||||
;; added context check for _grTexDownload_3DNow_MMX
|
||||
;;
|
||||
;; 8 3/19/99 11:26a Peter
|
||||
;; expose direct fifo for gl
|
||||
;;
|
||||
;; 7 2/02/99 4:36p Peter
|
||||
;; download through lfb rather than texture port
|
||||
;;
|
||||
;; 6 12/17/98 2:36p Atai
|
||||
;; check in Norbert's fix for texture download width correction
|
||||
;;
|
||||
;; 5 12/07/98 11:33a Peter
|
||||
;; norbert's re-fixes of my merge
|
||||
;;
|
||||
;; 4 11/02/98 5:34p Atai
|
||||
;; merge direct i/o code
|
||||
;;
|
||||
;; 3 10/20/98 5:34p Atai
|
||||
;; added #ifdefs for hwc
|
||||
;;
|
||||
;; 2 10/14/98 12:05p Peter
|
||||
;; fixed my effed up assumption about non-volatile regs
|
||||
;;
|
||||
;; 1 10/09/98 6:48p Peter
|
||||
;; 3DNow!(tm) version of wide texture downloads
|
||||
;;
|
||||
;; 3 10/07/98 9:43p Peter
|
||||
;; triangle procs for 3DNow!(tm)
|
||||
;;
|
||||
;; 2 10/05/98 7:43p Peter
|
||||
;; 3DNow!(tm) happiness everywhere
|
||||
;;
|
||||
;; 1 10/05/98 6:01p Peter
|
||||
;; mmx stuff for 3DNow!(tm) capable processors
|
||||
;;
|
||||
;; Functional goal matches the historical wide-S MMX path:
|
||||
;; - one packet header per scanline
|
||||
;; - texel data transferred 8 bytes at a time
|
||||
;; - MMX used for source loads only
|
||||
;; - FIFO written with 32-bit stores only
|
||||
|
||||
%include "xos.inc"
|
||||
|
||||
@@ -89,40 +20,51 @@ extrn _FifoMakeRoom, 12
|
||||
push %1
|
||||
call _FifoMakeRoom
|
||||
add esp, 12
|
||||
%ENDMACRO ; _grCommandTransportMakeRoom
|
||||
%ENDMACRO
|
||||
|
||||
;;; Definitions of cvg regs and glide root structures.
|
||||
%INCLUDE "fxgasm.h"
|
||||
|
||||
; Arguments (STACKOFFSET = 16 from 4 dword pushes)
|
||||
STACKOFFSET equ 16
|
||||
_gc$ equ 4 + STACKOFFSET
|
||||
_baseAddr$ equ 8 + STACKOFFSET
|
||||
_maxS$ equ 12 + STACKOFFSET
|
||||
_minT$ equ 16 + STACKOFFSET
|
||||
_maxT$ equ 20 + STACKOFFSET
|
||||
_texData$ equ 24 + STACKOFFSET
|
||||
|
||||
;; NB: The first set of registers (eax, ecx, and edx) are volatile across
|
||||
;; function calls. The remaining registers are supposedly non-volatile
|
||||
;; so they only store things that are non-volatile across the call.
|
||||
|
||||
%define fifo ebp ; fifo ptr in inner loop
|
||||
%define gc esi ; graphics context
|
||||
%define dataPtr edi ; pointer to texture data to be downloaded
|
||||
%define curT ebx ; counter for texture scan lines (t-coordinate)
|
||||
%define curS ecx ; texture s-coordinate
|
||||
%define fRoom edx ; room available in fifo (in bytes)
|
||||
|
||||
;--------------------------------------------------------------------------
|
||||
; Stack layout after:
|
||||
; push ebx
|
||||
; push esi
|
||||
; push edi
|
||||
; push ebp
|
||||
; sub esp, 8
|
||||
;
|
||||
; GL_AMD3D, GL_MMX
|
||||
;
|
||||
;--------------------------------------------------------------------------
|
||||
; esp+00 : local rowAddrDiff
|
||||
; esp+04 : local strideBytes
|
||||
; esp+08 : saved ebp
|
||||
; esp+12 : saved edi
|
||||
; esp+16 : saved esi
|
||||
; esp+20 : saved ebx
|
||||
; esp+24 : return address
|
||||
; esp+28 : gc
|
||||
; esp+32 : baseAddr
|
||||
; esp+36 : maxS
|
||||
; esp+40 : minT
|
||||
; esp+44 : maxT
|
||||
; esp+48 : texData
|
||||
|
||||
segment TEXT
|
||||
LOCAL_rowAddrDiff$ equ 0
|
||||
LOCAL_strideBytes$ equ 4
|
||||
STACKOFFSET equ 24
|
||||
_gc$ equ 4 + STACKOFFSET
|
||||
_baseAddr$ equ 8 + STACKOFFSET
|
||||
_maxS$ equ 12 + STACKOFFSET
|
||||
_minT$ equ 16 + STACKOFFSET
|
||||
_maxT$ equ 20 + STACKOFFSET
|
||||
_texData$ equ 24 + STACKOFFSET
|
||||
|
||||
ALIGN 32
|
||||
%define fifo ebp
|
||||
%define gc esi
|
||||
%define dataPtr edi
|
||||
%define curT ebx
|
||||
%define curS ecx
|
||||
%define fRoom edx
|
||||
|
||||
segment TEXT
|
||||
|
||||
ALIGN 32
|
||||
|
||||
%IFDEF GL_AMD3D
|
||||
proc _grTexDownload_3DNow_MMX, 24
|
||||
@@ -131,219 +73,118 @@ proc _grTexDownload_3DNow_MMX, 24
|
||||
proc _grTexDownload_MMX, 24
|
||||
%ENDIF
|
||||
|
||||
push ebx ; save caller's register variable
|
||||
mov curT, [esp + _maxT$ - 12] ; curT = maxT
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
sub esp, 8
|
||||
|
||||
push esi ; save caller's register variable
|
||||
mov eax, [esp + _minT$ - 8] ; minT
|
||||
|
||||
push edi ; save caller's register variable
|
||||
mov gc, [esp + _gc$ - 4] ; gc
|
||||
|
||||
push ebp ; save caller's register variable
|
||||
mov dataPtr, [esp + _texData$]; dataPtr
|
||||
mov gc, [esp + _gc$]
|
||||
mov dataPtr, [esp + _texData$]
|
||||
|
||||
%IFDEF GLIDE_ALT_TAB
|
||||
test gc, gc
|
||||
je .dlDone
|
||||
; mov edx, [gc + windowed]
|
||||
; test edx, 1
|
||||
; jnz .pastContextTest
|
||||
mov edx, DWORD [gc+lostContext]
|
||||
mov ecx, [edx]
|
||||
test ecx, 1
|
||||
mov eax, DWORD [gc + lostContext]
|
||||
mov eax, [eax]
|
||||
test eax, 1
|
||||
jnz .dlDone
|
||||
;.pastContextTest:
|
||||
%ENDIF
|
||||
|
||||
sub curT, eax ; curT = maxT - minT
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr
|
||||
mov eax, [esp + _maxT$]
|
||||
mov curT, eax
|
||||
mov eax, [esp + _minT$]
|
||||
sub curT, eax
|
||||
add curT, 1
|
||||
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = scanline width in DWORDs
|
||||
add curT, 1 ; curT = maxT - minT + 1
|
||||
mov curS, [esp + _maxS$]
|
||||
shl curS, 2 ; stride in bytes = maxS * 4
|
||||
mov [esp + LOCAL_strideBytes$], curS
|
||||
|
||||
mov eax, [esp + _baseAddr$]
|
||||
mov fRoom, [esp + _minT$]
|
||||
shl fRoom, 9 ; TEX_ROW_ADDR_INCR(minT)
|
||||
add eax, fRoom
|
||||
sub eax, [gc + tex_ptr]
|
||||
mov [esp + LOCAL_rowAddrDiff$], eax
|
||||
|
||||
mov fifo, [gc + fifoPtr]
|
||||
mov fRoom, [gc + fifoRoom]
|
||||
|
||||
.rowStart:
|
||||
mov curS, [esp + LOCAL_strideBytes$]
|
||||
lea eax, [curS + 8] ; header + payload bytes needed
|
||||
cmp fRoom, eax
|
||||
jge .rowWrite
|
||||
|
||||
%IFDEF GL_AMD3D
|
||||
femms ; we'll use MMX/3DNow!, make sure FPU register cleared
|
||||
femms ; leave MMX/3DNow state before C call
|
||||
%ENDIF
|
||||
%IFDEF GL_MMX
|
||||
; emms ; we'll use MMX
|
||||
emms ; leave MMX state before C call
|
||||
%ENDIF
|
||||
|
||||
movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download
|
||||
|
||||
add curS, curS ;
|
||||
add curS, curS ; scan line width (in bytes)
|
||||
mov eax, [esp + _minT$] ; 0 | minT
|
||||
|
||||
mov [esp + _maxS$], curS ; save scan line width (in bytes)
|
||||
mov edx, curS ;
|
||||
|
||||
add edx, edx ; packetHdr<21:3> = maxS = scanline width in DWORDs
|
||||
|
||||
or edx, 0xc0000005 ; packetHdr<31:30> = texture port
|
||||
; packetHdr<21:3> = maxS
|
||||
; packetHdr<2:0> = packetType 5
|
||||
|
||||
movd mm1, edx ; 0 | packetHdr
|
||||
movd mm2, eax ; 0 | minT
|
||||
psllq mm2, 9 ; 0 | TEX_ROW_ADDR_INCR(minT) = minT << 9
|
||||
|
||||
paddd mm3, mm2 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
|
||||
movd mm2, [gc + tex_ptr] ; 0 | gc->tex_ptr
|
||||
psubd mm3, mm2 ; 0 | texAddr - gc->tex_ptr
|
||||
mov eax, 0x200 ; TEX_ROW_ADDR_INCR(1) = 1 << 9
|
||||
movd mm2, eax ; 0 | TEX_ROW_ADDR_INCR(1)
|
||||
psllq mm2, 32 ; TEX_ROW_ADDR_INCR(1) | 0
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
|
||||
punpckldq mm1, mm3 ; hdr2 = texAddr - gc->tex_ptr | hdr1 = packetHdr
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
|
||||
;; edx = fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
test fifo, 4 ; is fifo QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
|
||||
cmp fRoom, 4 ; enough room for NULL packet in fifo?
|
||||
jge .mmxAlignFifo ; yes, write NULL packet to align fifo
|
||||
|
||||
%ifdef USE_PACKET_FIFO
|
||||
sub esp, 16 ; reserve temp space for mm1/mm2
|
||||
movq [esp], mm1 ; save mm1
|
||||
movq [esp + 8], mm2 ; save mm2
|
||||
|
||||
_grCommandTransportMakeRoom 4, 0, __LINE__ ; make fifo room
|
||||
|
||||
movq mm1, [esp] ; restore mm1
|
||||
movq mm2, [esp + 8] ; restore mm2
|
||||
add esp, 16 ; release temp space
|
||||
_grCommandTransportMakeRoom eax, 0, __LINE__
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload
|
||||
mov fifo, [gc + fifoPtr]
|
||||
mov fRoom, [gc + fifoRoom]
|
||||
jmp .rowStart
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom)
|
||||
ALIGN 32
|
||||
.rowWrite:
|
||||
; packet header word 1: packet type / download space / numWords field
|
||||
mov eax, curS
|
||||
add eax, eax ; bytes * 2 == (dwords << 3)
|
||||
or eax, 0xc0000005
|
||||
mov [fifo], eax
|
||||
|
||||
test fifo, 4 ; new fifoPtr QWORD aligned ?
|
||||
jz .startDownload ; yup, start texture download
|
||||
; packet header word 2: destination texture address relative to tex_ptr
|
||||
mov eax, [esp + LOCAL_rowAddrDiff$]
|
||||
mov [fifo + 4], eax
|
||||
add fifo, 8
|
||||
|
||||
.mmxAlignFifo:
|
||||
; Write scanline payload.
|
||||
; MMX is used only for the source load. FIFO writes remain 32-bit.
|
||||
.dataLoop:
|
||||
movq mm0, [dataPtr]
|
||||
movd eax, mm0
|
||||
psrlq mm0, 32
|
||||
mov [fifo], eax
|
||||
movd eax, mm0
|
||||
mov [fifo + 4], eax
|
||||
|
||||
mov DWORD [fifo], 0 ; write NULL packet
|
||||
sub fRoom, 4 ; fifoRoom -= 4
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; store new fifoRoom
|
||||
add fifo, 4 ; fifoPtr += 4
|
||||
add dataPtr, 8
|
||||
add fifo, 8
|
||||
sub curS, 8
|
||||
jnz .dataLoop
|
||||
|
||||
mov eax, [esp + LOCAL_strideBytes$]
|
||||
add eax, 8
|
||||
sub fRoom, eax
|
||||
mov [gc + fifoPtr], fifo
|
||||
mov [gc + fifoRoom], fRoom
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
mov [gc + checkPtr], fifo
|
||||
%ENDIF
|
||||
|
||||
mov [gc + fifoPtr], fifo ; store new fifoPtr
|
||||
jmp .startDownload ; fifo aligned, download texture now
|
||||
dec curT
|
||||
jz .dlDone
|
||||
|
||||
ALIGN 32
|
||||
add DWORD [esp + LOCAL_rowAddrDiff$], 0200h
|
||||
jmp .rowStart
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS
|
||||
;; edx=fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
.loopT:
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
|
||||
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
|
||||
|
||||
test fifo, 4 ; is fifoPtr QWORD aligned ?
|
||||
jz .alignmentOK ; yup, continue
|
||||
|
||||
xor eax, eax ; create 0
|
||||
mov [eax], eax ; move to DS:[0] forces GP
|
||||
.alignmentOK:
|
||||
%ENDIF ; GLIDE_DEBUG
|
||||
|
||||
;; Compute packet header words
|
||||
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
|
||||
;; hdr2: download address[29:0]
|
||||
|
||||
movq [fifo], mm1 ; store hdr2 | hdr1
|
||||
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
|
||||
|
||||
;; S coordinate inner loop unrolled for 8 texels a write
|
||||
|
||||
.loopS:
|
||||
|
||||
movq mm0, [dataPtr] ; load 64 bit data (8 texels)
|
||||
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
|
||||
|
||||
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
|
||||
sub curS, 8 ; curS -= 2 * sizeof(FxU32)
|
||||
|
||||
movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits]
|
||||
jnz .loopS ; loop while curS > 0
|
||||
|
||||
mov ecx, [gc + fifoPtr] ; old fifo ptr
|
||||
nop ; filler
|
||||
|
||||
mov eax, fifo ; new fifo ptr
|
||||
mov [gc + fifoPtr], fifo ; save new fifo ptr
|
||||
|
||||
%IFDEF GLIDE_DEBUG
|
||||
mov [gc + checkPtr], fifo ; checkPtr
|
||||
%ENDIF
|
||||
|
||||
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
|
||||
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
|
||||
sub curT, 1 ; curT--
|
||||
|
||||
mov [gc + fifoRoom], fRoom ; save new fifo space available
|
||||
jz .dlDone ; loop while curT > 0
|
||||
|
||||
;; Check for room to write the next texture scanline
|
||||
|
||||
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
|
||||
;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
|
||||
|
||||
paddd mm1, mm2 ; texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
|
||||
mov esp, esp ; filler
|
||||
.startDownload:
|
||||
lea eax, [curS+8] ; fifo space needed = scan line width + header size
|
||||
|
||||
cmp fRoom, eax ; fifo space available >= fifo space required ?
|
||||
jge .loopT ; yup, write next scan line
|
||||
|
||||
%ifdef USE_PACKET_FIFO
|
||||
sub esp, 16 ; reserve temp space for mm1/mm2
|
||||
movq [esp], mm1 ; save mm1
|
||||
movq [esp + 8], mm2 ; save mm2
|
||||
|
||||
_grCommandTransportMakeRoom eax, 0, __LINE__ ; make fifo room
|
||||
|
||||
movq mm1, [esp] ; restore mm1
|
||||
movq mm2, [esp + 8] ; restore mm2
|
||||
add esp, 16 ; release temp space
|
||||
%endif
|
||||
|
||||
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
|
||||
|
||||
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
|
||||
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
|
||||
jmp .loopT ; we now have enough fifo room, write next scanline
|
||||
|
||||
.dlDone:
|
||||
.dlDone:
|
||||
%IFDEF GL_AMD3D
|
||||
femms ; exit 3DNow!(tm) state
|
||||
femms
|
||||
%ENDIF
|
||||
%IFDEF GL_MMX
|
||||
emms ; exit MMX state
|
||||
emms
|
||||
%ENDIF
|
||||
|
||||
pop ebp ; restore caller's register variable
|
||||
pop edi ; restore caller's register variable
|
||||
|
||||
pop esi ; restore caller's register variable
|
||||
pop ebx ; restore caller's register variable
|
||||
|
||||
ret ; pop 6 DWORD parameters and return
|
||||
add esp, 8
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
endp
|
||||
|
||||
Reference in New Issue
Block a user