This commit is contained in:
2026-03-27 01:43:19 +01:00
parent 4253416778
commit d360e9684e

View File

@@ -1,83 +1,14 @@
;; THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY ;; Linux-safe MMX/3DNow texture download path for cvg Glide 2
;; PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT ;; Rewritten to avoid:
;; TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX ;; - MMX state surviving across C calls to _FifoMakeRoom()
;; INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE ;; - 64-bit MMX stores directly into FIFO/MMIO
;; DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com). ;; - QWORD-alignment dependency for the FIFO path
;; THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
;; EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
;; FULL TEXT OF THE NON-WARRANTY PROVISIONS.
;;
;; USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
;; RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
;; TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
;; AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
;; SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
;; THE UNITED STATES.
;;
;; COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
;; ;;
;; $Header$ ;; Functional goal matches the historical wide-S MMX path:
;; $Revision$ ;; - one packet header per scanline
;; $Log$ ;; - texel data transferred 8 bytes at a time
;; Revision 1.1.2.2 2005/04/23 18:36:36 koolsmoky ;; - MMX used for source loads only
;; fixed 3dnow! and mmx optimizations ;; - FIFO written with 32-bit stores only
;;
;; Revision 1.1.2.1 2004/12/23 20:45:56 koolsmoky
;; converted to nasm syntax
;; added x86 asm, 3dnow! triangle and mmx, 3dnow! texture download optimizations
;;
;; Revision 1.1.1.1.8.1 2003/11/03 13:34:30 dborca
;; Voodoo2 happiness (DJGPP & Linux)
;;
;; Revision 1.1.8.7 2003/09/12 05:08:35 koolsmoky
;; preparing for graphic context checks
;;
;; Revision 1.1.8.6 2003/07/07 23:29:06 koolsmoky
;; cleaned logs
;;
;;
;; Revision 1.1 2000/06/15 00:27:43 joseph
;; Initial checkin into SourceForge.
;;
;; 10 8/17/99 6:35p Atai
;; fixed amd debug mode
;;
;; 9 4/08/99 1:22p Atai
;; added context check for _grTexDownload_3DNow_MMX
;;
;; 8 3/19/99 11:26a Peter
;; expose direct fifo for gl
;;
;; 7 2/02/99 4:36p Peter
;; download through lfb rather than texture port
;;
;; 6 12/17/98 2:36p Atai
;; check in Norbert's fix for texture download width correction
;;
;; 5 12/07/98 11:33a Peter
;; norbert's re-fixes of my merge
;;
;; 4 11/02/98 5:34p Atai
;; merge direct i/o code
;;
;; 3 10/20/98 5:34p Atai
;; added #ifdefs for hwc
;;
;; 2 10/14/98 12:05p Peter
;; fixed my effed up assumption about non-volatile regs
;;
;; 1 10/09/98 6:48p Peter
;; 3DNow!(tm) version of wide texture downloads
;;
;; 3 10/07/98 9:43p Peter
;; triangle procs for 3DNow!(tm)
;;
;; 2 10/05/98 7:43p Peter
;; 3DNow!(tm) happiness everywhere
;;
;; 1 10/05/98 6:01p Peter
;; mmx stuff for 3DNow!(tm) capable processors
;;
%include "xos.inc" %include "xos.inc"
@@ -89,40 +20,51 @@ extrn _FifoMakeRoom, 12
push %1 push %1
call _FifoMakeRoom call _FifoMakeRoom
add esp, 12 add esp, 12
%ENDMACRO ; _grCommandTransportMakeRoom %ENDMACRO
;;; Definitions of cvg regs and glide root structures.
%INCLUDE "fxgasm.h" %INCLUDE "fxgasm.h"
; NOTE(review): this text appears to be a side-by-side diff capture. Where a line
; holds two fragments, the first comes from the removed version and the second
; from the added version; lines with one fragment belong to one side only.
; Arguments (STKOFF = 16 from 4 dword pushes) ; Stack layout after:
STACKOFFSET equ 16 ; push ebx
_gc$ equ 4 + STACKOFFSET ; push esi
_baseAddr$ equ 8 + STACKOFFSET ; push edi
_maxS$ equ 12 + STACKOFFSET ; push ebp
_minT$ equ 16 + STACKOFFSET ; sub esp, 8
_maxT$ equ 20 + STACKOFFSET
_texData$ equ 24 + STACKOFFSET
;; NB: The first set of registers (eax, ecx, and edx) are volatile across
;; function calls. The remaining registers are supposedly non-volatile
;; so they only store things that are non-volatile across the call.
%define fifo ebp ; fifo ptr in inner loop
%define gc esi ; graphics context
%define dataPtr edi ; pointer to texture data to be downloaded
%define curT ebx ; counter for texture scan lines (t-coordinate)
%define curS ecx ; texture s-coordinate
%define fRoom edx ; room available in fifo (in bytes)
;--------------------------------------------------------------------------
; ;
; GL_AMD3D, GL_MMX ; esp+00 : local rowAddrDiff
; ; esp+04 : local strideBytes
;-------------------------------------------------------------------------- ; esp+08 : saved ebp
; esp+12 : saved edi
; esp+16 : saved esi
; esp+20 : saved ebx
; esp+24 : return address
; esp+28 : gc
; esp+32 : baseAddr
; esp+36 : maxS
; esp+40 : minT
; esp+44 : maxT
; esp+48 : texData
; Added-side frame constants. The two LOCAL_ offsets index the 8 bytes reserved
; by "sub esp, 8"; STACKOFFSET = 24 is those 8 bytes plus the four 4-byte
; register pushes, so 4 + STACKOFFSET lands on esp+28 (gc) in the table above.
segment TEXT LOCAL_rowAddrDiff$ equ 0
LOCAL_strideBytes$ equ 4
STACKOFFSET equ 24
_gc$ equ 4 + STACKOFFSET
_baseAddr$ equ 8 + STACKOFFSET
_maxS$ equ 12 + STACKOFFSET
_minT$ equ 16 + STACKOFFSET
_maxT$ equ 20 + STACKOFFSET
_texData$ equ 24 + STACKOFFSET
; Register aliases on the added side: same register for each name as the
; removed-side %define lines earlier in this listing.
ALIGN 32 %define fifo ebp
%define gc esi
%define dataPtr edi
%define curT ebx
%define curS ecx
%define fRoom edx
segment TEXT
ALIGN 32
;--------------------------------------------------------------------------
; _grTexDownload_3DNow_MMX (when GL_AMD3D is defined) / _grTexDownload_MMX
;
; NOTE(review): this listing appears to be a side-by-side diff capture. On a
; line with two fragments the first is the removed instruction and the second
; the added one. The "@@" marker between the two proc lines means some lines
; are elided there. The summary below describes the added (right-hand) code.
;
; Stack arguments, read through the _xxx$ equates: gc, baseAddr, maxS, minT,
; maxT, texData.
;
; Flow of the added code:
;  1. Push ebx, esi, edi, ebp and reserve 8 bytes for two locals.
;  2. Under GLIDE_ALT_TAB: go straight to .dlDone if gc is zero or if bit 0
;     of the dword that [gc + lostContext] points to is set.
;  3. curT = maxT - minT + 1 (row count); strideBytes = maxS << 2;
;     rowAddrDiff = baseAddr + (minT << 9) - [gc + tex_ptr].
;  4. .rowStart: a row needs strideBytes + 8 bytes of fifoRoom (signed
;     compare, jge). If there is not enough, femms/emms is executed, the
;     _grCommandTransportMakeRoom macro is invoked with the needed byte count
;     in eax (only when USE_PACKET_FIFO is defined), fifoPtr/fifoRoom are
;     reloaded from gc and the test is repeated.
;  5. .rowWrite: stores two header dwords, (strideBytes * 2) | 0xc0000005 and
;     rowAddrDiff, then .dataLoop moves 8 source bytes per pass: one movq
;     load, two 32-bit stores to the fifo.
;  6. fifoRoom is reduced by strideBytes + 8; fifoPtr and fifoRoom are written
;     back to gc (checkPtr as well under GLIDE_DEBUG).
;  7. curT is decremented; if rows remain, rowAddrDiff += 0200h and control
;     returns to .rowStart.
;  8. .dlDone: femms/emms, release the locals, pop ebp/edi/esi/ebx, ret.
;
; NOTE(review): .dataLoop exits only when the remaining byte count reaches
; exactly zero (sub 8 / jnz), so strideBytes has to be a non-zero multiple of
; 8 -- confirm callers always pass an even, non-zero maxS.
; NOTE(review): when USE_PACKET_FIFO is not defined, nothing in the low-room
; path changes fifoRoom before "jmp .rowStart" re-tests it -- confirm that
; build cannot reach this path, or that fifoRoom is updated elsewhere.
;--------------------------------------------------------------------------
%IFDEF GL_AMD3D %IFDEF GL_AMD3D
proc _grTexDownload_3DNow_MMX, 24 proc _grTexDownload_3DNow_MMX, 24
@@ -131,219 +73,118 @@ proc _grTexDownload_3DNow_MMX, 24
proc _grTexDownload_MMX, 24 proc _grTexDownload_MMX, 24
%ENDIF %ENDIF
push ebx ; save caller's register variable push ebx
mov curT, [esp + _maxT$ - 12] ; curT = maxT push esi
push edi
push ebp
sub esp, 8
push esi ; save caller's register variable mov gc, [esp + _gc$]
mov eax, [esp + _minT$ - 8] ; minT mov dataPtr, [esp + _texData$]
push edi ; save caller's register variable
mov gc, [esp + _gc$ - 4] ; gc
push ebp ; save caller's register variable
mov dataPtr, [esp + _texData$]; dataPtr
%IFDEF GLIDE_ALT_TAB %IFDEF GLIDE_ALT_TAB
test gc, gc test gc, gc
je .dlDone je .dlDone
; mov edx, [gc + windowed] mov eax, DWORD [gc + lostContext]
; test edx, 1 mov eax, [eax]
; jnz .pastContextTest test eax, 1
mov edx, DWORD [gc+lostContext]
mov ecx, [edx]
test ecx, 1
jnz .dlDone jnz .dlDone
;.pastContextTest:
%ENDIF %ENDIF
sub curT, eax ; curT = maxT - minT mov eax, [esp + _maxT$]
mov fifo, [gc + fifoPtr] ; fifoPtr mov curT, eax
mov eax, [esp + _minT$]
sub curT, eax
add curT, 1
mov curS, [esp + _maxS$] ; curS = maxS = scanline width in DWORDs mov curS, [esp + _maxS$]
add curT, 1 ; curT = maxT - minT + 1 shl curS, 2 ; stride in bytes = maxS * 4
mov [esp + LOCAL_strideBytes$], curS
mov eax, [esp + _baseAddr$]
mov fRoom, [esp + _minT$]
shl fRoom, 9 ; TEX_ROW_ADDR_INCR(minT)
add eax, fRoom
sub eax, [gc + tex_ptr]
mov [esp + LOCAL_rowAddrDiff$], eax
mov fifo, [gc + fifoPtr]
mov fRoom, [gc + fifoRoom]
; Per-row entry (added side): curS is reloaded from the local because the
; previous row's .dataLoop counted it down to zero.
.rowStart:
mov curS, [esp + LOCAL_strideBytes$]
lea eax, [curS + 8] ; header + payload bytes needed
cmp fRoom, eax
jge .rowWrite
%IFDEF GL_AMD3D %IFDEF GL_AMD3D
femms ; we'll use MMX/3DNow!, make sure FPU register cleared femms ; leave MMX/3DNow state before C call
%ENDIF %ENDIF
%IFDEF GL_MMX %IFDEF GL_MMX
; emms ; we'll use MMX emms ; leave MMX state before C call
%ENDIF %ENDIF
movd mm3, [esp + _baseAddr$] ; 0 | address of texture to download
add curS, curS ;
add curS, curS ; scan line width (in bytes)
mov eax, [esp + _minT$] ; 0 | minT
mov [esp + _maxS$], curS ; save scan line width (in bytes)
mov edx, curS ;
add edx, edx ; packetHdr<21:3> = maxS = scanline width in DWORDs
or edx, 0xc0000005 ; packetHdr<31:30> = texture port
; packetHdr<21:3> = maxS
; packetHdr<2:0> = packetType 5
movd mm1, edx ; 0 | packetHdr
movd mm2, eax ; 0 | minT
psllq mm2, 9 ; 0 | TEX_ROW_ADDR_INCR(minT) = minT << 9
paddd mm3, mm2 ; 0 | texAddr = texBaseAddr + TEX_ROW_ADDR_INCR(minT)
movd mm2, [gc + tex_ptr] ; 0 | gc->tex_ptr
psubd mm3, mm2 ; 0 | texAddr - gc->tex_ptr
mov eax, 0x200 ; TEX_ROW_ADDR_INCR(1) = 1 << 9
movd mm2, eax ; 0 | TEX_ROW_ADDR_INCR(1)
psllq mm2, 32 ; TEX_ROW_ADDR_INCR(1) | 0
mov fRoom, [gc + fifoRoom] ; get available fifoRoom (in bytes)
punpckldq mm1, mm3 ; hdr2 = texAddr - gc->tex_ptr | hdr1 = packetHdr
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = curS = maxS
;; edx = fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
test fifo, 4 ; is fifo QWORD aligned ?
jz .startDownload ; yup, start texture download
cmp fRoom, 4 ; enough room for NULL packet in fifo?
jge .mmxAlignFifo ; yes, write NULL packet to align fifo
%ifdef USE_PACKET_FIFO %ifdef USE_PACKET_FIFO
sub esp, 16 ; reserve temp space for mm1/mm2 _grCommandTransportMakeRoom eax, 0, __LINE__
movq [esp], mm1 ; save mm1
movq [esp + 8], mm2 ; save mm2
_grCommandTransportMakeRoom 4, 0, __LINE__ ; make fifo room
movq mm1, [esp] ; restore mm1
movq mm2, [esp + 8] ; restore mm2
add esp, 16 ; release temp space
%endif %endif
mov fifo, [gc + fifoPtr]
mov fifo, [gc + fifoPtr] ; fifoPtr modified by _grCommandTransportMakeRoom, reload mov fRoom, [gc + fifoRoom]
jmp .rowStart
mov fRoom, [gc + fifoRoom] ; fifoRoom modified by _grCommandTransportMakeRoom, reload ALIGN 32
mov curS, [esp + _maxS$] ; reload maxS (destroyed by call to _grCommandTransportMakeRoom) .rowWrite:
; packet header word 1: packet type / download space / numWords field
mov eax, curS
add eax, eax ; bytes * 2 == (dwords << 3)
or eax, 0xc0000005
mov [fifo], eax
test fifo, 4 ; new fifoPtr QWORD aligned ? ; packet header word 2: destination texture address relative to tex_ptr
jz .startDownload ; yup, start texture download mov eax, [esp + LOCAL_rowAddrDiff$]
mov [fifo + 4], eax
add fifo, 8
.mmxAlignFifo: ; Write scanline payload.
; MMX is used only for the source load. FIFO writes remain 32-bit.
.dataLoop:
movq mm0, [dataPtr]
movd eax, mm0
psrlq mm0, 32
mov [fifo], eax
movd eax, mm0
mov [fifo + 4], eax
mov DWORD [fifo], 0 ; write NULL packet add dataPtr, 8
sub fRoom, 4 ; fifoRoom -= 4 add fifo, 8
sub curS, 8
mov [gc + fifoRoom], fRoom ; store new fifoRoom jnz .dataLoop
add fifo, 4 ; fifoPtr += 4
; Added side: charge this row against fifoRoom -- strideBytes of payload plus
; the 8 bytes of the two header dwords -- then publish fifoPtr/fifoRoom to gc.
mov eax, [esp + LOCAL_strideBytes$]
add eax, 8
sub fRoom, eax
mov [gc + fifoPtr], fifo
mov [gc + fifoRoom], fRoom
%IFDEF GLIDE_DEBUG %IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr mov [gc + checkPtr], fifo
%ENDIF %ENDIF
mov [gc + fifoPtr], fifo ; store new fifoPtr dec curT
jmp .startDownload ; fifo aligned, download texture now jz .dlDone
; Added side: step the destination by one texture row; 0200h equals 1 << 9,
; the same shift that was applied to minT when rowAddrDiff was first computed.
ALIGN 32 add DWORD [esp + LOCAL_rowAddrDiff$], 0200h
jmp .rowStart
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo, ecx = maxS = curS .dlDone:
;; edx=fifoRoom, mm1 = texAddr-gc->tex_ptr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
.loopT:
%IFDEF GLIDE_DEBUG
;; Make sure that we have a QWORD aligned fifoPtr; force GP if not aligned
test fifo, 4 ; is fifoPtr QWORD aligned ?
jz .alignmentOK ; yup, continue
xor eax, eax ; create 0
mov [eax], eax ; move to DS:[0] forces GP
.alignmentOK:
%ENDIF ; GLIDE_DEBUG
;; Compute packet header words
;; hdr1: downloadSpace[31:30] numWords[21:3] packetType[2:0]
;; hdr2: download address[29:0]
movq [fifo], mm1 ; store hdr2 | hdr1
add fifo, 8 ; increment fifo ptr (hdr1 + hdr2)
;; S coordinate inner loop unrolled for 8 texels a write
.loopS:
movq mm0, [dataPtr] ; load 64 bit data (8 texels)
add fifo, 8 ; pre-increment fifoPtr += 2 * sizeof(FxU32)
add dataPtr, 8 ; dataPtr += 2 * sizeof(FxU32)
sub curS, 8 ; curS -= 2 * sizeof(FxU32)
movq [fifo - 8], mm0 ; *fifoPtr = texelData[64 bits]
jnz .loopS ; loop while curS > 0
mov ecx, [gc + fifoPtr] ; old fifo ptr
nop ; filler
mov eax, fifo ; new fifo ptr
mov [gc + fifoPtr], fifo ; save new fifo ptr
%IFDEF GLIDE_DEBUG
mov [gc + checkPtr], fifo ; checkPtr
%ENDIF
sub eax, ecx ; new fifo ptr - old fifo ptr = fifo space used up
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
sub fRoom, eax ; new fifo space available = old fifo space available - fifo space used up = new fifo space available
sub curT, 1 ; curT--
mov [gc + fifoRoom], fRoom ; save new fifo space available
jz .dlDone ; loop while curT > 0
;; Check for room to write the next texture scanline
;; ebx = curT, edi = dataPtr, esi = gc, ebp = fifo
;; edx = fifoRoom, mm1 = texAddr|packetHdr, mm2 = TEX_ROW_ADDR_INCR(1)|0
paddd mm1, mm2 ; texAddr+=TEX_ROW_ADDR_INCR(1) | packetHdr
mov esp, esp ; filler
.startDownload:
lea eax, [curS+8] ; fifo space needed = scan line width + header size
cmp fRoom, eax ; fifo space available >= fifo space required ?
jge .loopT ; yup, write next scan line
%ifdef USE_PACKET_FIFO
sub esp, 16 ; reserve temp space for mm1/mm2
movq [esp], mm1 ; save mm1
movq [esp + 8], mm2 ; save mm2
_grCommandTransportMakeRoom eax, 0, __LINE__ ; make fifo room
movq mm1, [esp] ; restore mm1
movq mm2, [esp + 8] ; restore mm2
add esp, 16 ; release temp space
%endif
mov fifo, [gc + fifoPtr] ; fifoPtr was modified by _grCommandTransportMakeRoom, reload
mov fRoom, [gc + fifoRoom] ; fifoRoom was modified by _grCommandTransportMakeRoom, reload
mov curS, [esp + _maxS$] ; curS = maxS = width of scanline (bytes)
jmp .loopT ; we now have enough fifo room, write next scanline
.dlDone:
%IFDEF GL_AMD3D %IFDEF GL_AMD3D
femms ; exit 3DNow!(tm) state femms
%ENDIF %ENDIF
%IFDEF GL_MMX %IFDEF GL_MMX
emms ; exit MMX state emms
%ENDIF %ENDIF
pop ebp ; restore caller's register variable add esp, 8
pop edi ; restore caller's register variable pop ebp
pop edi
pop esi ; restore caller's register variable pop esi
pop ebx ; restore caller's register variable pop ebx
ret
ret ; pop 6 DWORD parameters and return
endp endp