;; THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
;; PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
;; TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
;; INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
;; DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
;; THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
;; EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
;; FULL TEXT OF THE NON-WARRANTY PROVISIONS.
;;
;; USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
;; RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
;; TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
;; AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
;; SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
;; THE UNITED STATES.
;;
;; COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
;;
;; Linux-safe MMX/3DNow texture download path for cvg Glide 2
;; Rewritten to avoid:
;;   - MMX state surviving across C calls to _FifoMakeRoom()
;;   - 64-bit MMX stores directly into FIFO/MMIO
;;   - QWORD-alignment dependency for the FIFO path
;;
;; Functional goal matches the historical wide-S MMX path:
;;   - one packet header per scanline
;;   - texel data transferred 8 bytes at a time
;;   - MMX used for source loads only
;;   - FIFO written with 32-bit stores only
;;
;; Syntax: NASM (Intel operand order), 32-bit x86, arguments on the stack.

%include "xos.inc"

extrn   _FifoMakeRoom, 12

;; _grCommandTransportMakeRoom size, arg2, arg3
;;   Pushes three dwords (last operand first), calls _FifoMakeRoom and
;;   removes the 12 argument bytes again.
;;   NOTE(review): the %MACRO line and the first two pushes are reconstructed
;;   from the 12-byte argument size and the three-operand call site - confirm
;;   against the tree.
%MACRO _grCommandTransportMakeRoom 3
        push    %3
        push    %2
        push    %1
        call    _FifoMakeRoom
        add     esp, 12
%ENDMACRO

;;; Definitions of cvg regs and glide root structures.
%INCLUDE "fxgasm.h"

;; ---------------------------------------------------------------------------
;; Packet / addressing constants used by this file
;; ---------------------------------------------------------------------------
TEXDL_PKT_HDR_BYTES     equ 8           ; two 32-bit header words per scanline
TEXDL_PKT5_HDR          equ 0xc0000005  ; <31:30> texture port, <2:0> packet type 5
TEXDL_ROW_ADDR_SHIFT    equ 9           ; TEX_ROW_ADDR_INCR(t) = t << 9
TEXDL_ROW_ADDR_INCR     equ 1 << TEXDL_ROW_ADDR_SHIFT
TEXDL_LOCAL_BYTES       equ 8           ; two dword locals below the saved regs

;; ---------------------------------------------------------------------------
;; Stack layout after the prologue
;;      push ebx / push esi / push edi / push ebp / sub esp, TEXDL_LOCAL_BYTES
;;
;;      esp+00 : local rowAddrDiff   (packet header word 2 for the current row)
;;      esp+04 : local strideBytes   (maxS * 4)
;;      esp+08 : saved ebp
;;      esp+12 : saved edi
;;      esp+16 : saved esi
;;      esp+20 : saved ebx
;;      esp+24 : return address
;;      esp+28 : gc
;;      esp+32 : baseAddr
;;      esp+36 : maxS
;;      esp+40 : minT
;;      esp+44 : maxT
;;      esp+48 : texData
;; ---------------------------------------------------------------------------
LOCAL_rowAddrDiff$      equ 0
LOCAL_strideBytes$      equ 4

STACKOFFSET             equ 16 + TEXDL_LOCAL_BYTES
_gc$                    equ  4 + STACKOFFSET
_baseAddr$              equ  8 + STACKOFFSET
_maxS$                  equ 12 + STACKOFFSET
_minT$                  equ 16 + STACKOFFSET
_maxT$                  equ 20 + STACKOFFSET
_texData$               equ 24 + STACKOFFSET

;; Register roles. eax, ecx and edx are volatile across the C call; everything
;; that must survive it lives in ebx/esi/edi/ebp or in the two stack locals.
%define fifo    ebp             ; current FIFO write pointer
%define gc      esi             ; graphics context
%define dataPtr edi             ; source texel pointer
%define curT    ebx             ; scanlines still to send
%define curS    ecx             ; per-row byte / group counter (volatile)
%define fRoom   edx             ; bytes available in the FIFO (volatile)

segment TEXT

        ALIGN   32

;; ---------------------------------------------------------------------------
;; _grTexDownload_3DNow_MMX / _grTexDownload_MMX (gc, baseAddr, maxS, minT,
;;                                                maxT, texData)
;;
;; Sends rows minT..maxT of texData to the command FIFO, one packet per row:
;;      word 0 : TEXDL_PKT5_HDR | (maxS << 3)
;;      word 1 : baseAddr + (t << 9) - gc->tex_ptr
;;      data   : maxS dwords read from texData
;; gc->fifoPtr / gc->fifoRoom are updated after every row.
;;
;; Returns immediately when maxS <= 0 or maxT < minT.
;; Clobbers eax, ecx, edx, mm0 and flags; MMX state is cleared on exit.
;; ---------------------------------------------------------------------------
%IFDEF GL_AMD3D
proc _grTexDownload_3DNow_MMX, 24
%ENDIF
%IFDEF GL_MMX
proc _grTexDownload_MMX, 24
%ENDIF

        push    ebx
        push    esi
        push    edi
        push    ebp
        sub     esp, TEXDL_LOCAL_BYTES

        mov     gc, [esp + _gc$]
        mov     dataPtr, [esp + _texData$]

%IFDEF GLIDE_ALT_TAB
        test    gc, gc
        je      .dlDone
        mov     eax, DWORD [gc + lostContext]
        mov     eax, [eax]
        test    eax, 1                  ; bit 0 set => context lost, skip download
        jnz     .dlDone
%ENDIF

        ;; curT = maxT - minT + 1; nothing to do for an empty row range
        ;; (signed compare - presumably both are signed ints; confirm in C prototype)
        mov     curT, [esp + _maxT$]
        sub     curT, [esp + _minT$]
        add     curT, 1
        jle     .dlDone

        ;; strideBytes = maxS * 4; an empty row would otherwise wrap the copy counter
        mov     curS, [esp + _maxS$]
        test    curS, curS
        jle     .dlDone
        shl     curS, 2
        mov     [esp + LOCAL_strideBytes$], curS

        ;; rowAddrDiff = baseAddr + TEX_ROW_ADDR_INCR(minT) - gc->tex_ptr
        mov     eax, [esp + _minT$]
        shl     eax, TEXDL_ROW_ADDR_SHIFT
        add     eax, [esp + _baseAddr$]
        sub     eax, [gc + tex_ptr]
        mov     [esp + LOCAL_rowAddrDiff$], eax

        mov     fifo, [gc + fifoPtr]
        mov     fRoom, [gc + fifoRoom]

        ;; ---- per-row loop: common case falls straight through -------------
.rowStart:
        mov     curS, [esp + LOCAL_strideBytes$]
        lea     eax, [curS + TEXDL_PKT_HDR_BYTES]   ; bytes needed: header + payload
        cmp     fRoom, eax
        jl      .makeRoom

.rowWrite:
        ;; header word 0: bytes * 2 == dwords << 3 lands in the numWords field
        lea     eax, [curS + curS]
        or      eax, TEXDL_PKT5_HDR
        mov     [fifo], eax

        ;; header word 1: destination address relative to gc->tex_ptr
        mov     eax, [esp + LOCAL_rowAddrDiff$]
        mov     [fifo + 4], eax
        add     fifo, TEXDL_PKT_HDR_BYTES

        ;; payload: whole 8-byte groups first. MMX is used for the source load
        ;; only; every FIFO store is a 32-bit store, in ascending address order.
        shr     curS, 3                 ; curS = number of 8-byte groups
        jz      .dataTail
.dataLoop:
        movq    mm0, [dataPtr]
        movd    eax, mm0                ; low dword
        psrlq   mm0, 32
        mov     [fifo], eax
        movd    eax, mm0                ; high dword
        mov     [fifo + 4], eax

        add     dataPtr, 8
        add     fifo, 8
        dec     curS
        jnz     .dataLoop

.dataTail:
        ;; odd maxS leaves one dword that does not fill an 8-byte group
        test    DWORD [esp + LOCAL_strideBytes$], 4
        jz      .rowCommit
        mov     eax, [dataPtr]
        mov     [fifo], eax
        add     dataPtr, 4
        add     fifo, 4

.rowCommit:
        mov     eax, [esp + LOCAL_strideBytes$]
        add     eax, TEXDL_PKT_HDR_BYTES
        sub     fRoom, eax
        mov     [gc + fifoPtr], fifo
        mov     [gc + fifoRoom], fRoom
%IFDEF GLIDE_DEBUG
        mov     [gc + checkPtr], fifo
%ENDIF

        dec     curT
        jz      .dlDone

        add     DWORD [esp + LOCAL_rowAddrDiff$], TEXDL_ROW_ADDR_INCR
        jmp     .rowStart

        ;; ---- slow path: not enough FIFO room for the next row -------------
        ;; eax still holds the byte count computed at .rowStart.
.makeRoom:
%IFDEF GL_AMD3D
        femms                           ; leave MMX/3DNow state before C call
%ENDIF
%IFDEF GL_MMX
        emms                            ; leave MMX state before C call
%ENDIF
%ifdef USE_PACKET_FIFO
        _grCommandTransportMakeRoom eax, 0, __LINE__
%endif
        ;; Reload everything held in volatile registers or changed by the call.
        ;; Continue with the row instead of re-testing fRoom: without
        ;; USE_PACKET_FIFO nothing makes room and a re-test would spin forever.
        mov     fifo, [gc + fifoPtr]
        mov     fRoom, [gc + fifoRoom]
        mov     curS, [esp + LOCAL_strideBytes$]
        jmp     .rowWrite

.dlDone:
%IFDEF GL_AMD3D
        femms
%ENDIF
%IFDEF GL_MMX
        emms
%ENDIF

        add     esp, TEXDL_LOCAL_BYTES
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret

endp