Skip to content

Commit

Permalink
#407 32X geometry transform optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
XProger committed Apr 23, 2022
1 parent e821f23 commit 93058da
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 37 deletions.
20 changes: 9 additions & 11 deletions src/platform/32x/asm/transformMesh.s
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ _transformMesh_asm:
add #CLIP_NEAR, vg
.clip_z_far:
bf/s .project
mov z, dz // dz = z (delay slot)
mov z, dz // [delay slot] dz = z
mov maxZ, z
add #CLIP_FAR, vg

Expand All @@ -121,19 +121,17 @@ _transformMesh_asm:
shll dz
mov.w @(dz, divLUT), dz

add #-M03, m // reset matrix ptr

// x = x * dz >> (16 - PROJ_SHIFT)
// x = x * dz >> 12
// y = y * dz >> 12
muls.w dz, x
sts MACL, x
shll2 x
shll2 x
shlr16 x
exts.w x, x

// y = y * dz >> (16 - PROJ_SHIFT)
add #-M03, m // reset matrix ptr
muls.w dz, y
shll2 x
shll2 x
shlr16 x
sts MACL, y
exts.w x, x
shll2 y
shll2 y
shlr16 y
Expand All @@ -151,7 +149,7 @@ _transformMesh_asm:
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
cmp/hi tmp, x
bt/s .clip_frame
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
.clip_frame_y: // 0 < y > FRAME_HEIGHT
cmp/hi tmp, y
.clip_frame:
Expand Down
42 changes: 16 additions & 26 deletions src/platform/32x/asm/transformRoom.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,6 @@ SEG_TRANS

#define SP_SIZE (18 + 6) // mat3x3 + vec3

// transform v, offset
// Accumulates three 16-bit products on top of a starting value and leaves
// the scaled, sign-extended result in \v.
//   \v      - destination register for the result
//   \offset - register whose value is loaded into MACL before accumulating
// Reads three words through stackVtx and three words through stackMtx.
// stackVtx is rewound before the macro ends; stackMtx is left advanced by the
// three words consumed (presumably pointing at the next matrix row - confirm
// against the matrix layout set up by the caller).
.macro transform v, offset
lds \offset, MACL // seed MACL with \offset
mac.w @stackVtx+, @stackMtx+ // MAC += word at stackVtx * word at stackMtx, both pointers post-increment
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
add #-6, stackVtx // rewind stackVtx by the 3 words (6 bytes) just read
sts MACL, \v // \v = MACL (low 32 bits of the accumulated sum)
shlr8 \v // discard the low 8 bits
exts.w \v, \v // keep the low 16 bits, sign-extended to 32
.endm

.align 4
.global _transformRoom_asm
_transformRoom_asm:
Expand Down Expand Up @@ -82,13 +71,14 @@ _transformRoom_asm:
shll8 mz

add #8, res // extra offset for @-Rn
nop

.loop:
// unpack vertex
mov.b @vertices+, x
mov.b @vertices+, y
mov.b @vertices+, z

shll2 x
shll2 y
shll2 z
Expand All @@ -98,19 +88,19 @@ _transformRoom_asm:
add #6, stackVtx
mov stackVtx, stackMtx

//shll16 x
//xtrct y, x
mov.w x, @-stackVtx
mov.w y, @-stackVtx
mov.w z, @-stackVtx

// transform to view space
//transform z, mz

//transform z
lds mz, MACL
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
add #-6, stackVtx
sts MACL, z
add #-6, stackVtx
shlr8 z
exts.w z, z

Expand All @@ -126,7 +116,7 @@ _transformRoom_asm:
// check if z in [-VIEW_OFF..VIEW_MAX + VIEW_OFF]
cmp/hi maxZ, tmp
bf/s .visible
mov #40, maxZ // maxZ = 40 (delay slot)
mov #40, maxZ // [delay slot] maxZ = 40
mov #(CLIP_NEAR + CLIP_FAR), vg
mov.w vg, @-res
add #1, vertices
Expand All @@ -137,24 +127,23 @@ _transformRoom_asm:
nop

.visible:
//transform y, my
//transform y
lds my, MACL
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
add #-6, stackVtx
sts MACL, y
add #-6, stackVtx
shlr8 y
exts.w y, y


//transform x, mx
//transform x
lds mx, MACL
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
mac.w @stackVtx+, @stackMtx+
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
sts MACL, x
shll8 maxZ // maxZ = VIEW_MAX = (1024 * 10) = (40 << 8)
shlr8 x
exts.w x, x

Expand Down Expand Up @@ -183,7 +172,7 @@ _transformRoom_asm:
mov #VIEW_MIN, minZ // minZ = VIEW_MIN = 64
cmp/gt z, minZ
bf/s .clip_z_far
shll8 vg // clear lower 8-bits of vg for clipping flags (delay slot)
shll8 vg // [delay slot] clear lower 8-bits of vg for clipping flags
mov minZ, z
add #CLIP_NEAR, vg
.clip_z_far:
Expand All @@ -205,8 +194,9 @@ _transformRoom_asm:

.proj_y: // y = y * dz >> 12
muls.w dz, y
shar12 x, tmp // do it here to hide muls.w latency
sts MACL, y

shar12 x, tmp
shar12 y, tmp

// portal rect clipping
Expand All @@ -229,7 +219,7 @@ _transformRoom_asm:
.clip_vp_maxY:
cmp/ge maxY, y
bf/s .apply_offset
mov #80, tmp // tmp = 80 (delay slot)
mov #80, tmp // [delay slot] tmp = 80
add #CLIP_BOTTOM, vg

.apply_offset:
Expand All @@ -244,7 +234,7 @@ _transformRoom_asm:
shll2 tmp // tmp = 80 * 4 = 320 = FRAME_WIDTH
cmp/hi tmp, x
bt/s .clip_frame
add #-96, tmp // tmp = 320 - 96 = 224 = FRAME_HEIGHT (delay slot)
add #-96, tmp // [delay slot] tmp = 320 - 96 = 224 = FRAME_HEIGHT
.clip_frame_y: // 0 < y > FRAME_HEIGHT
cmp/hi tmp, y
.clip_frame:
Expand Down

0 comments on commit 93058da

Please sign in to comment.