Created
July 10, 2018 01:13
-
-
Save devinacker/b3658c6c671bdf3abf58da33f15ef284 to your computer and use it in GitHub Desktop.
WIP (sorta) Exomizer decruncher for SNES / 65c816 (with ca65)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; libSFX provides the proc/endproc and RW macros used throughout this file
; (and presumably the RPAD symbol that EXO_SCRATCHPAD defaults to).
.include "libSFX.i"
; let negative immediates such as #-1 and #-2 (both used in Decrunch) be
; forced into range instead of being rejected by the assembler
.feature force_range
; the only public entry point
.export Decrunch
/*
Exomizer (raw mode) decruncher for 65c816 / ca65.
by Devin Acker (Revenant/RSE), 2018

For use on SNES, and theoretically other 65c816-based platforms.
Currently requires libSFX for some convenience macros and stuff, but it
will probably be made more assembler-agnostic (or at least to support
stock ca65) at some point.

Unlike some other implementations, this one provides its own read/write
calls internally, instead of requiring you to export your own, since it is
assumed everything will just involve ROM/RAM instead of external I/O.

To use, just import and JSL to Decrunch with the following register values:

Input:  X  = destination address
        AL = destination address (bank)
        Y  = source address
        AH = source address (bank)
Output: X  = decrunched size

See the defines/comments below for some fine tuning options.

Current assembled code size:
With default options: 272 bytes
Without literal support: 250 bytes
*/
; Build options.

; comment out to use stack memory instead of a fixed buffer in RAM.
; using stack memory makes the decrunch routine reentrant, and doesn't require
; having a buffer available at a fixed address all the time.
; using a page-aligned scratchpad provides a speed advantage, though.
; when defined, Decrunch loads this address into the direct page register, so
; the buffer must provide LocalsSize (219) bytes of working storage.
EXO_SCRATCHPAD = RPAD

; set to 1 to inline reads/writes. makes the code faster, but slightly larger
EXO_INLINE_RW = 0

; set to 1 to disable literal support. makes the decrunch code smaller,
; if you don't need literals in your crunched data
EXO_NO_LITERALS = 0
;-------------------------------------------------------------------------------
.code

; Working storage, expressed as offsets from the direct page register.
; Decrunch points D either at EXO_SCRATCHPAD or at a block it allocates on the
; stack, so every access below uses direct page addressing.
;
; Each table entry is one 16-bit word. The bits tables and the base tables use
; the same order and sizes, so the same index register value selects an entry's
; "bits" (via TablesBits) and its "base" (via TablesBase).

; number of extra bits to read for each entry
TablesBits      = 0
LengthsBits     = TablesBits            ; 16 entries, sequence lengths
Offsets3Bits    = LengthsBits+16*2      ; 16 entries, offsets for lengths >= 3
Offsets2Bits    = Offsets3Bits+16*2     ; 16 entries, offsets for length 2
Offsets1Bits    = Offsets2Bits+16*2     ;  4 entries, offsets for length 1

; value that the extra bits are added to for each entry
TablesBase      = Offsets1Bits+4*2
LengthsBase     = TablesBase
Offsets3Base    = LengthsBase+16*2
Offsets2Base    = Offsets3Base+16*2
Offsets1Base    = Offsets2Base+16*2

SequenceLen     = Offsets1Base+4*2      ; bytes left to copy in the current sequence
BitBuffer       = SequenceLen+2         ; input bits not yet consumed by ReadBits

; the last three variables (3+2+2 = 7 bytes) must stay in this order: the stack
; variant of Decrunch creates them with pushes instead of stores
SourceAddr      = BitBuffer+2           ; 24-bit read pointer
DestAddr        = SourceAddr+3          ; 16-bit write pointer (bank comes from DBR)
OrigDest        = DestAddr+2            ; initial DestAddr, used to compute the size

LocalsSize      = OrigDest+2            ; total size of the working storage (219 bytes)

; for measuring code size
StartExoCode:
;-------------------------------------------------------------------------------
; _read: fetch the next byte of crunched data.
;   In:  16-bit accumulator, SourceAddr = 24-bit pointer to the next input byte
;   Out: A = that byte, zero-extended to 16 bits
; Only the low 16 bits of SourceAddr are incremented; the bank byte is never
; touched, so the pointer wraps within its bank.
.macro _read
        lda [SourceAddr]        ; 16-bit load through the long pointer
        inc z:SourceAddr
        and #$00ff              ; discard the extra byte that was loaded
.endmac
; _write: store the low byte of A at the output position and advance it.
;   In:  16-bit accumulator, DestAddr = 16-bit pointer to the next output byte
;   Out: accumulator is 16-bit again; its contents are not modified
; The store uses (dp) addressing, so the bank comes from DBR.
.macro _write
        RW a8                   ; 8-bit store so only one byte is written
        sta (DestAddr)
        RW a16
        inc z:DestAddr
.endmac
; read_byte / write_byte / copy_byte are the names the decruncher actually
; uses. Depending on EXO_INLINE_RW they expand either to a JSR to a shared
; subroutine or to the instructions themselves.
.if EXO_INLINE_RW <> 1
; non-inline read/write calls

; A = next input byte (zero-extended); see _read
proc ReadByte, a16
        _read
        rts
endproc

.macro read_byte
        jsr ReadByte
.endmac

; store the low byte of A to the output; see _write
proc WriteByte, a16
        _write
        rts
endproc

.macro write_byte
        jsr WriteByte
.endmac

; copy one byte from the input to the output
proc CopyByte, a16
        jsr ReadByte
        bra WriteByte           ; tail call: WriteByte's rts returns to our caller
endproc

.macro copy_byte
        jsr CopyByte
.endmac

.else
; inline read/write calls

.macro read_byte
        _read
.endmac

.macro write_byte
        _write
.endmac

; copy one byte from the input to the output.
; the load and store are both done in 8-bit mode, so the masking that _read
; performs is not needed here
.macro copy_byte
        RW a8
        lda [SourceAddr]
        sta (DestAddr)
        RW a16
        inc z:SourceAddr
        inc z:DestAddr
.endmac

.endif
;-------------------------------------------------------------------------------
/*
Decrunch: unpack one crunched stream from source memory to destination memory.

Input:  X  = destination address
        AL = destination address (bank)
        Y  = source address
        AH = source address (bank)
Output: X  = decrunched size

Call with JSL (the routine returns with RTL). P, D and DBR are pushed on entry
and pulled again before returning.

Both the source and the destination pointer are only ever incremented as 16-bit
values, so they wrap within their bank instead of carrying into the bank byte.

NOTE(review): X and Y are stored/pushed as 16-bit addresses, but nothing here
switches the index registers to 16 bits first; the caller is presumably
expected to enter with 16-bit A/X/Y. Confirm against libSFX's `proc`.
*/
proc Decrunch, a16
        ; break
        php
        phd
        phb
        RW a8
        pha
        plb                     ; DBR = AL (destination bank)
        xba                     ; low byte of A = AH (source bank)
.ifdef ::EXO_SCRATCHPAD
        ; use scratchpad
        pea EXO_SCRATCHPAD
        pld                     ; direct page = scratchpad
        sty z:SourceAddr
        sta z:SourceAddr+2      ; 8-bit store: just the bank byte
        stx z:DestAddr
        stx z:OrigDest
        RW a16
.else
        ; use stack
        ; these pushes create the last 7 bytes of the locals, in layout order
        phx                     ; OrigDest
        phx                     ; DestAddr
        pha                     ; SourceAddr+2
        phy                     ; SourceAddr
        RW a16
        tsc
        sec
        sbc #LocalsSize-7       ; allocate everything but what we just pushed
        tcs
        inc                     ; locals start one byte above the stack pointer
        tcd
.endif
        ; the first input byte becomes the initial bit buffer
        read_byte
        sta z:BitBuffer
        RW i8
        ; X runs through all four tables; GenerateTable advances it and
        ; preserves Y, so Y is only reloaded when the entry count changes
        ; lengths table
        ldx #TablesBits
        ldy #16
        jsr GenerateTable
        ; offsets table 3
        jsr GenerateTable
        ; offsets table 2
        jsr GenerateTable
        ; offsets table 1
        ldy #4
        jsr GenerateTable
NextByte:
        ; one flag bit: 1 = copy a single byte straight from the input,
        ; 0 = a coded item follows
        ldy #1
        jsr ReadBits
        lsr                     ; flag -> carry, A = 0
        bcc GetGamma
        copy_byte
        bra NextByte
GetGamma:
        ; X is the 'length index' but treat it as a 16-bit pointer index (i.e. mult by 2)
        ; the index is the number of 0 bits read before the next 1 bit
        ldx #-2
:       inx
        inx
        ldy #1
        jsr ReadBits
        lsr                     ; bit -> carry, A = 0
        bcc :-
        cpx #16*2               ; index 16 marks the end of the data
        beq end
.if ::EXO_NO_LITERALS <> 1
        cpx #17*2               ; index 17 introduces a literal data block
        bne Sequence
        ; literal data block
        ldy #16                 ; the byte count is a 16-bit value
        jsr ReadBits
        RW i16                  ; X needs to hold a 16-bit counter
        tax
:       copy_byte
        dex
        bne :-
        RW i8
        bra NextByte
.endif
Sequence:
        ; sliding window sequence
        ; at this point A = 0 (from checking/shifting out gamma bits)
        ldy z:LengthsBits,x
        beq :+                  ; no extra bits for this entry: keep A = 0
        jsr ReadBits
        ; at this point carry is clear either from comparing gamma code or calling ReadBits
:       adc z:LengthsBase,x
        sta z:SequenceLen
        ; pick the offsets table by length: 1 -> table 1 (2-bit selector),
        ; 2 -> table 2, anything else -> table 3 (4-bit selectors).
        ; ReadBits returns with carry clear, so each adc below adds the table's
        ; first entry number (its byte offset / 2) to the selector
        ldy #4                  ; argument of ReadBits
        cmp #1
        beq len1
        cmp #2
        beq len2
        jsr ReadBits
        adc #Offsets3Bits>>1
        bra :+
len2:
        jsr ReadBits
        adc #Offsets2Bits>>1
        bra :+
len1:
        ldy #2
        jsr ReadBits
        adc #Offsets1Bits>>1
        ; get sequence offset
:       asl                     ; entry number -> byte offset into the tables
        tax
        ldy z:TablesBits,x
        tya                     ; use 0 if we don't call readbits
        beq :+
        jsr ReadBits
        ; at this point carry is clear either from shifting out or calling ReadBits
:       adc z:TablesBase,x
        ; make offset relative to current output position
        ; (invert, then add with carry set: A = DestAddr - offset)
        eor #-1
        sec
        adc z:DestAddr
        RW i16                  ; X becomes a 16-bit pointer into the output
        tax
        ; destination bank in DBR will be used here
:       lda a:0,x
        write_byte              ; only the low byte of A is stored
        inx
        dec z:SequenceLen
        bne :-
        ; end of sequence
        RW i8
        brl NextByte
end:
        ; break
        RW i16
        ; result: number of bytes written = DestAddr - OrigDest
        lda z:DestAddr
        sec
        sbc z:OrigDest
        tax
.ifndef ::EXO_SCRATCHPAD
        ; deallocate stack mem
        tsc
        clc
        adc #LocalsSize
        tcs
.endif
        plb
        pld
        plp
        rtl
endproc
;-------------------------------------------------------------------------------
/*
ReadBits: read Y bits from the input, first bit read ends up most significant.

Input:  Y = number of bits to read (1-16)
Output: A = value
        carry clear
        Y = 0

Y must not be 0 on entry (the loop decrements before testing); callers skip
the call when an entry needs no extra bits.

BitBuffer holds the unread bits of the current input byte below a marker bit.
When a shift leaves the buffer at zero, the bit that came out was the marker
rather than data, and the next input byte is loaded with a fresh marker.
*/
proc ReadBits, a16
        lda #0
loop:
        lsr z:BitBuffer         ; next bit -> carry
        bne :+                  ; buffer not empty: carry holds a data bit
        ; buffer exhausted: refill it from the input
        pha                     ; keep the bits collected so far
        read_byte
        ora #$0100              ; marker; sits in bit 7 after the shift below
        lsr                     ; first bit of the new byte -> carry
        sta z:BitBuffer
        pla                     ; does not affect carry
:       rol                     ; append the bit to the result
        dey
        bne loop
end:
        rts
endproc
;-------------------------------------------------------------------------------
/*
GenerateTable: read one table from the input and fill in its bits/base entries.

Input:  X = offset into bits/base tables
        Y = number of table entries (preserved to make repeated calls smaller)
Output: X = next offset into tables

For every entry a 4-bit "bits" value is read from the input. The first entry's
base is 1; each following base is the previous base plus (1 << previous bits).
*/
proc GenerateTable, a16i8
        phy                     ; saved so the caller gets its entry count back
        lda #1                  ; base of the first entry
loop:
        sta z:TablesBase,x
        phy                     ; ReadBits uses Y, so park the loop counter
        ldy #4
        jsr ReadBits
        sta z:TablesBits,x
        ; A = 1 << bits: rotate a set carry into A, then shift it "bits" times
        tay
        lda #0
        sec
:       rol
        dey
        bpl :-
        ; carry will already be clear from previous shifts
        adc z:TablesBase,x      ; A = base of the next entry
        inx
        inx
        ply
        dey
        bne loop
        ply
        rts
endproc
; report at assembly time how many bytes were emitted since StartExoCode
.out .sprintf("exomizer code size: %u", *-StartExoCode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment