-
Notifications
You must be signed in to change notification settings - Fork 25
/
tiboyce.asm
1921 lines (1749 loc) · 48.1 KB
/
tiboyce.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
; Version string for this build. The default below is only used when VERSION
; has not already been defined before this point (e.g. by the build setup).
#ifndef VERSION
; "*" indicates a potentially modified, or "dirty" in Git terminology, version.
#define VERSION "v0.3.0*"
#endif
; Fast event logging support. Everything in this section only exists when
; FASTLOG is defined.
#ifdef FASTLOG
; Event type identifiers. The macros below build the symbol name by
; concatenating "FASTLOG_EVENT_" with the type argument they are given.
#define FASTLOG_EVENT_RUNTIME_ERROR $DE
#define FASTLOG_EVENT_INVALID_OPCODE $AD
#define FASTLOG_EVENT_JIT_FLUSH $BE
#define FASTLOG_EVENT_CACHE_FLUSH $EF
#define FASTLOG_EVENT_RECOMPILE 1
#define FASTLOG_EVENT_RERECOMPILE 2
#define FASTLOG_EVENT_CACHE_MISS 3
#define FASTLOG_EVENT_PADDING_UPDATE 4
#define FASTLOG_EVENT_LOOKUP_GB 5
#define FASTLOG_EVENT_LOOKUP_GB_FOUND 6
#define FASTLOG_EVENT_LOOKUP_JIT 7
#define FASTLOG_EVENT_LOOKUP_JIT_INTERNAL 8
#define FASTLOG_EVENT_WAITLOOP_CHECK 9
#define FASTLOG_EVENT_WAITLOOP_IDENTIFIED 10
#define FASTLOG_EVENT_TRIGGER_EVENT 11
#define FASTLOG_EVENT_DECODE_JUMP 12
#define FASTLOG_EVENT_SHIFT_STACK_HIGHER 13
#define FASTLOG_EVENT_SHIFT_STACK_LOWER 14
#define FASTLOG_EVENT_SET_STACK 15
#define FASTLOG_EVENT_APPLY_STACK_OFFSET 16
#define FASTLOG_EVENT_CALLSTACK_OVERFLOW 17
; Logs an event using the fastlog routine.
; Pushes a 24-bit header word with the event ID in bits 16-23 and the
; length argument plus one in bits 8-15, calls fastlog, then loads SP from HL.
; Destroys HL.
; NOTE(review): presumably the event payload is pushed by the caller beforehand
; and fastlog returns in HL the stack pointer with the logged data removed;
; fastlog is not visible here, so confirm.
#macro FASTLOG_EVENT(type, length)
#define EVENT_TYPE concat("FASTLOG_EVENT_", type)
ld hl,(EVENT_TYPE << 16) | ((length+1) << 8)
#undef EVENT_TYPE
push hl
call fastlog
ld sp,hl
#endmacro
; Same as above, but calls fastlog_z80 and uses a 16-bit header layout:
; event ID in bits 8-15 and the length argument plus one in bits 0-7.
; NOTE(review): the name suggests this variant is for code running in Z80 mode;
; confirm against the fastlog_z80 implementation.
#macro FASTLOG_EVENT_Z80(type, length)
#define EVENT_TYPE concat("FASTLOG_EVENT_", type)
ld hl,(EVENT_TYPE << 8) | (length+1)
#undef EVENT_TYPE
push hl
call fastlog_z80
ld sp,hl
#endmacro
#endif
; Number of entries in the call stack, and the size in bytes of one entry
; for the Z80-mode and ADL-mode portions respectively.
#define CALL_STACK_DEPTH 32
#define CALL_STACK_ENTRY_SIZE_Z80 4
#define CALL_STACK_ENTRY_SIZE_ADL 3
; A value whose low byte is $C3 (the Z80 "JP nn" opcode) followed by the
; address of Z80Error, i.e. the little-endian encoding of "jp Z80Error".
; Note the definition is not parenthesized as a whole, so take care when
; embedding it inside a larger expression.
#define ERROR_CATCHER (Z80Error << 8) | $C3
; Marks unfinished code: emits a jump-to-self, hanging execution at that spot.
#macro FIXME
jr $
#endmacro
; Bypass a SPASM bug where forward-referenced values are not allowed in .fill
; Emits "count" copies of the byte "value" by expanding itself recursively,
; one .db per level, until count reaches 0. Emits nothing if count <= 0.
#macro SAFE_FILL(count, value)
#if count > 0
.db value
SAFE_FILL(count - 1, value)
#endif
#endmacro
; Directive-style spelling. Because the replacement text ends with an open
; parenthesis, uses are written as ".safe_fill count, value)" and must supply
; the closing parenthesis themselves.
#define .safe_fill SAFE_FILL(
; The CPU_SPEED_* macros build a compact table describing the locations of
; immediates in the code, each stored as a distance from the previous location.
; Table bytes are emitted through buf(1)/wr(...).
; NOTE(review): buf and wr are assembler-provided; presumably buf(1) selects a
; secondary output buffer and wr appends a source line to it - confirm.
;
; Begins a table: records the current assembly address as the base.
#macro CPU_SPEED_START()
#define CPU_SPEED_BASE eval($)
#endmacro
; Emits one table entry for the immediate at "addr".
; The offset is addr minus the current base. "width" is passed as 0 or 2 by
; the wrappers below and lands in bit 1 of the first byte.
;   offset < $40:  one byte,  (offset << 2) | width            (bit 0 clear)
;   otherwise:     two bytes, ((offset >> 8) << 2) | width | 1 (bit 0 set),
;                  followed by the low 8 bits of the offset.
; (The two-byte form assumes ">> 8 << 2" is evaluated left to right.)
; Does not update the base; use the IMM8/IMM16 wrappers for that.
#macro CPU_SPEED_IMM(addr, width)
#define CPU_SPEED_OFFSET eval(addr - CPU_SPEED_BASE)
buf(1)
#if CPU_SPEED_OFFSET < $40
#define CPU_SPEED_VALUE eval((CPU_SPEED_OFFSET << 2) | width)
wr(".db ", CPU_SPEED_VALUE)
#else
#define CPU_SPEED_VALUE eval((CPU_SPEED_OFFSET >> 8 << 2) | width | 1)
wr(".db ", CPU_SPEED_VALUE)
#define CPU_SPEED_VALUE eval(CPU_SPEED_OFFSET & $FF)
wr(".db ", CPU_SPEED_VALUE)
#endif
#undef CPU_SPEED_VALUE
#undef CPU_SPEED_OFFSET
#endmacro
; Records an 8-bit immediate at "addr" (width field 0), then makes addr the
; new base for the next entry.
#macro CPU_SPEED_IMM8(addr)
CPU_SPEED_IMM(addr, 0)
#define CPU_SPEED_BASE eval(addr)
#endmacro
; Records a 16-bit immediate at "addr" (width field 2), then makes addr+1
; (the immediate's second byte) the new base for the next entry.
#macro CPU_SPEED_IMM16(addr)
CPU_SPEED_IMM(addr, 2)
#define CPU_SPEED_BASE eval(addr+1)
#endmacro
; Ends a table: emits a 0 byte as the terminator and forgets the base.
#macro CPU_SPEED_END()
buf(1)
wr(".db 0")
#undef CPU_SPEED_BASE
#endmacro
; Gets the 24-bit base pointer for a given Game Boy address.
; The base plus the address can be used to directly read GB memory.
;
; Inputs: DE = GB address
; Outputs: HL = base pointer
; Destroys: F
;
; This variant performs no check on the upper byte of DE.
#macro GET_BASE_ADDR_NO_ASSERT
; Index mem_read_lut by the high byte of the GB address.
; This replaces only the low byte of HL with D.
ld hl,z80codebase+mem_read_lut
ld l,d
; Fetch the LUT entry. It becomes the low byte of a pointer into the next page.
ld l,(hl)
inc h ;mem_get_ptr_routines
; Advance two bytes past that location and load the 24-bit base stored there.
inc l \ inc l
ld hl,(hl)
#endmacro
; Same as GET_BASE_ADDR_NO_ASSERT, but in DEBUG builds first verifies that the
; upper byte of DE is zero and hangs (jump-to-self) if it is not.
#macro GET_BASE_ADDR_FAST
#ifdef DEBUG
; Assert high byte of DE is 0
; $FF0000 + DE carries out of 24 bits exactly when DE >= $010000.
ld hl,$FF0000
add hl,de
jr c,$
#endif
GET_BASE_ADDR_NO_ASSERT
#endmacro
; Debug assertion that the carry flag is clear.
; In DEBUG builds, hangs in a jump-to-self if carry is set.
; Emits no code in non-DEBUG builds.
#macro ASSERT_NC
#ifdef DEBUG
jr c,$
#endif
#endmacro
; Debug assertion that the carry flag is set.
; In DEBUG builds, hangs in a jump-to-self if carry is clear.
; Emits no code in non-DEBUG builds.
#macro ASSERT_C
#ifdef DEBUG
jr nc,$
#endif
#endmacro
; The macros below reach code and data inside the archived appvar, which is
; addressed by a 16-bit offset added to the base stored at ArcBase.
; Each of the call-based macros places the offset plus 1 in a .dw directly
; after the CALL; the runtime helper reads it through the return address and
; subtracts the 1 again with a short-mode decrement.

; A call to a routine located in the archived appvar.
; Destroys flags before entry to routine.
; The helper (ArcCallArcReturn) keeps the return address relative to ArcBase
; while the routine runs and rebases it afterwards.
#macro ACALL_SAFERET(address)
call ArcCallArcReturn
.dw address+1
#endmacro
; A call to a routine located in the archived appvar.
; Destroys flags before entry to routine. Nothing above this may archive.
#macro ACALL(address)
call ArcCall
.dw address+1
#endmacro
; A jump to a label located in the archived appvar.
; Destroys flags.
; Implemented as a CALL whose return address is consumed by ArcJump, so
; nothing is left on the stack.
#macro AJUMP(address)
call ArcJump
.dw address+1
#endmacro
; Puts the pointer to a label in the archived appvar into HL.
; Destroys DE and flags. Clears carry flag.
#macro APTR(address)
call ArcPtr
.dw address+1
#endmacro
; Inline version of APTR, with the same effects.
; Reads the base from ArcBase and adds the offset directly (no +1 bias here).
#macro APTR_INLINE(address)
ld hl,(ArcBase)
ld de,address
add hl,de
#endmacro
; Debug-prints using a format string in the archived appvar.
; The variable argument list must first be pushed to the stack.
; The format string pointer is pushed last and popped again after the call;
; the arguments pushed by the caller are left on the stack.
; Destroys HL, DE and flags (via APTR), plus whatever debug_printf destroys.
#macro APRINTF(text)
APTR(text)
push hl
ACALL(debug_printf)
pop hl
#endmacro
; The SPI_* macros assemble a stream of 9-bit items packed MSB-first into
; bytes. Each item is one flag bit (0 for SPI_CMD, 1 for SPI_PARAM) followed by
; its 8 value bits.
; NOTE(review): presumably the flag is the LCD controller's data/command bit in
; 9-bit SPI mode - confirm against the code that transmits these tables.
;
; Packing state:
;   SPI_BIT   = mask of the bit position the next item's flag bit occupies
;               within the byte being assembled ($80 = start of a fresh byte).
;   SPI_VALUE = low-order bits of the previous item still waiting to be
;               emitted as the high-order bits of the next byte.

; Begins a packed sequence with an empty state.
#macro SPI_START
#define SPI_BIT $80
#define SPI_VALUE $00
#endmacro
; Appends a 9-bit item with flag bit 0 and value "cmd".
; Emits one byte containing the pending bits plus the top bits of this item,
; and carries the remaining low bits forward. After eight items (SPI_BIT has
; reached $01) the carried byte is flushed via SPI_END and packing restarts.
#macro SPI_CMD(cmd)
.db ((cmd * SPI_BIT) >> 8) | SPI_VALUE
#define SPI_VALUE eval((cmd * SPI_BIT) & $FF)
#if SPI_BIT == $01
SPI_END
SPI_START
#else
#define SPI_BIT eval(SPI_BIT >> 1)
#endif
#endmacro
; Appends a 9-bit item with flag bit 1 and value "param".
; Identical to SPI_CMD except that the flag bit (SPI_BIT) is set in the
; emitted byte.
#macro SPI_PARAM(param)
.db ((param * SPI_BIT) >> 8) | (SPI_VALUE | SPI_BIT)
#define SPI_VALUE eval((param * SPI_BIT) & $FF)
#if SPI_BIT == $01
SPI_END
SPI_START
#else
#define SPI_BIT eval(SPI_BIT >> 1)
#endif
#endmacro
; Appends a 16-bit parameter as two items, high byte first.
#macro SPI_PARAM16(param)
SPI_PARAM(param >> 8)
SPI_PARAM(param & $FF)
#endmacro
; Appends "param" after checking at assembly time that 0 <= param <= limit.
; The parameter is still emitted after an error is reported.
#macro SPI_PARAM_CHECKED(param, limit)
#if (param < 0) || (param > limit)
.error "Parameter ", param, " not within limit ", limit
#endif
SPI_PARAM(param)
#endmacro
; Range-checks two fields independently, then appends them as one parameter
; with param1 in the low bits and param2 shifted left by 4.
#macro SPI_PARAM2_CHECKED(param1, limit1, param2, limit2)
#if (param1 < 0) || (param1 > limit1)
.error "Parameter ", param1, " not within limit ", limit1
#endif
#if (param2 < 0) || (param2 > limit2)
.error "Parameter ", param2, " not within limit ", limit2
#endif
SPI_PARAM((param2 << 4) | param1)
#endmacro
; Ends a packed sequence: emits the partially filled final byte, if there is
; one (SPI_BIT is not back at $80), and discards the packing state.
#macro SPI_END
#if SPI_BIT != $80
.db SPI_VALUE
#endif
#undef SPI_VALUE
#undef SPI_BIT
#endmacro
; Gamma voltage levels are specified here.
; V0-V2, V20, V43, V61-V63 are the main points, ranging from 129 to 0.
; V4, V6, V13 are interpolated between V2 and V20, ranging from 60 to 0.
; V27, V36 are interpolated between V20 and V43, ranging from 25 to 0.
; V50, V57, V59 are interpolated between V43 and V61, ranging from 60 to 0.
; J0 and J1 are values between 0 and 3 affecting interpolations for remaining voltages.
;
; Emits 14 range-checked parameters. Each one is a fixed constant minus the
; requested level, so an out-of-range level is reported at assembly time.
; NOTE(review): J0 and J1 are used in the body but are not in the parameter
; list, so they must resolve to symbols or defines visible where the macro is
; expanded - confirm this is intended.
#macro SPI_GAMMA(V0, V1, V2, V20, V43, V61, V62, V63, V4, V6, V13, V27, V36, V50, V57, V59)
SPI_PARAM2_CHECKED(129-V0, $0F, 23-V63, $0F)
SPI_PARAM_CHECKED(128-V1, $3F)
SPI_PARAM_CHECKED(128-V2, $3F)
SPI_PARAM_CHECKED(57-V4, $1F)
SPI_PARAM_CHECKED(47-V6, $1F)
SPI_PARAM2_CHECKED(21-V13, $0F, J0, $03)
SPI_PARAM_CHECKED(128-V20, $7F)
SPI_PARAM2_CHECKED(20-V27, $07, 11-V36, $07)
SPI_PARAM_CHECKED(128-V43, $7F)
SPI_PARAM2_CHECKED(54-V50, $0F, J1, $03)
SPI_PARAM_CHECKED(44-V57, $1F)
SPI_PARAM_CHECKED(34-V59, $1F)
SPI_PARAM_CHECKED(64-V61, $3F)
SPI_PARAM_CHECKED(64-V62, $3F)
#endmacro
; Sets both positive and negative gamma curves using the same parameters.
; Emits command $E0 followed by the gamma parameters, then command $E1
; followed by the same parameters again.
#macro SPI_GAMMA_BOTH(V0, V1, V2, V20, V43, V61, V62, V63, V4, V6, V13, V27, V36, V50, V57, V59)
SPI_CMD($E0)
SPI_GAMMA(V0, V1, V2, V20, V43, V61, V62, V63, V4, V6, V13, V27, V36, V50, V57, V59)
SPI_CMD($E1)
SPI_GAMMA(V0, V1, V2, V20, V43, V61, V62, V63, V4, V6, V13, V27, V36, V50, V57, V59)
#endmacro
; State variable indices
; Consecutive, non-overlapping offsets: the first two and the last four
; variables are 1 apart, the register and counter variables are 2 apart.
; STATE_END is one past the last variable, i.e. the total size (24).
; NOTE(review): presumably these are byte offsets into the state save area
; (see state_start) - confirm against the save/load code.
STATE_SYSTEM_TYPE = 0
STATE_INTERRUPTS = 1
STATE_REG_AF = 2
STATE_REG_BC = 4
STATE_REG_DE = 6
STATE_REG_HL = 8
STATE_REG_SP = 10
STATE_REG_PC = 12
STATE_FRAME_COUNTER = 14
STATE_SERIAL_COUNTER = 16
STATE_DIV_COUNTER = 18
STATE_ROM_BANK = 20
STATE_RAM_BANK = 21
STATE_MBC_MODE = 22
STATE_CPU_MODE = 23
STATE_END = 24
; Palette entries representing remapped BGP colors.
BG_PALETTE_COLOR_0 = 255
BG_PALETTE_COLOR_1 = 0
BG_PALETTE_COLOR_2 = 1
BG_PALETTE_COLOR_3 = 2
; Constant color palette entries
BLUE = 9
MAGENTA = 10
OLIVE = 11
GRAY = 12
BLACK = 13
WHITE = 14
; Palette entries representing the raw BGP colors. Must precede OBP0 colors.
BG_COLOR_0 = 15
BG_COLOR_1 = 16
BG_COLOR_2 = 17
BG_COLOR_3 = 18
; Palette entries representing every possible combination of three OBJ colors.
; The data is overlapped such that a unique sequence of three colors begins
; at each entry. Note that this means each table is (64 + 2) entries large.
; Additionally, the original sequence of colors 0, 1, 2, 3 is present starting
; at the offset corresponding to palette %100100, which is $13.
; With the values below, OBP0 occupies entries 19-84 and OBP1 entries 85-150.
OBP0_COLORS_START = 19
OBP0_ORIG_COLORS = OBP0_COLORS_START+$13
OBP1_COLORS_START = OBP0_COLORS_START + (64 + 2)
OBP1_ORIG_COLORS = OBP1_COLORS_START+$13
; The GBC_* entries below reuse index values that the entries above also use
; (for example 13 is both BLACK and GBC_OBJ_LOW_PRIO_COLORS).
; NOTE(review): presumably the two sets are never needed at the same time -
; confirm before relying on both.
; Palette entry representing transparent OBJ pixels on GBC.
; Never rendered, only used to determine priority against rendered pixels.
GBC_OBJ_TRANSPARENT_COLOR = 0
; Palette entries representing transparent BG pixels on GBC.
; One per palette, for 8 in total.
GBC_BG_TRANSPARENT_COLORS = 1
; Palette entries representing low-priority opaque OBJ pixels on GBC.
; Never rendered, only used to determine priority against rendered pixels.
; Three are allocated, to enable translation to actual OBJ colors.
GBC_OBJ_LOW_PRIO_COLORS = 13
; Color used when the screen is off. Set to white on GBC.
SCREEN_OFF_COLOR = BG_COLOR_0
; Palette entries representing normal-priority opaque BG pixels on GBC.
; Three per palette, for 24 in total.
; Evaluates to 16.
GBC_BG_OPAQUE_COLORS = GBC_OBJ_LOW_PRIO_COLORS + 3
; Palette entries representing normal-priority opaque OBJ pixels on GBC.
; Never rendered, only used to determine priority against rendered pixels.
; Three are allocated, to enable translation to actual OBJ colors.
; Evaluates to 40.
GBC_OBJ_NORMAL_PRIO_COLORS = GBC_BG_OPAQUE_COLORS + 24
; Palette entries representing high-priority opaque BG pixels on GBC.
; These represent the same literal colors as the normal-priority pixels.
; Three per palette, for 24 in total.
; Evaluates to 43.
GBC_BG_HIGH_PRIO_COLORS = GBC_OBJ_NORMAL_PRIO_COLORS + 3
; Palette entries representing high-priority opaque OBJ pixels on GBC.
; Never rendered, only used to determine priority against rendered pixels.
; Three are allocated, to enable translation to actual OBJ colors.
; Evaluates to 67.
GBC_OBJ_HIGH_PRIO_COLORS = GBC_BG_HIGH_PRIO_COLORS + 24
; Palette entries representing rendered opaque OBJ pixels on GBC.
; Three per palette, for 24 in total.
; Evaluates to 70, so the last entry used is 93.
GBC_OBJ_OPAQUE_COLORS = GBC_OBJ_HIGH_PRIO_COLORS + 3
; System calls used
; Absolute 24-bit addresses, listed in ascending order.
_sprintf = $0000BC
_GetFieldSizeFromType = $00030C
_FindField = $000314
_boot_InitializeHardware = $000384
_OSHeader = $020000
_Mov9ToOP1 = $020320
_MemChk = $0204FC
_CmpPrgNamLen = $020504
_chkFindSym = $02050C
_InsertMem = $020514
_CreatePVar4 = $020524
_DelMem = $020590
_ErrUndefined = $020764
_JError = $020790
_PushErrorHandler = $020798
_PopErrorHandler = $02079C
_ClrLCDFull = $020808
_HomeUp = $020828
_RunIndicOff = $020848
_DelVarArc = $021434
_Arc_Unarc = $021448
_DrawStatusBar = $021A3C
_DivHLByA = $021D90
_ChkInRam = $021F98
_FindFreeArcSpot = $022078
; RAM addresses used
; Absolute 24-bit addresses, listed in ascending order.
; pixelShadow and usbArea bound the SafeRAM region that the emulator's own
; buffers are laid out in further below.
ramStart = $D00000
flags = $D00080
brightness = $D0058F
asm_data_ptr1 = $D0067E
penCol = $D008D2
penRow = $D008D5
asm_prgm_size = $D0118C
tSymPtr1 = $D0257B
FPS = $D0258D
OPS = $D02593
pTemp = $D0259A
progPtr = $D0259D
drawFGColor = $D026AC
pixelShadow = $D031F6 ; Start of SafeRAM
usbArea = $D13FD8 ; End of SafeRAM
osYear = $D177CF
osDay = $D177D8
osMonth = $D177DB
userMem = $D1A881
vRam = $D40000
; Tokens/characters used
appVarObj = $15
tExtTok = $EF
tAsm84CeCmp = $7B
LlBrack = $C1
; OS flags used
; graphFlags is a flag group offset and graphDraw is a bit number within it.
graphFlags = $03
graphDraw = 0 ;0=graph is valid, 1=redraw graph(dirty)
; 84+CE IO definitions
; Memory-mapped port addresses, grouped by peripheral in ascending order.
; Flash controller
mpFlashWaitStates = $E00005
; SHA256 unit
mpShaData = $E10010
; LCD controller ($E30000 range)
mpLcdTiming0 = $E30000
mpLcdTiming1 = $E30004
mpLcdTiming2 = $E30008
mpLcdBase = $E30010
mpLcdCtrl = $E30018
mpLcdImsc = $E3001C
mpLcdRis = $E30020
mpLcdMis = $E30024
mpLcdIcr = $E30028
mpLcdCurr = $E3002C
mpLcdPalette = $E30200
mpLcdCursorImg = $E30800
; Interrupt controller ($F00000 range)
mpIntRawStatus = $F00000
mpIntEnable = $F00004
mpIntAcknowledge = $F00008
mpIntLatch = $F0000C
mpIntMaskedStatus = $F00014
; General purpose timers ($F20000 range). Each timer has a block of four
; 32-bit-spaced registers, $10 apart.
; NOTE(review): TMR_ENABLE is a control value rather than an address;
; presumably written to mpTimerCtrl - confirm at the point of use.
TMR_ENABLE = %00101001
mpTimer1Count = $F20000
mpTimer1Reset = $F20004
mpTimer1Match1 = $F20008
mpTimer1Match2 = $F2000C
mpTimer2Count = $F20010
mpTimer2Reset = $F20014
mpTimer2Match1 = $F20018
mpTimer2Match2 = $F2001C
mpTimer3Count = $F20020
mpTimer3Reset = $F20024
mpTimer3Match1 = $F20028
mpTimer3Match2 = $F2002C
mpTimerCtrl = $F20030
mpTimerIntStatus = $F20034
; Real-time clock ($F30000 range)
mpRtcSecondCount = $F30000
mpRtcMinuteCount = $F30004
mpRtcHourCount = $F30008
mpRtcDayCount = $F3000C
mpRtcCtrl = $F30020
mpRtcIntStatus = $F30034
; Keypad controller ($F50000 range). Group data registers are 2 bytes apart.
mpKeypadScanMode = $F50000
mpKeypadGrp0 = $F50010
mpKeypadGrp1 = $F50012
mpKeypadGrp2 = $F50014
mpKeypadGrp3 = $F50016
mpKeypadGrp4 = $F50018
mpKeypadGrp5 = $F5001A
mpKeypadGrp6 = $F5001C
mpKeypadGrp7 = $F5001E
; Backlight level
mpBlLevel = $F60024
; SPI controller ($F80000 range)
mpSpiConfig = $F80000
mpSpiDivider = $F80004
mpSpiTransfer = $F80008
mpSpiStatus = $F8000C
mpSpiUnknown1 = $F80010
mpSpiUnknown2 = $F80014
mpSpiFifo = $F80018
; Emulator debug port, only defined in DEBUG builds.
#ifdef DEBUG
mpCEmuDbg = $FB0000
#endif
mpZeroPage = $FF0000
;GB IO equates
; Display timing. One scanline is the sum of the three mode durations,
; 20 + 43 + 51 = 114 cycles.
; NOTE(review): presumably counted in Game Boy machine cycles (4 clocks each),
; which matches 456 clocks per scanline - confirm.
MODE_0_CYCLES = 51
MODE_2_CYCLES = 20
MODE_3_CYCLES = 43
CYCLES_PER_SCANLINE = MODE_2_CYCLES + MODE_3_CYCLES + MODE_0_CYCLES
SCANLINES_PER_FRAME = 154
; 114 * 154 = 17556 cycles per frame.
CYCLES_PER_FRAME = CYCLES_PER_SCANLINE * SCANLINES_PER_FRAME
; First scanline of vblank. Vblank lasts 154 - 144 = 10 scanlines,
; or 114 * 10 = 1140 cycles.
VBLANK_SCANLINE = 144
SCANLINES_PER_VBLANK = SCANLINES_PER_FRAME - VBLANK_SCANLINE
CYCLES_PER_VBLANK = CYCLES_PER_SCANLINE * SCANLINES_PER_VBLANK
; Game Boy IO register addresses, as 16-bit GB addresses in ascending order.
ioregs = $ff00
P1 = $ff00
SB = $ff01
SC = $ff02
DIV = $ff04
TIMA = $ff05
TMA = $ff06
TAC = $ff07
IF = $ff0f
NR10 = $ff10
NR11 = $ff11
NR12 = $ff12
NR13 = $ff13
NR14 = $ff14
NR21 = $ff16
NR22 = $ff17
NR23 = $ff18
NR24 = $ff19
NR30 = $ff1a
NR31 = $ff1b
NR32 = $ff1c
NR33 = $ff1d
NR34 = $ff1e
NR41 = $ff20
NR42 = $ff21
NR43 = $ff22
NR44 = $ff23
NR50 = $ff24
NR51 = $ff25
NR52 = $ff26
WavePatternRAM = $ff30
LCDC = $ff40
STAT = $ff41
SCY = $ff42
SCX = $ff43
LY = $ff44
LYC = $ff45
DMA = $ff46
BGP = $ff47
OBP0 = $ff48
OBP1 = $ff49
WY = $ff4a
WX = $ff4b
KEY1 = $ff4d
VBK = $ff4f
HDMA1 = $ff51
HDMA2 = $ff52
HDMA3 = $ff53
HDMA4 = $ff54
HDMA5 = $ff55
RP = $ff56
BGPI = $ff68
BGPD = $ff69
OBPI = $ff6a
OBPD = $ff6b
OPRI = $ff6c
SVBK = $ff70
IE = $ffff
; Memory areas used by the emulator
; The 16-bit Z80 address space starts here.
; Both names below refer to the start of vRam.
decompress_buffer = vRam
z80codebase = vRam
; The stack-related values in this group are written without a 24-bit base.
; NOTE(review): presumably they are 16-bit addresses within the Z80 address
; space, i.e. relative to z80codebase - confirm at the points of use.
#ifdef SHADOW_STACK
; The Z80-mode shadow stack. A sliding window of the game's "main" stack.
; The "main" stack is set when a call/return is executed on a read/write stack.
; The memory LUTs are updated to direct other reads/writes into this window.
; When the stack pointer moves out of the window, it is shifted by 256 bytes.
; The window is 512 bytes, ending at $FE00.
shadow_stack_end = $FE00
shadow_stack_start = shadow_stack_end - 512
; The bottom of the Z80 stack. Grows down from the shadow stack start.
myz80stack = shadow_stack_start
#else
; The bottom of the Z80 stack. Grows down from the Game Boy HRAM start.
myz80stack = $FE00
#endif
; The lower bound of the call stack.
; With the sizes defined above this is myz80stack - 4 - (32 * 4).
call_stack_lower_bound = myz80stack - 4 - (CALL_STACK_DEPTH * CALL_STACK_ENTRY_SIZE_Z80)
; The flags translation LUT.
; Placed 512 bytes below the bottom of the Z80 stack.
flags_lut = myz80stack - 512
; The end of the trampoline high allocation pool.
trampoline_end = flags_lut - 3
; The bottom of the ADL stack. Grows down from the end of SafeRAM.
myADLstack = usbArea - 3
; The buffers below are laid out back to back in SafeRAM, starting at the
; first 256-byte boundary after pixelShadow. Each address is derived from the
; previous one, so the sizes added here must match the sizes described.

; Preprocessed Game Boy tilemap entries. 16KB in size.
; Game Boy only:
; Each tile entry is a 2-byte offset of the pixel data from vram_pixels_start.
; Every row of 32 tiles is duplicated into 64, to facilitate wraparound.
; Game Boy Color only:
; Each tile entry is a 2-byte offset of the pixel data from vram_pixels_start,
; followed by a 2-byte offset to the palette table from vram_pixels_start.
; Bank/hflip/vflip attributes are included in the pixel data offset,
; and palette/priority attributes are included in the palette table offset.
; See gbc_tile_attributes_lut for a description of attribute bit mapping.
; In addition, each row is stored twice with different tilesets.
; So, each GB tilemap row takes a total of 256 bytes here.
; Buffer must be 256-byte aligned and contained within one 64KB-aligned block.
; Rounds pixelShadow up to the next 256-byte boundary ($D03200).
vram_tiles_start = (pixelShadow | $FF) + 1
; LUT for LSB of green component adjustment.
; Input: Bit 7: Bit 2 of G
; Bits 6-4: Bits 5-3 of B
; Bits 3-2: Bits 1-0 of G
; Bits 1-0: Bits 5-4 of G
; Must be 256-byte aligned and precede adjust_color_lut.
adjust_green_lsb_lut = vram_tiles_start + $4000
; LUT to remap the color's low byte for color adjustment.
; Input: Bits 5-7: Bits 2-0 of G
; Output: Bit 7: Bit 2 of G
; Bits 3-2: Bits 1-0 of G
; Must be 256-byte aligned, and middle byte must equal $73 for optimization.
; With the values above this evaluates to $D07300.
adjust_color_lut = adjust_green_lsb_lut + 256
; LUT for MSB of green component adjustment.
; Same input format as the LSB LUT.
; Must be 256-byte aligned and follow adjust_color_lut.
adjust_green_msb_lut = adjust_color_lut + 256
; Preprocessed Game Boy tile pixel entries. 24KB in size.
; Game Boy only:
; Each tile is converted into one byte per pixel, for 64 bytes per tile.
; Game Boy Color only:
; Each tile is converted into four bytes per row, for 32 bytes per tile.
; Byte 0: Index into any palette table for the first 4 pixels.
; Byte 1: Offset from the first 4 pixels to the last 4 pixels.
; Byte 2: Index into any palette table for the last 4 pixels, reversed.
; Byte 3: Offset from the last 4 pixels to the first 4 pixels, reversed.
; Additionally, each VRAM bank is interleaved after each row,
; which makes it simpler to represent the bank in the tile attributes.
; This also effectively keeps tile lookups at a scale of 64.
; Buffer must be 256-byte aligned.
vram_pixels_start = adjust_green_msb_lut + 256
; The mini frame backup in the menu is temporarily stored in this area.
; 160 * 144 = 22.5 KB in size.
; Shares its memory with the tile pixel buffer above.
mini_frame_backup = vram_pixels_start
; Start of recompiler struct index lookup table. 512 bytes in size.
; The first 256 bytes are the LSBs and the next 256 are the MSBs.
; The lookup table is indexed by the high byte of a JIT address,
; and each pointer gives the last struct entry touching that range.
recompile_index_LUT = vram_pixels_start + $6000
; Start of recompiler cached jump index lookup table. 512 bytes in size.
; The first 256 bytes are the LSBs and the next 256 are the MSBs.
; The lookup table is indexed by the low byte of a GB address,
; and each pointer gives the first cache entry corresponding to that LSB.
recompile_cache_LUT = recompile_index_LUT + $0200
; A lookup table for converting BG palettes to raw colors. 256 bytes in size.
; Must be 256-byte aligned.
convert_palette_LUT = recompile_cache_LUT + $0200
; A table with the largest Y position of a sprite using the corresponding tile,
; or 0 if the tile is not used by any onscreen sprite.
; Must be 256-byte aligned.
oam_tile_usage_lut = convert_palette_LUT + 256
; Specifies offsets into a buffer of pixel data corresponding to the
; input 2bpp pixel data. Note that the input has the high palette bits
; grouped in the high nibble, and the low palette bits in the low nibble.
; Must be 256-byte aligned.
overlapped_pixel_index_lut = oam_tile_usage_lut + 256
; A table representing every possible combination of four 2bpp pixels.
; The data is overlapped such that a unique sequence of four pixels begins
; at each byte. Note that this means the table is 256 + 3 bytes large.
; Must be 256-byte aligned and directly follow the LUT. Game Boy only.
overlapped_pixel_data = overlapped_pixel_index_lut + 256
; Specifies offsets into a buffer of pixel data corresponding to the reverse
; of the input 2bpp pixel data. Note that the input has the high palette bits
; grouped in the high nibble, and the low palette bits in the low nibble.
; Must be 256-byte aligned and directly follow the non-reversed LUT. GBC only.
; Shares its address with overlapped_pixel_data, which is Game Boy only.
overlapped_pixel_rev_index_lut = overlapped_pixel_data
; Preconverted digit pixels for displaying FPS quickly. 24 bytes per character, 264 bytes total.
; Must be 8-byte aligned.
; Rounds the end of overlapped_pixel_data (256 + 3 bytes) up to a multiple
; of 8.
digits = (((overlapped_pixel_data + (256 + 3)) - 1) | 7) + 1
; Temporary backup for 16 palette entries. 32 bytes in size.
palette_backup = digits + 264
; Queue for BGP writes during a frame. 192+1 bytes in size.
; Stored in a compressed format, with either:
; Byte 0 = Number of scanlines, Byte 1 = BGP value for all scanlines
; Byte 0 = Number of scanlines (N) + 144, Bytes 1-N = BGP values per scanline
; Must end at a 256-byte aligned address. Game Boy only.
; The expression picks the lowest start address at or after the end of
; palette_backup for which start + 193 lands on a 256-byte boundary.
BGP_write_queue = (((palette_backup + 32) + 192) | 255) - 192
; Tracks the frequency of each BGP value as they are queued.
; The BGP value with the highest frequency will use the native palette.
; Must be 256-byte aligned and directly follow the queue. Game Boy only.
BGP_frequencies = BGP_write_queue + 192 + 1
; Converts GBC tile attributes for use in the low 6 bits of the tile cache.
; This bitmask is XORed with the tile row offset in bits 3-5 during rendering.
; Bit 1: Horizontal flip
; Bit 2: VRAM bank
; Bits 3-5: Vertical flip (all 1 or all 0)
; Must be 256-byte aligned. GBC only.
; Shares its address with BGP_frequencies, which is Game Boy only.
gbc_tile_attributes_lut = BGP_frequencies
; Specifies indices into an array of color data corresponding to the
; input 2bpp palette data. Note that the input is a BGP, OBP0, or OBP1
; value, and identifies three colors corresponding to the upper 6 bits.
; Must be 256-byte aligned (and currently, follow BGP_frequencies).
overlapped_palette_index_lut = BGP_frequencies + 256
; Converts GBC tile attributes for use in 8x16 sprite rendering.
; This bitmask is XORed with the tile row offset in bits 3-6 during rendering.
; Bit 1: Horizontal flip
; Bit 2: VRAM bank
; Bits 3-6: Vertical flip (all 1 or all 0)
; Must be 256-byte aligned and follow the first LUT. GBC only.
; Evaluates to the same address as overlapped_palette_index_lut.
gbc_tile_attributes_lut_2 = gbc_tile_attributes_lut + 256
; Table representing every possible combination of three BGP colors.
; The data is overlapped such that a unique sequence of three colors begins
; at each word. Note that this means the table is (64 + 2) * 2 bytes large.
; Additionally, the original sequence of colors 0, 1, 2, 3 is present starting
; at the index corresponding to palette %100100, which is $13.
; This table must be 256-byte aligned. Game Boy only.
overlapped_bg_palette_colors = overlapped_palette_index_lut + 256
bg_palette_colors = overlapped_bg_palette_colors + ($13*2)
; Table of GBC palette colors ready to copy to the native palette.
; There are 8 transparent BG colors, followed by 24 opaque BG colors,
; followed by 24 opaque OBJ colors, for a total of (8 + 24 + 24) * 2 bytes.
; This table must be 256-byte aligned. GBC only.
; Shares its address with overlapped_bg_palette_colors (Game Boy only).
gbc_bg_transparent_colors = overlapped_bg_palette_colors
gbc_bg_opaque_colors = gbc_bg_transparent_colors + (8*2)
gbc_obj_opaque_colors = gbc_bg_opaque_colors + (24*2)
; Stack of cycle offset fixup locations during JIT block recompilation.
; Up to one pointer may be allocated for each recompiled opcode, meaning
; the stack size is MAX_OPCODE_BYTES_PER_BLOCK * 2 = 124 bytes.
; Conveniently this fits in a 256-byte space along with the BG palette colors.
; The symbol is the end of that shared 256-byte space.
; NOTE(review): presumably the stack grows downward from here toward the
; palette colors - confirm against the recompiler.
recompile_cycle_offset_stack = overlapped_bg_palette_colors + 256
; List of LYC write predictions, based on writes in previous frames.
; The values in indices 1 to 144 correspond to the last value written to LYC
; after LYC matched LY at that index, only if the new value is between
; LY and 144. Otherwise, the value in the index equals 144.
; Index 0 is special, and indicates the first LYC value after vblank.
; This table must be 256-byte aligned.
; Starts at the same address as the symbol above and extends upward.
lyc_prediction_list = recompile_cycle_offset_stack
; Two arrays of scanline information, one for each double buffer.
; Offset 0: Pointer to scanline sprite usage count.
; Offset 3: Pointer to the first pixel of the scanline in the frame buffer.
; Each array has 144 entries of 6 bytes (two 24-bit pointers).
scanlineLUT_1 = lyc_prediction_list + 256
scanlineLUT_2 = scanlineLUT_1 + (144*6)
; One byte for each scanline indicating the number of sprites remaining
; in the current frame. Filled with 10 at the start of each frame, and
; decremented by 1 each time a sprite is rendered on that scanline.
scanline_sprite_counts = scanlineLUT_2 + (144*6)
; A list of VAT entries for found ROM files. 256 pointers in size.
romListStart = scanline_sprite_counts + 144
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Game Boy Color only data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Tables mapping sprite attributes to palette offsets.
; The first table applies to high sprite priority,
; and the second table applies to low/normal sprite priority.
; Must be at a 256-byte offset from a 512-byte alignment.
; The expression skips the ROM list (256 pointers of 3 bytes) and rounds up to
; the next address of the form (512-byte boundary) + 256. The second table
; therefore starts on a 512-byte boundary.
high_prio_sprite_palette_lut = ((romListStart + (256*3) - 257) | $1FF) + 257
low_normal_prio_sprite_palette_lut = high_prio_sprite_palette_lut + 256
; Tables representing every possible combination of four sprite pixels.
;
; The pixels contained in these LUTs correspond to indices with no color,
; and rather are compared to pixels in the framebuffer to determine priority.
; Transparent pixels are lower priority than every other pixel, and are never
; drawn. Opaque pixels have values that can be offset to produce sprite pixels.
;
; The data is overlapped such that a unique sequence of four pixels begins
; at each byte. Sprite rendering uses wrapping logic rather than LDIR,
; so only 256 bytes are required per table.
; Must be 256-byte aligned, and follow the palette LUTs.
high_prio_sprite_pixel_lut = low_normal_prio_sprite_palette_lut + 256
normal_prio_sprite_pixel_lut = high_prio_sprite_pixel_lut + 256
low_prio_sprite_pixel_lut = normal_prio_sprite_pixel_lut + 256
; Tables representing every possible combination of four BG pixels, for each
; possible palette. High-priority BG tiles are considered as unique palettes,
; so there are 8*2=16 total palettes. Opaque pixels in high-priority tiles
; render with larger pixel values referring to duplicated colors.
; The data is overlapped such that a unique sequence of four pixels begins
; at each byte. Additionally, the data is repeated such that the copy pointer
; can move forward directly from one set of 4 pixels to the next 4 pixels.
; As such, each table is (256+3)*2 bytes in size, for 8288 bytes in total.
; The first LUT must be 512-byte aligned, to allow using INC H from the start
; of each palette to get a contiguous 256-byte region.
; This lands 1024 bytes after low_normal_prio_sprite_palette_lut, which keeps
; the required 512-byte alignment.
gbc_overlapped_pixel_data = low_prio_sprite_pixel_lut + 256
gbc_overlapped_pixel_data_end = gbc_overlapped_pixel_data + ((256+3)*2*16)
; The ROM bank lookup table. 256 entries in size.
; Offset 0: The MSB of the first trimmed portion of the bank, minus 1 ($3F-$7F).
; Offset 1: The base address of the ROM bank, minus $4000.
; The pre-subtracted $4000 means that the memory can be indexed by GB address.
; This must be located beyond the decompression buffer.
; Each entry is 4 bytes (1 + 3). The table ends 256 bytes below the last
; 256-byte boundary at or before usbArea, and is located from its end.
rombankLUT_end = (usbArea & ~$FF) - 256
rombankLUT = rombankLUT_end - (256*4)
; Start of Game Boy HRAM region. 512 bytes in size, includes OAM and MMIO.
; Corresponds to GB addresses $FE00-$FFFF within the Z80 address space.
hram_start = z80codebase + $FE00
; Start of state saving/loading area.
; Covers $60 bytes starting $A0 bytes into the HRAM region.
state_start = hram_start + $00A0
state_size = $60
; Base address of HRAM, can be indexed directly by Game Boy address.
hram_base = z80codebase
; Start of menu 8bpp frame buffer. 320*240 bytes in size.
menu_frame_buffer = vRam + (320*240)
; Start of first 8bpp frame buffer. 160*240 bytes in size.
; Starts at the same address as the menu frame buffer.
gb_frame_buffer_1 = vRam + (320*240)
; Start of second 8bpp frame buffer. 160*240 bytes in size.
gb_frame_buffer_2 = gb_frame_buffer_1 + (160*240)
; Start of structure array keeping track of recompiled code blocks. 11KB max.
; Grows forward into the recompiled code mapping cache (see below).
; Each entry is 8 bytes in size, and contains the following members:
; +0: A 16-bit pointer to the start of the recompiled Z80 code block.
; +2: A 24-bit banked GB address of the first source opcode.
; +5: The 8-bit size of the GB opcode block.
; +7: The 8-bit total cycle count of the block.
; (The byte at offset +6 is not described here.)
; The address of the first unused entry is stored in (recompile_struct_end).
; The blocks are sorted in ascending order by Z80 block start address.
; Note: The first unused entry always contains a *24-bit* pointer to the next
; available block start address. The upper 8 bits will be overwritten.
; Buffer must be 64KB-aligned, typically located directly after Z80 code space.
recompile_struct = z80codebase + $010000
; End of array caching mappings of GB addresses to recompiled code. 11KB max.
; Grows backward into the recompiled code block information (see above).
; Each entry is 5 bytes in size, and contains the following members:
; +0: 16-bit pointer to recompiled code. May be inside a non-RAM-based block.
; +2: 8-bit clock cycle index within the block.
; +3: Upper 16 bits of the 24-bit banked Game Boy address.
; The start of the array is stored in (recompile_cache).
; The array is logically separated into ranges, each range corresponding to
; the LSB of the contained GB addresses, in decreasing order ($FF to $00).
; Range bounds are tracked in the recompile_cache_LUT and used for fast lookup.
; Lookup is O(n) on number of entries with a given LSB. Insertion is O(n) on
; number of total entries, plus the address lookup and updating LSB ranges.
; Only addresses reached via RET (when callstack fails) or JP HL use the cache.
; Additionally, jumps from RAM-based GB code use the cache to speed up SMC,
; and jumps/calls to banked regions use the cache upon a bank mismatch.
; Buffer end must be 256-byte aligned.
; The cache ends where the first frame buffer begins.
recompile_cache_end = gb_frame_buffer_1
; File header, emitted ahead of the program code: an 8-character signature,
; a byte of value $01, and the 16-bit program_size.
; NOTE(review): the $01 byte is presumably a format/version marker - confirm
; against whatever loads this file.
.db "TIBoyEXE",$01
.dw program_size
; Assembly address immediately after the header.
header_size = $
; The code that follows is assembled to run at userMem.
.org userMem
program_start:
; Calls a routine located in the archived appvar.
; The 16-bit offset (plus 1) is stored at the return address.
;
; Used by the ACALL macro, which places the offset word directly after the
; CALL. The target routine is entered with HL and DE as the caller left them,
; and its RET resumes the caller just past the inline word.
; Flags are modified (by ADD HL,DE) before the routine is entered.
ArcCall:
; HL = return address, which points at the inline offset word.
ex (sp),hl
; Patch the operand of the LD HL,(0) below so that it reads the inline word.
ld (ArcCallSMC),hl
; Step the return address over the 2-byte inline word.
inc hl
inc hl
; Store the adjusted return address and recover the caller's HL.
ex (sp),hl
; Shared entry point: ArcCallArcReturn calls in here after patching ArcCallSMC
; itself.
ArcCallEntry:
push hl
push de
; ArcCallSMC names the address operand of the next instruction.
ArcCallSMC = $+1
ld hl,(0)
; Undo the +1 applied by the macro. The code relies on this short-mode
; decrement leaving only the 16-bit offset in HL, since the load above also
; fetched a third byte that is not part of the offset.
; NOTE(review): confirm the upper-byte behavior against the eZ80 manual.
dec.s hl
; ArcBase names the immediate operand of the next instruction, which is
; rewritten at runtime to hold the appvar's base address.
ArcBase = $+1
ld de,0
; HL = absolute address of the target routine.
add hl,de
pop de
; Put the target address on the stack in place of the saved HL, recover the
; caller's HL, and "return" into the target routine.
ex (sp),hl
ret
; Calls a routine located in the archived appvar. Returns to the appvar.
; The 16-bit offset (plus 1) is stored at the return address.
;
; Used by the ACALL_SAFERET macro. While the target routine runs, the caller's
; return address is kept on the stack as an offset from ArcBase instead of an
; absolute address, and is converted back using the value of ArcBase at the
; time the routine returns.
; NOTE(review): presumably this is what allows the appvar to move during the
; call - confirm against the callers.
; Flags are modified before the routine is entered. AF, DE and HL as returned
; by the routine are passed back to the caller unchanged.
ArcCallArcReturn:
; HL = return address, which points at the inline offset word.
ex (sp),hl
; Patch ArcCallEntry's load so that it reads the inline word.
ld (ArcCallSMC),hl
; Step the return address over the 2-byte inline word.
inc hl
inc hl
; Convert the return address into an offset from the appvar base.
push de
ld de,(ArcBase)
or a
sbc hl,de
pop de
; Store the relative return address and recover the caller's HL.
ex (sp),hl
; Enter the target routine; it returns to the instruction below.
call ArcCallEntry
; HL = relative return address. The routine's HL is parked on the stack.
ex (sp),hl
; Rebase the return address, preserving the routine's AF and DE.
push af
push de
ld de,(ArcBase)
add hl,de
pop de
pop af
; Store the absolute return address, recover the routine's HL, and return.
ex (sp),hl
ret
; Jumps to an address located in the archived appvar.
; The 16-bit offset (plus 1) is stored at the return address.
;
; Used by the AJUMP macro. The return address pushed by the CALL is replaced
; by the jump target, so nothing extra is left on the stack.
; HL and DE are preserved; flags are modified by ADD HL,DE.
ArcJump:
; HL = address of the inline offset word. The caller's HL goes on the stack.
ex (sp),hl
push de
; Load the inline word (a third byte is fetched along with it), then undo
; the +1. The code relies on the short-mode decrement leaving only the 16-bit
; offset in DE.
ld de,(hl)
dec.s de
; HL = appvar base + offset.
ld hl,(ArcBase)
add hl,de
pop de
; Put the target on the stack, recover the caller's HL, and jump via RET.
ex (sp),hl
ret
; Puts a pointer located in the archived appvar in HL.
; The 16-bit offset (plus 1) is stored at the return address.
;
; Used by the APTR macro.
; Outputs: HL = appvar base + offset
; Destroys: DE, flags (the macro documents carry as being cleared)
ArcPtr:
; HL = address of the inline offset word.
pop hl
; Read the word into E and D one byte at a time, stepping past it.
ld e,(hl)
inc hl
ld d,(hl)
inc hl
; Push the address following the inline word as the new return address.
push hl
; Undo the +1. The code relies on the short-mode decrement leaving only the
; 16-bit offset in DE, since the upper byte of DE was not loaded above.
dec.s de
ld hl,(ArcBase)
add hl,de
ret
; Archives or unarchives a variable. Updates appvar in case of garbage collect.
; Returns carry set on failure.
Arc_Unarc_Safe:
call _chkFindSym
ret c
call _ChkInRam
jr nz,++_
; If archiving, check if there is a free spot in archive
ex de,hl
ld hl,(hl)
ld a,c
add a,12
ld c,a
ld b,0
add.s hl,bc
jr c,_
push hl
pop bc
call _FindFreeArcSpot
jr nz,++_
_
; No free spot, so prepare for Garbage Collect message
ACALL(RestoreHomeScreen)
; Set Z flag
cp a
_
push ix
ld ix,(tSymPtr1)
ld hl,(ix-7)
ld h,(ix-4)
ld l,(ix-3)
pop ix
push hl
scf
push af
ld hl,Arc_Unarc_ErrorHandler
call _PushErrorHandler
call _Arc_Unarc
call _PopErrorHandler
pop af
ccf
push af
Arc_Unarc_ErrorHandler:
ld hl,SelfName
call _Mov9ToOP1
call _chkFindSym
jr c,EpicFailure
ld (tSymPtr1),hl
pop bc
pop hl
ex de,hl
or a
sbc hl,de
ex de,hl
ld hl,(ArcBase)
add hl,de
ld (ArcBase),hl
pop hl
add hl,de
push hl
push bc