-
Notifications
You must be signed in to change notification settings - Fork 25
/
waitloop.asm
398 lines (382 loc) · 7.9 KB
/
waitloop.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
; Handles a branch target whose first Game Boy opcode is an unconditional
; JR/JP. Reached from identify_waitloop with carry clear (the CP that
; matched) and with DE already advanced 3 bytes into the recompiled code.
; Returns to the caller (no waitloop) unless DE, after 7 bytes in total,
; equals the address on top of the short stack; on a match it continues to
; waitloop_identified with B = 0 and C = 0.
waitloop_found_uncond_jump:
; See if we reached the loop end
; 4 more bytes, 7 in total (the identify_waitloop header documents the
; stacked address as "plus 7" for an unconditional branch)
inc de
inc de
inc de
inc de
; Peek the stacked branch address without removing it
pop.s hl
push.s hl
sbc.s hl,de ;Carry is reset
ret nz
; H and L are both zero after the matching subtraction, so this clears
; B (read cycle offset) and C (variable type; 0 selects
; handle_waitloop_variable in waitloop_identified)
ld b,h
ld c,l
jp waitloop_identified
; Filters output from decode_branch based on whether a waitloop is detected.
;
; In general, a waitloop consists of a loop for which the output state of the
; loop does not feed into the input state, which means that it will loop
; forever until an external source (usually an interrupt) changes the input.
; In most cases, this means the loop is just a memory read and a comparison.
; Occasionally a waitloop is nested and checks multiple sources - this
; is also allowed as long as all intermediate branches jump to the start.
;
; If a waitloop is detected, a handler is inserted in the mem_routine buffer
; and the branch target address is set to that handler.
; The handler used is determined based on the memory address being accessed
; in the loop - MMIO such as the LY register uses a different handler.
;
; Inputs: IX = branch target recompiled code
; DE = branch target GB address
; (SPS) = branch recompiled code address (plus 9, or 7 if uncond)
; (SPL+4) = number of cycles to sub-block end from target
; (SPL+7) = number of cycles taken by jump
; (SPS+2),BC',DE' = Game Boy BC,DE,HL registers
; Outputs: IX = filtered branch target
;
; Scan conventions used by this routine and the labels it branches to:
; HL = pointer into the Game Boy opcodes being examined
; DE = pointer into the recompiled code; every "consume N bytes" step
; advances DE by the size of the code presumably emitted for that opcode
; Any plain RET on the scan path means "not a waitloop" and leaves IX as it
; was on entry. The success path leaves through waitloop_identified.
identify_waitloop:
; NOTE(review): presumably an always-disabled debug trace (`0` is not expected
; to be a defined symbol) - confirm against the assembler's #ifdef semantics.
#ifdef 0
push de
push ix
pop af
push af
APRINTF(WaitLoopSearchMessage)
pop ix
pop de
#endif
#ifdef FASTLOG
push ix
push de
FASTLOG_EVENT(WAITLOOP_CHECK, 5)
inc sp
#endif
; GET_BASE_ADDR_FAST is a macro defined elsewhere; together with the add it
; presumably turns the GB address in DE into a pointer to the opcodes in HL.
GET_BASE_ADDR_FAST
add hl,de
; DE = start of the recompiled code for the target (IX itself is kept)
lea de,ix
; Zero the patched immediate of the `add a,0` in waitloop_identified, which
; accumulates the cycles of conditional jumps found inside the loop
xor a
ld (waitloop_length_smc),a
; Check for a read
ld a,(hl)
cp $BE ;CP (HL)
jr z,waitloop_found_read_hl_trampoline
cp $CB ;Bitwise ops
; 16-bit INC leaves the flags from the CP intact; HL now points at the byte
; following the first opcode for every path below
inc hl
jr z,waitloop_found_read_bitwise
cp $F0 ;LD A,($FF00+nn)
jr z,waitloop_found_read_1
cp $FA ;LD A,(nnnn)
jr z,waitloop_found_read_2
; Consume 3 bytes of recompiled code
inc de
inc de
inc de
cp $18 ;JR
jr z,waitloop_found_uncond_jump
cp $C3 ;JP
jr z,waitloop_found_uncond_jump
cp $0A ;LD A,(BC)
jr z,waitloop_found_read_bc
cp $1A ;LD A,(DE)
jr z,waitloop_found_read_de
cp $F2 ;LD A,($FF00+C)
jr z,waitloop_found_read_c
; After the XOR, A is zero outside bits 3-5 only for opcodes of the form
; 01rrr110; bits 3-5 then hold the destination register field.
; NOTE(review): $76 (HALT) also satisfies this mask (register field 6) -
; confirm that a HALT can never be the first opcode examined here.
xor $46 ;LD r,(HL)
tst a,$C7
ret nz
; Register fields 0 and 1 (B and C) give A < $10 and take the longer path
cp $10 ;LD B/C,(HL)
jr nc,waitloop_found_read_hl
; Consume 4 bytes of recompiled code
inc de
inc de
inc de
inc de
; NOTE(review): on this fall-through the carry from `cp $10` is still set
; (INC DE and JR do not touch flags), so the `sbc a,a` reached through
; waitloop_found_read_hl yields A = -1 ("read in 3rd cycle") for LD B/C,(HL),
; whereas the `jr nc` and CP (HL) entries yield A = 0. Confirm this is the
; intended read-cycle offset for LD B/C,(HL).
waitloop_found_read_hl_trampoline:
; Relay for the CP (HL) case, presumably because the real target is out of
; JR range from the top of the routine
jr waitloop_found_read_hl
; First opcode is LD A,($FF00+nn). On entry HL points at the immediate byte,
; DE at the start of the recompiled code, and Z is set from the matching CP.
; Leaves C = address low byte, A = -1 and Z still set, then joins
; waitloop_found_read_3byte.
waitloop_found_read_1:
; Use 8-bit immediate as HRAM read address
ld c,(hl)
; Check for direct vs. port read
; Bit 0 of the first recompiled byte is rotated into carry; a set bit means
; the longer recompiled form (3 extra bytes). Which of the two forms is
; "direct" and which is "port" is not visible from this file.
ld.s a,(de)
rra
; Set -1 for read in 3rd cycle
ld a,-1
; Z flag is set from earlier comparison
; (RRA, LD and INC DE do not modify Z, so it survives to
; waitloop_resolve_read and selects the HRAM/register check there)
jr nc,waitloop_found_read_3byte
; Consume 3 bytes of recompiled code
inc de
inc de
inc de
jr waitloop_found_read_3byte
; First opcode is LD A,(nnnn). On entry HL points at the low byte of the
; address and DE at the start of the recompiled code. The recompiled code is
; inspected to tell which access form was emitted; the Z flag handed to
; waitloop_resolve_read is set only for the forms that address $FFxx
; (labelled "HRAM read" and "port read" below), and A is set to -2.
waitloop_found_read_2:
; Use 16-bit immediate as read address
ld c,(hl)
inc hl
; HL now points at the high byte of the address
; Set Z flag for HRAM read
ld.s a,(de)
bit 0,a
jr z,_
; Consume 2 bytes of recompiled code
inc de
; A = second recompiled byte, tested below
ld.s a,(de)
inc de
; Reset Z flag for direct read
bit 5,a
jr nz,_
; Consume 1 byte of recompiled code
inc de
; Set Z flag for port read
; (Z is set exactly when the high address byte is $FF)
ld a,(hl)
inc a
jr z,_
; Reset Z flag for banked read
; Consume 2 bytes of recompiled code
inc de
inc de
_
; Set -2 for read in fourth cycle, and preserve Z flag
ld a,-2
; Shared tail for the two immediate-address reads. Expects A = read cycle
; offset and Z = "address is in $FFxx"; none of the INCs below alter Z.
waitloop_found_read_3byte:
; Consume immediate value
inc hl
; Consume 3 bytes of recompiled code
inc de
inc de
inc de
jr waitloop_resolve_read
; First opcode is LD A,(BC). Fetches the Game Boy BC value from the short
; stack and continues at waitloop_found_read_any with A = 0.
waitloop_found_read_bc:
; Consume one extra byte of recompiled code
inc de
; Use BC as read address
; Peek the second short-stack entry ((SPS+2) per the identify_waitloop
; header) and put both entries back; AF is only a scratch holder here
pop.s af
pop.s bc
push.s bc
push.s af
; A = 0: read happens in the 2nd cycle
xor a
jr waitloop_found_read_any
; First opcode is LD A,(DE). Pushes the shadow BC, which holds the Game Boy DE
; per the identify_waitloop header, for waitloop_found_read_rr to pop into the
; main BC. Carry is clear from the matching CP, so the `sbc a,a` there gives
; A = 0.
waitloop_found_read_de:
exx
; Use DE as read address
push bc
jr waitloop_found_read_rr
; First opcode is LD A,($FF00+C). Fetches the Game Boy BC from the short stack
; so that C holds the low byte of the $FFxx address, then goes straight to
; waitloop_resolve_read with A = 0 and Z set.
waitloop_found_read_c:
; Use C as HRAM read address
; Peek (SPS+2) and restore both short-stack entries
pop.s af
pop.s bc
push.s bc
push.s af
; Set 0 for read in 2nd cycle, and set Z flag to indicate HRAM
xor a
; Consume 3 more bytes of recompiled code
; (INC DE leaves Z untouched)
inc de
inc de
inc de
jr waitloop_resolve_read
; First opcode is a $CB-prefixed operation; HL points at its second byte.
; Only BIT b,(HL) is accepted, since it acts as both the read and the data
; operation. Falls through the (HL) read path with carry set.
waitloop_found_read_bitwise:
ld a,(hl)
and $C7
cp $46 ;BIT b,(HL)
ret nz
; Parse this opcode as a data op
; (HL goes back to the $CB prefix so the data-op scan sees the whole opcode)
dec hl
; Adjust the read cycle forward by 1
scf
; Read through Game Boy HL. The carry on entry selects the read cycle offset
; computed by the `sbc a,a` below.
waitloop_found_read_hl:
exx
; Use HL as read address
; (shadow DE holds the Game Boy HL per the identify_waitloop header)
push de
; Common tail for register-pair reads: expects the address on the long stack
; and the shadow register set selected.
waitloop_found_read_rr:
; Use stack value as read address
exx
pop bc
; Set A to 0 for read in 2nd cycle, -1 for read in 3rd cycle
; (EXX, PUSH and POP leave carry as it was on entry)
sbc a,a
; Expects BC = read address and A = read cycle offset
waitloop_found_read_any:
; Set Z flag if read is HRAM
; (Z is set exactly when the high address byte was $FF; B is overwritten
; immediately afterwards in waitloop_resolve_read)
inc b
; Classifies the variable being read, then starts scanning data operations.
; Expects A = read cycle offset (0, -1 or -2), C = low byte of the read
; address, Z set if the address is in $FFxx, HL = next Game Boy opcode and
; DE = recompiled code position.
waitloop_resolve_read:
; Save the read cycle offset
ld b,a
; Put the waitloop variable type in C, or return if invalid
; 0 = RAM-like variable, $41 = STAT, $44 = LY
call z,waitloop_resolve_read_hram
; Z is still clear here if the call was skipped; after the call, Z set means
; the register was rejected
ret z
; A is B (giving 0) for an ordinary variable, or C xor B for LY/STAT, in
; which case the XOR recovers C
xor b
ld c,a
; Consume first byte of recompiled code
inc de
; Scans one Game Boy data operation. Any opcode that is not one of the
; accepted ALU/bit forms returns to the caller (not a waitloop).
waitloop_find_data_op_again_loop:
ld a,(hl)
; Consume first byte of opcode
inc hl
cp $CB ;Bitwise ops
jr z,waitloop_found_data_op_bitwise
; From here A holds the opcode with the bits 3-5 field masked out
and $C7
cp $C6 ;Immediate ALU ops
jr z,waitloop_found_data_op_1
cp $86 ;(HL) ALU ops
jr z,waitloop_found_data_op_3byte
cp $07 ;Special data processing
jr z,waitloop_found_data_op_special
; Zero only for 10xxx00x, i.e. ALU ops whose source register field is 0 or 1
and $C6
xor $80 ;Register ALU ops using BC (IX prefix)
jr z,waitloop_found_data_op_ix
; The XOR flipped bit 7, so zero here means the opcode's top two bits are 10
and $C0 ;Register ALU ops
jr z,waitloop_found_data_op
ret
; Data operation with an opcode of the form 00xxx111. Accepts CPL and the
; four accumulator rotates; opcodes $20 and above other than CPL return to
; the caller (not a waitloop).
waitloop_found_data_op_special:
; Re-read the full opcode, since A was masked by the caller
dec hl
ld a,(hl)
inc hl
cp $2F ;CPL
jr z,waitloop_found_data_op
cp $10 ;RLCA/RRCA
jr c,waitloop_found_data_op_3byte
cp $20 ;RLA/RRA
ret nc
; RLA/RRA: one byte more of recompiled code than RLCA/RRCA
inc de
; Advance DE by 2 further bytes of recompiled code
waitloop_found_data_op_3byte:
inc de
inc de
jr waitloop_found_data_op
; Data operation with a $CB prefix; HL points at its second byte. Only the
; BIT b,r group ($40-$7F) is accepted. The labels below form a fall-through
; ladder, so each entry point adds its own increments to those that follow.
waitloop_found_data_op_bitwise:
ld a,(hl)
; Adding $40 overflows (parity/overflow flag set) only for $40-$7F, so
; `ret po` rejects every other $CB opcode
add a,$40 ;BIT b,r
ret po
; Low three bits are the register field, unchanged by the addition
and 7
cp 6 ;BIT b,(HL)
jr z,waitloop_found_data_op_4byte
cp 2 ;BIT b,B/C
jr nc,waitloop_found_data_op_1
; Register field 0 or 1 (B/C): one byte more than the (HL) form
inc de
waitloop_found_data_op_4byte:
inc de
inc de
waitloop_found_data_op_1:
; Consume second byte of opcode and recompiled code
inc hl
waitloop_found_data_op_ix:
inc de
; A data operation has been consumed. Checks whether the recompiled position
; plus 9 bytes equals the stacked branch address, which means the branch that
; closes the loop comes next. Otherwise accepts a conditional JR/JP, or
; loops back for another data operation.
waitloop_found_data_op:
; See if we reached the loop end
; Save the Game Boy opcode pointer; HL is needed for the arithmetic
push hl
ld hl,9
; Entry from waitloop_found_jr: HL = recompiled position, DE = bytes to add,
; and the Game Boy opcode pointer is on the long stack
waitloop_found_jump_next:
add hl,de
ex de,hl
; Peek the stacked branch address on the short stack
pop.s hl
push.s hl
sbc.s hl,de ;Carry is reset
; Restore the Game Boy opcode pointer; POP leaves the flags from SBC intact
pop hl
jr z,waitloop_identified
; Carry means DE has gone past the branch address: not a waitloop
ret c
ld a,(hl)
; Mask out the two condition bits
and $E7
cp $20 ;JR cc
jr z,waitloop_found_jr
cp $C2 ;JP cc
jr nz,waitloop_find_data_op_again ;Allow multiple data operations
; Skip the JP opcode
inc hl
waitloop_found_jr:
; Skip the JR opcode
inc hl
inc hl
; Advance past the JIT implementation and add its untaken cycles
push hl
ex de,hl
; DE was left 9 bytes ahead by the check above, so adding 15-9 puts HL at
; offset 15 of the recompiled jump, where the cycle byte is read
ld de,15-9
add hl,de
; Accumulate into the patched immediate used by waitloop_identified
ld a,(waitloop_length_smc)
add.s a,(hl)
ld (waitloop_length_smc),a
; D is 0 from the load above, so DE becomes the distance from offset 15 to
; offset 19 (presumably the end of the recompiled jump) plus the 9-byte
; look-ahead
ld e,(19-15)+9
jr waitloop_found_jump_next
; Re-enters the data-op scan after the loop-end check found neither the end of
; the loop nor a conditional jump. That check left DE 9 bytes ahead of the
; current recompiled position; subtracting 8 leaves it 1 byte ahead, the same
; state the `inc de` before the first pass of the scan produces.
waitloop_find_data_op_again:
; DE -= 8, with the borrow from E propagated into D by hand
ld a,e
sub 8
ld e,a
jr nc,waitloop_find_data_op_again_loop
dec d
jp waitloop_find_data_op_again_loop
; A waitloop was recognised: overwrite the tail of the branch's recompiled
; code with a call to the matching handler.
;
; Inputs: B = read cycle offset (0, -1 or -2)
; C = variable type (0 = RAM-like variable, $41 = STAT, $44 = LY)
; IX = branch target recompiled code
; stacks as described in the identify_waitloop header
;
; Does not return to the caller of identify_waitloop: the return address is
; popped and discarded, and control leaves via decode_jump_waitloop_return.
;
; Bytes written, relative to the address P popped from the short stack
; (assuming the .s register stores write 16 bits):
; P-7 $21 (LD HL,nn opcode)
; P-6..P-5 IX (branch target)
; P-4 $CD (CALL opcode)
; P-3..P-2 handler address
; P-1 cycle offset of the variable read from the end of the loop
; P target cycle count plus accumulated loop length
; P+1 target cycle count
waitloop_identified:
#ifdef DEBUG
push bc
pop af
push af
push ix
pop af
push af
APRINTF(WaitLoopIdentifiedMessage)
pop ix
pop bc
#endif
#ifdef FASTLOG
push ix
ld hl,2
add hl,sp
ld (hl),c
FASTLOG_EVENT(WAITLOOP_IDENTIFIED, 3)
#endif
; Get the end of the recompiled code to overwrite
pop.s hl
pop de ; Pop the return address
pop af ; Pop the target cycle count into A
pop de ; Pop the negative jump cycle count into D
; Store the target cycle count
sub d
inc hl
ld.s (hl),a
dec hl
; Store the length of the loop in cycles
; The immediate of the ADD below is patched at runtime: identify_waitloop
; zeroes it and waitloop_found_jr adds the cycles of each conditional jump
waitloop_length_smc = $+1
add a,0
ld.s (hl),a
dec hl
; Store the cycle offset of the variable read from the end of the loop
; Computes 1 - (A + D + B) modulo 256: CPL gives -x-1, then 2 is added
add a,d
add a,b
cpl
add a,2
ld.s (hl),a
dec hl
dec hl
; Choose handler based on variable accessed
ld a,c
or a
; LD BC,nn does not affect flags, so the JR tests the OR above
ld bc,handle_waitloop_variable
jr z,_
ld bc,handle_waitloop_ly
; Bit 0 of the type separates $44 (LY, carry clear) from $41 (STAT, carry set)
rra
jr nc,_
ld bc,handle_waitloop_stat
_
ld.s (hl),bc
dec hl
ld.s (hl),$CD ;CALL handler
; Store the target jump address
dec hl
dec hl
ld.s (hl),ix
dec hl
ld.s (hl),$21 ;LD HL,target
jp.sis decode_jump_waitloop_return
; Classifies a read from the $FFxx page. Called from waitloop_resolve_read,
; which then XORs the returned A with B to obtain the variable type.
;
; Inputs: C = low byte of the address
; A = B = read cycle offset (0, -1 or -2)
; Outputs: Z set = register not allowed (DIV or TIMA); the caller
; abandons the waitloop
; Z clear, A = B = ordinary variable (caller's XOR B gives 0)
; Z clear, A = C xor B = LY or STAT (caller's XOR B gives C back)
waitloop_resolve_read_hram:
; Fast return for HRAM, treat as normal variable
; (low byte $80 and above; A still equals B on this path)
bit 7,c
ret nz
; Check for special registers
ld a,c
; Allow LY and STAT
cp LY & $FF
jr z,_
cp STAT & $FF
jr z,_
; Disallow DIV and TIMA
cp DIV & $FF
ret z
cp TIMA & $FF
; Treat everything else as a normal variable
; LD does not change flags, so the final RET carries the TIMA comparison
; result: Z set for TIMA, clear for anything else
ld a,b
ret
_
; Resets Z flag
; (A = C here and B is 0, -1 or -2, none of which equals the $41/$44
; register offsets, so the result is non-zero)
xor b
ret