Here's what I consider fun:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Levels B, 13D & 13F
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-----------------------------------------------------------------------
; Per-frame code shared by levels B, 13D and 13F.
;   level13D  - calls SUB_CEILING first, then falls through.
;   level13F / levelB / LVLB - enter the shared code directly.
;
; What it does each frame:
;   1. Derives the layer 3 position from layer 1 ($22 = x/4, $24 = y/8).
;   2. Writes the first scanline count of the gradient table ($7FB400).
;   3. Picks one of two buffer sets (alternating on bit 0 of the frame
;      counter) and fills in its scanline counts and per-line layer 3 x
;      values from the LVLB_HDMA offset table.
;   4. Sets or clears bits in $0D9F depending on whether the effect
;      starts on screen.
;
; Scratch usage (as read/written below):
;   $00-$01  initial scanline count ($00D5 - $20)
;   $02-$03  total scanline count before the effect starts
;   $04      frame parity (0 or 1)
;   $05-$07  24-bit pointer: pointer + scanline count table
;   $08-$0A  24-bit pointer: values table
;   $0C      table offset (division quotient, possibly +1)
;
; Assumes 8-bit X/Y on entry (LDY #$03 / STY $4206 are written for
; 8-bit index registers) - TODO confirm against the caller.
; Exits with 8-bit A on every path.
;-----------------------------------------------------------------------
level13D:
JSR SUB_CEILING ; force a ceiling for level 13D, then fall through
level13F:
levelB:
LVLB:
; --- layer 3 position derived from layer 1 ---
REP #$20 ; 16 bit A
LDA $1462 ; layer 1 x
LSR
LSR
STA $22 ; layer 3 x = layer 1 x / 4
LDA $1464 ; layer 1 y
LSR
LSR
LSR
STA $24 ; layer 3 y = layer 1 y / 8
SEP #$20 ; 8 bit A
; --- gradient: first scanline count follows layer 3 y ---
LDA #$19 ; set initial scanline count for HDMA gradient
SEC ; subtract the low byte of layer 3 y from $19
SBC $24 ; makes the gradient scroll with layer 3
STA $7FB400 ; rest of table is set in init
; --- select this frame's buffers ---
LDA $14 ; frame counter
AND #$01 ; get first bit
STA $04 ; store to scratch (frame parity, reused later as an index)
ASL ; x2
CLC
ADC $04 ; plus self (parity * 3, so the only possibilities are 0 and 3)
TAY ; stick in Y
LDA LVLB_HDMA_POINTERS,y
STA $05 ; $05-$07 = pointer to the pointer + scanline count table
LDA LVLB_HDMA_POINTERS+1,y
STA $06
LDA LVLB_HDMA_POINTERS+2,y
STA $07
LDA LVLB_HDMA_POINTERS+6,y
STA $08 ; $08-$0A = pointer to the values table (entries 2 and 3 of the list)
LDA LVLB_HDMA_POINTERS+7,y
STA $09
LDA LVLB_HDMA_POINTERS+8,y
STA $0A
; --- initial scanline count, adjusted per level ---
REP #$20 ; 16 bit A
LDA #$00D5
SEC ; get initial scanline count
SBC $20 ; NOTE(review): $20 is presumably layer 2 y ($1F below is layer 2 x high) - confirm
STA $00 ; store to scratch
LDA $010B ; level number (16-bit read)
CMP #$013D ; if we're in level 13D
BEQ LVLB3D ; set initial scanline count accordingly
CMP #$013F
BEQ LVLB_NO_HDMA; no HDMA (except gradient) on 13F
SEP #$20 ; 8 bit A
LDA $1F ; high byte of layer 2 x
CMP #$05 ; if 5 or above...
REP #$20 ; 16 bit A
LDA $00 ; load initial scanline count
BCS LVLB_2 ; offset accordingly. carry flag not affected by REP or LDA here
;First half of level B
CLC
ADC #$0080
LVLB_2: ; second half of level B
LVLB_CHECK_HDMA:
STA $02 ; store in scratch (total scanline count before the effect)
CLC
ADC $24 ; add layer 3 y to it
STA $4204 ; store in division registers (16-bit dividend)
LDY #$06 ; divide it by 6
STY $4206 ; these values are used MUCH later. doing it here so I don't have to waste cycles
LDA $02 ; get old value back
CMP #$0081
BCC LVLB_0 ; if <81, a single scanline count entry is enough
; count >= $81: split it into (count - $7F) followed by a $7F entry
SEC
SBC #$007F ; subtract $7F (the size of the second entry) from total scanline count
AND #$007F ; max = 7F
SEP #$20 ; 8 bit A
STA [$05] ; set scanline count
LDA #$7F
LDY #$03 ; y = 3
STA [$05],y ; set second scanline count
LDA #$00 ; set HDMA pointer to beginning of table
STA $4342 ; set low byte of source
BRA LVLB_FIN
LVLB3D: ; Level 13D
LDA $00 ; reload initial scanline count: A still holds the level number from the CMP above
CLC
ADC #$00A0
BRA LVLB_CHECK_HDMA
LVLB_NO_HDMA: ; reached with 16 bit A from the 13F check
SEP #$20 ; 8 bit A
LDA #$08 ; set bit 3 ($08) in $0D9F
TSB $0D9F
LDA #$10 ; clear bit 4 ($10) in $0D9F
TRB $0D9F ; NOTE(review): $0D9F is presumably the HDMA channel enable mirror - confirm
RTS ; return
LVLB_0:
SEP #$20 ; 8 bit A
LDY #$03 ; y = 3
STA [$05],y ; store initial scanline count in the second entry
LDA #$03 ; set HDMA pointer to second value of table (skips the first entry)
STA $4342 ; set low byte of source
LVLB_FIN:
REP #$20 ; 16 bit A
LDA $22 ; layer 3 x
STA [$08] ; first value in the values table = unmodified layer 3 x
SEP #$20 ; 8 bit A
; --- animation timers: $7EC28F counts down, $7EC290 counts 0..$0D ---
LDA $9D ; check if game paused
BNE LVLB_TIMER_SKIP ; skip if so
LDA $14 ; frame timer
AND #$07 ; only DECREMENT timer every 8 frames
BNE LVLB_TIMER_SKIP
LDA $7EC28F ; timer
DEC A
BNE LVLB_TIMER
LDA #$06 ; reset timer at 0
STA $7EC28F
LDA $7EC290
INC A ; increment secondary timer
CMP #$0E
BCC LVLB_TIMER2 ; if E+...
LDA #$00 ; reset to 00
LVLB_TIMER2:
STA $7EC290
BRA LVLB_TIMER_SKIP
LVLB_TIMER:
STA $7EC28F
LVLB_TIMER_SKIP:
; --- consume the division started at LVLB_CHECK_HDMA ---
LDA $4214 ; division quotient (low byte)
STA $0C ; store to scratch
LDA $7EC28F ; timer...
SEC
SBC $4216 ; ...minus division remainder
BEQ LVLB_DIV4 ; if not 0...
BCS LVLB_DIV ; and not negative... continue
LVLB_DIV4:
CLC ; otherwise add 6
ADC #$06
INC $0C ; increment table offset
LVLB_DIV:
LDY #$06 ; y = 6
STA [$05],y ; set scanline count of the third entry
LDA $7EC290 ; secondary timer
CLC
ADC $0C ; add table offset
CMP #$0E ; if below E...
BCC LVLB_DIV2 ; continue
SEC ; else subtract E until it's below E
LVLB_DIV3:
SBC #$0E
CMP #$0E
BCS LVLB_DIV3
LVLB_DIV2:
ASL A ; two byte table
CLC
ADC #$1A ; start at the last of 14 consecutive entries ($1A = 13 * 2)
TAX
LDY #$1A ; set y too
INC $08 ; increase pointer by 2 (skip the value written at LVLB_FIN)
INC $08
REP #$20 ; 16 bit A
LVLB_LOOP: ; copy 14 values, walking both tables downwards
LDA LVLB_HDMA,x ; get offset
CLC
ADC $22 ; add in layer 3 x
STA [$08],y ; set to table
DEX
DEX
DEY
DEY
BPL LVLB_LOOP ; y goes $1A..$00, then wraps negative and the loop ends
SEP #$20 ; 8 bit A
LDA $04 ; get frame parity
TAX ; stick in x
LDA LVLB_HDMA_SOURCE,x
STA $4343 ; set high byte of source.
REP #$20 ; 16 bit A
LDA $02 ; total scanline count before water
CMP #$00E0
SEP #$20 ; 8 bit A (SEP does not touch the carry from CMP)
BCS LVLB_NO_HDMA_2 ; if 00E0 or above, skip HDMA for this frame (larger than the screen anyway)
; table needs to be set up every frame regardless, hence this check after instead of before table set up code
LDA #$18 ; bits 3 and 4 ($08|$10)
TSB $0D9F ; Set HDMA channels
RTS
LVLB_NO_HDMA_2: ; same as LVLB_NO_HDMA, but already in 8 bit A
LDA #$08 ; set bit 3 ($08) in $0D9F
TSB $0D9F
LDA #$10 ; clear bit 4 ($10) in $0D9F
TRB $0D9F
RTS
LVLB_HDMA: ; signed 16-bit x offsets added to layer 3 x for the HDMA effect
; One full 14-entry cycle followed by its first 13 entries again.
; LVLB_LOOP reads 14 consecutive words ending at index (phase + 13),
; phase = 0..13, so the repeat lets any phase be read without wrapping.
dw $0000,$FFFF,$FFFE,$FFFD,$FFFD,$FFFE,$FFFF ; cycle:   0,-1,-2,-3,-3,-2,-1
dw $0000,$0001,$0002,$0003,$0003,$0002,$0001 ;          0,+1,+2,+3,+3,+2,+1
dw $0000,$FFFF,$FFFE,$FFFD,$FFFD,$FFFE,$FFFF ; repeat:  0,-1,-2,-3,-3,-2,-1
dw $0000,$0001,$0002,$0003,$0003,$0002       ;          0,+1,+2,+3,+3,+2
LVLB_HDMA_POINTERS: ; 24-bit buffer addresses, indexed by frame parity * 3
dl $7FB600 ; frame 1, pointers + scanline count table
dl $7FB700 ; frame 2
dl $7FB6A0 ; frame 1, values (read at +6 from the frame 1 entry)
dl $7FB7A0 ; frame 2
LVLB_HDMA_SOURCE: ; value stored to $4343 (high byte of source), indexed by frame parity
db $B6,$B7