]> cloudbase.mooo.com Git - avrcpm.git/blobdiff - avrcpm/avr/z80.asm
From experimental:
[avrcpm.git] / avrcpm / avr / z80.asm
old mode 100755 (executable)
new mode 100644 (file)
index 873c922..b7e77ce
 #if defined atmega8
        .include "m8def.inc"
 #elif defined atmega168
-       .include "m8def.inc"
+       .include "m168def.inc"
 #else                               /* default */
        .include "m88def.inc"
        ;FUSE_H=0xDF
        ;FUSE_L=0xF7
 #endif
 .list
-
+.listmac
 
 #ifndef DRAM_DQ_ORDER                  /* If this is set to 1, the portbits  */
        #define DRAM_DQ_ORDER 0         /* for DRAM D1 and WE are swapped.    */
@@ -59,8 +59,9 @@
        .equ refr_vect = OC2Aaddr
 #endif
 
+#define DRAM_WORD_ACCESS 0     /* experimental */
 
-#define EM_Z80 0       /* we don't have any z80 instructions yet */
+#define EM_Z80 0               /* we don't have any z80 instructions yet */
 
 .equ MMC_DEBUG   = 0
 .equ INS_DEBUG   = 0
 .equ ram_a6 = 6
 .equ ram_a7 = 7
 
-.equ RAM_AH_MASK = 0xE0             ; ram_a[7..5]
-.equ PD_OUTPUT_MASK = 0xFE
+.equ P_OE  = PORTD
+.equ P_AH  = PORTD
+.equ P_A8  = PORTD
+.equ P_MMC_CS = PORTD
+                    ; ram_a[8..5]
+.equ RAM_AH_MASK = (1<<ram_a8)|(1<<ram_a7)|(1<<ram_a6)|(1<<ram_a5)
+.equ PD_OUTPUT_MASK = (1<<mmc_cs) | (1<<ram_oe) | RAM_AH_MASK
 
 
 ;Port B
 .equ ram_ras = 5
 .equ mmc_sck = 5
 
-.equ RAM_AL_MASK = 0x1F             ; ram_a[4..0]
-.equ PB_OUTPUT_MASK = 0x3F
+
+.equ P_RAS = PORTB
+.equ P_AL  = PORTB
+                       ; ram_a[4..0]
+.equ RAM_AL_MASK = (1<<ram_a4)|(1<<ram_a3)|(1<<ram_a2)|(1<<ram_a1)|(1<<ram_a0)
+.equ PB_OUTPUT_MASK = (1<<ram_ras) | RAM_AL_MASK
 
 ;Port C
 #if DRAM_DQ_ORDER == 1
 .equ ram_d3 =  3
 .equ ram_cas=  5
 
+.equ P_DQ  = PORTC
+.equ P_W   = PORTC
+.equ P_CAS = PORTC
 .equ RAM_DQ_MASK = (1<<ram_d3)|(1<<ram_d2)|(1<<ram_d1)|(1<<ram_d0)
 .equ PC_OUTPUT_MASK = (1<<ram_cas)|(1<<ram_w)
 
 .equ ZFL_C     =       0
 
 ;Register definitions
-.def z_a      = r2
+.undef xl              ;r26
+.undef xh              ;r27
+
+.def   _tmp    = r0    ;  0
+.def   _0      = r1
+
+.def   z_c     = r4
+.def   z_b     = r5
+.def   z_e     = r6
+.def   z_d     = r7
+.def   z_l     = r8
+.def   z_h     = r9
+.def   z_a     = r10
+
+.def   insdecl = r12   ;
+.def   insdech = r13   ;
+.def   z_spl   = r14
+.def   z_sph   = r15   ;
+.def   temp    = r16   ;
+.def   temp2   = r17   ;
+.def   temp3   = r18
+.def   temp4   = r19
+.def   z_flags = r20   ;
+                       ;
+.def   opl     = r22   ;
+.def   oph     = r23   ;
+.def   z_pcl   = r24   ;
+.def   z_pch   = r25   ;
+.def   adrl    = r26   ;
+.def   adrh    = r27   ;
+; yl           ;r28
+; yh           ;r29
+; zl           ;r30    ;
+; zh           ;r31    ;
+
+
+#if 0
+;Register definitions
+.def   _tmp    = r0    ;  0
+.def   _0      = r1
+;.def z_a      = r2
 .def z_b      = r3
 .def z_c      = r4
 .def z_d      = r5
 .def z_e      = r6
 .def z_l      = r7
 .def z_h      = r8
-.def z_spl    = r9
-.def z_sph    = r10
-
-.def dsk_trk  = r11
-.def dsk_sec  = r12
-.def dsk_dmah = r13
-.def dsk_dmal = r14
-
-;.def parityb  = r15
-
-.def temp     = R16    ;The temp register
-.def temp2    = R17    ;Second temp register
-.def trace    = r18
-.def opl      = r19
-.def oph      = r20
-.def adrl     = r21
-.def adrh     = r22
-.def insdecl  = r23
-.def z_pcl    = r24
-.def z_pch    = r25
-.undef xl
-.undef xh
-.def insdech  = r26
-.def z_flags  = r27
+;.def z_spl    = r9
+;.def z_sph    = r10
+.def   z_a     = r11
+.def   _wl     = r12
+.def   _wh     = r13
+.def   z_spl   = r14
+.def   z_sph   = r15   ;
+.def   temp    = r16   ;
+.def   temp2   = r17   ;
+.def   temp3   = r18
+.def   temp4   = r19
+.def   z_flags = r20   ;
+.def   trace   = r21   ;
+.def   insdecl = r22   ;
+.def   insdech = r23   ;
+.def   z_pcl   = r24   ;
+.def   z_pch   = r25   ;
+.undef xl              ;r26
+.undef xh              ;r27
+.undef yl              ;r28
+.undef yh              ;r29
+.def opl       = r26   ;
+.def oph       = r27   ;
+.def adrl      = r28   ;
+.def adrh      = r29   ;
+; zl           ;r30    ;
+; zh           ;r31    ;
+#endif
+
+#if defined __ATmega8__
+       .equ    flags = TWBR
+#else
+       .equ    flags = GPIOR0
+#endif
 
+       .equ    hostact = 7             ;host active flag
+       .equ    hostwrt = 6             ;host written flag
+       .equ    rsflag  = 5             ;read sector flag
+       .equ    readop  = 4             ;1 if read operation
+       .equ    trace   = 0
 
 ; This is the base z80 port address for clock access
 #define        TIMERPORT 0x40
 
 
 
-       ;SRAM
-       .dseg
-       
-;Sector buffer for 512 byte reads/writes from/to SD-card
-
-sectbuff:
-    .byte   512
-
-
 .cseg
 .org 0
        rjmp start              ; reset vector
@@ -206,21 +265,23 @@ start:
 ; - Kill wdt
        wdr
 #if defined __ATmega8__
-       ldi temp,0
-       out MCUCSR,temp
+       out MCUCSR,_0
        
        ldi temp,(1<<WDCE) | (1<<WDE)
        out WDTCSR,temp
        ldi temp,(1<<WDCE)
        out WDTCSR,temp
+       ldi temp,(1<<PUD)       ;disable pullups
+       out SFIOR,temp
 #else
-       ldi temp,0
-       out MCUSR,temp
+       out MCUSR,_0
 
        ldi temp,(1<<WDCE) | (1<<WDE)
        sts WDTCSR,temp
        ldi temp,(1<<WDCE)
        sts WDTCSR,temp
+       ldi temp,(1<<PUD)       ;disable pullups
+       out MCUCR,temp
 #endif
 
 ; - Setup Ports
@@ -231,19 +292,18 @@ start:
        ldi temp,PC_OUTPUT_MASK
        out DDRC,temp
 
-       sbi PORTC,ram_w
-       sbi PORTC,ram_cas
-       sbi PORTB,ram_ras
-       sbi PORTD,ram_oe
-       sbi PORTD,mmc_cs
+       sbi P_W,ram_w
+       sbi P_CAS,ram_cas
+       sbi P_RAS,ram_ras
+       sbi P_OE,ram_oe
+       sbi P_MMC_CS,mmc_cs
 
 
 ; - Init serial port
 
-       ldi     temp,0          ; reset receive buffer
-       sts     rxcount,temp
-       sts     rxidx_r,temp
-       sts     rxidx_w,temp
+       sts     rxcount,_0      ; reset receive buffer
+       sts     rxidx_r,_0
+       sts     rxidx_w,_0
        
 
 #if defined __ATmega8__
@@ -291,10 +351,9 @@ start:
 
        ldi     zl,low(timer_base)
        ldi     zh,high(timer_base)
-       ldi     temp,0
        ldi     temp2,timer_size
 ti_loop:
-       st      z+,temp
+       st      z+,_0
        dec     temp2
        brne    ti_loop
 
@@ -325,16 +384,8 @@ ti_loop:
 
 
 .if BOOTWAIT
-       ldi temp,0
-bootwait1:                     
-       push temp               ;2
-       ldi temp,0              ;1
-bootwait2:                     
-       dec temp                ;1
-       brne bootwait2          ;2
-       pop temp                ;2
-       dec temp                ;1
-       brne bootwait1          ;2   (3*256 + 5) * 256 = 198K cycles
+       ldi temp,10
+       rcall delay_ms
 
 .endif
 
@@ -349,7 +400,7 @@ bootwait2:
 
 .if MEMTEST
        rcall printstr
-       .db "Testing RAM...",13,0
+       .db "Testing RAM: fill...",0,0
 
 ;Fill RAM
        ldi adrl,0
@@ -358,11 +409,20 @@ ramtestw:
        mov temp,adrh
        eor temp,adrl
        rcall memwritebyte
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
        brcc ramtestw
+       rcall printstr
+       .db "wait...",0
+
+       ldi     temp2,8
+ramtestwl:
+       ldi     temp,255
+       rcall   delay_ms
+       dec     temp2
+       brne    ramtestwl
+
+       rcall printstr
+       .db "reread...",13,0,0
 
 ;re-read RAM
        ldi adrl,0
@@ -388,10 +448,7 @@ ramtestr:
        ldi temp,13
        rcall uartPutc
 ramtestrok:
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
        brcc ramtestr
 
 .endif
@@ -403,10 +460,7 @@ ramtestrok:
 ramfillw:
        ldi temp,0xcb
        rcall memwritebyte
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
        brcc ramfillw
 .endif
 
@@ -418,8 +472,8 @@ ramfillw:
        rcall mmcReadSect
 
 ;Save to Z80 RAM (only 128 bytes because that's retro)
-       ldi zl,low(sectbuff)
-       ldi zh,high(sectbuff)
+       ldi zl,low(hostbuf)
+       ldi zh,high(hostbuf)
        ldi adrh,0x20
        ldi adrl,0x00
 iplwriteloop:
@@ -429,15 +483,12 @@ iplwriteloop:
        rcall memWriteByte
        pop zl
        pop zh
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
-       cpi zl,low(sectbuff+128)
+       adiw adrl,1
+       cpi zl,low(hostbuf+128)
        brne iplwriteloop
-       cpi zh,high(sectbuff+128)
+       cpi zh,high(hostbuf+128)
        brne iplwriteloop
-
+       rcall   dsk_boot                ;init (de)blocking buffer
 
 
 ;Init z80
@@ -446,17 +497,17 @@ iplwriteloop:
        ldi temp,0x20
        mov z_pch,temp
 
-       ldi trace,0
+       cbi     flags,trace
        rcall printstr
        .db 13,"Ok, CPU is live!",13,0,0
 
 main:
-       ldi trace,0
+       cbi     flags,trace
        cpi z_pch,1
        brlo notraceon
        cpi z_pch,$dc
        brsh notraceon
-       ldi trace,1
+       sbi     flags,trace
 notraceon:
 
 
@@ -478,15 +529,14 @@ noprintpc:
 .endif
 
        ; *** Stage 1: Fetch next opcode
-       mov adrl,z_pcl
-       mov adrh,z_pch
+       movw adrl,z_pcl
        rcall memReadByte
        adiw z_pcl,1
 
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace1
+       sbis    flags,trace
+       rjmp    notrace1
        rcall printstr
        .db "PC=",0
        push temp
@@ -502,19 +552,16 @@ notrace1:
 .endif
 
        ; *** Stage 2: Decode it using the ins_table.
-       ldi temp2,0
-       ldi zl,low(inst_table*2)
        ldi zh,high(inst_table*2)
+       mov zl,temp
        add zl,temp
-       adc zh,temp2
-       add zl,temp
-       adc zh,temp2
+       adc zh,_0
        lpm insdecl,Z+
        lpm insdech,Z
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace2
+       sbis    flags,trace
+       rjmp    notrace2
        rcall printstr
        .db ", decoded=",0
        mov temp,insdech
@@ -529,23 +576,16 @@ notrace2:
        ; *** Stage 3: Fetch operand. Use the fetch jumptable for this.
        mov temp,insdecl
        andi temp,0x1F
-       cpi temp,0
        breq nofetch
-       ldi temp2,0
-       lsl temp
-       ldi zl,low(fetchjumps*2)
-       ldi zh,high(fetchjumps*2)
+       ldi zl,low(fetchjumps)
+       ldi zh,high(fetchjumps)
        add zl,temp
-       adc zh,temp2
-       lpm temp,Z+
-       lpm temp2,Z
-       mov zl,temp
-       mov zh,temp2
+       adc zh,_0
        icall
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace3
+       sbis    flags,trace
+       rjmp    notrace3
        rcall printstr
        .db "pre: oph:l=",0
        mov temp,oph
@@ -561,23 +601,18 @@ nofetch:
        ; *** Stage 4: Execute operation :) Use the op jumptable for this.
        mov temp,insdech
        andi temp,0xFC
-       lsr temp
-       cpi temp,0
        breq nooper
-       ldi zl,low(opjumps*2)
-       ldi zh,high(opjumps*2)
-       ldi temp2,0
+       lsr temp
+       lsr temp
+       ldi zl,low(opjumps)
+       ldi zh,high(opjumps)
        add zl,temp
-       adc zh,temp2
-       lpm temp,Z+
-       lpm temp2,Z
-       mov zl,temp
-       mov zh,temp2
+       adc zh,_0
        icall
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace4
+       sbis    flags,trace
+       rjmp    notrace4
        rcall printstr
        .db ",post:oph:l=",0
        mov temp,oph
@@ -591,25 +626,21 @@ nooper:
        ; *** Stage 5: Store operand. Use the store jumptable for this.
        swap insdecl
        swap insdech
-       mov temp,insdecl
+       movw temp,insdecl
        andi temp,0x0E
-       andi insdech,0x30
-       or temp,insdech
+       andi temp2,0x30
+       or temp,temp2
        breq nostore
-       ldi zl,low(storejumps*2)
-       ldi zh,high(storejumps*2)
-       ldi temp2,0
+       lsr temp
+       ldi zl,low(storejumps)
+       ldi zh,high(storejumps)
        add zl,temp
-       adc zh,temp2
-       lpm temp,Z+
-       lpm temp2,Z
-       mov zl,temp
-       mov zh,temp2
+       adc zh,_0
        icall
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace5
+       sbis    flags,trace
+       rjmp    notrace5
        rcall printstr
        .db ", stored.",0
 notrace5:
@@ -618,8 +649,8 @@ notrace5:
 nostore:
 
 .if INS_DEBUG
-       cpi trace,0
-       breq notrace6
+       sbis    flags,trace
+       rjmp    notrace6
        rcall printstr
        .db 13,0
 notrace6:
@@ -633,16 +664,67 @@ notrace6:
 
 ;The hw is modelled to make writing a CPM BIOS easier.
 ;Ports:
-;0 - Con status. Returns 0xFF if the UART has a byte, 0 otherwise.
-;1 - Console input, aka UDR.
-;2 - Console output
-;16 - Track select
-;18 - Sector select
-;20 - Write addr l
-;21 - Write addr h
-;22 - Trigger - write 1 to read, 2 to write a sector using the above info.
-;      This will automatically move track, sector and dma addr to the next sector.
+;0     - Con status. Returns 0xFF if the UART has a byte, 0 otherwise.
+;1     - Console input, aka UDR.
+;2     - Console output
+;15    - Disk select
+;16,17         - Track select
+;18    - Sector select
+;20    - Write addr l
+;21    - Write addr h
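+;22    - Trigger - performs a disk function using the above info. The value
+;                      written selects the function by bit: bit 7 = read sector,
+;                      bit 6 = write sector, bit 5 = boot, bit 4 = home.
+;                      For a write, bits 1..0 give the write type:
+;                      0 = allocated, 1 = directory, 2 = unallocated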
+
+       .equ    READ_FUNC  = 7          ;bit number in the port 22 value: read sector
+       .equ    WRITE_FUNC = 6          ;bit number in the port 22 value: write sector
+       .equ    BOOT_FUNC  = 5          ;bit number in the port 22 value: jump to dsk_boot
+       .equ    HOME_FUNC  = 4          ;bit number in the port 22 value: jump to dsk_home
+
+
+
+;*****************************************************
+;*         CP/M to host disk constants               *
+;*****************************************************
+       .equ    blksize = 1024          ;CP/M allocation size
+       .equ    hostsize = 512          ;host disk sector size
+;      .equ    hostspt = 20            ;host disk sectors/trk
+       .equ    hostblk = hostsize/128  ;CP/M sects/host buff
+;      .equ    CPMSPT = hostblk*hostspt;CP/M sectors/track
+       .equ    CPMSPT = 26             ;
+       .equ    SECMSK = hostblk-1      ;sector mask
+       .equ    SECSHF = log2(hostblk)  ;sector shift
+
+;*****************************************************
+;*        BDOS constants on entry to write           *
+;*****************************************************
+       .equ    WRALL = 0               ;write to allocated
+       .equ    WRDIR = 1               ;write to directory
+       .equ    WRUAL = 2               ;write to unallocated
+       .equ    WRTMSK= 3               ;write type mask
+
+
+       .dseg
+
+seekdsk:       .byte   1       ;requested (seek) disk number
+seektrk:       .byte   2       ;requested track number, low byte first (ports 16/17)
+seeksec:       .byte   1       ;requested sector number (port 18)
+
+hostdsk:       .byte   1       ;disk number belonging to the block in hostbuf
+hostlba:       .byte   2       ;host block number (LBA) of the block in hostbuf, low byte first
+
+unacnt:                .byte   1       ;remaining unallocated record count (0 = none)
+unadsk:                .byte   1       ;disk of the next expected unallocated record
+unatrk:                .byte   2       ;track of the next expected unallocated record
+unasec:                .byte   1       ;sector of the next expected unallocated record
 
+erflag:                .byte   1       ;error flag; cleared at the start of every read/write
+wrtype:                .byte   1       ;write type of the current operation (WRALL/WRDIR/WRUAL)
+dmaadr:                .byte   2       ;Z80 DMA address, low byte first (ports 20/21)
+hostbuf:       .byte   hostsize;host sector buffer (from/to SD-card), hostsize bytes
+
+
+       .cseg
+       
 ;Called with port in temp2. Should return value in temp.
 portRead:
        cpi temp2,0
@@ -667,13 +749,16 @@ portWrite:
        cpi temp2,2
        breq conOut
        cpi temp2,16
-       breq dskTrackSel
+       breq dskTrackSel_l
+       cpi temp2,17
+       breq dskTrackSel_h
        cpi temp2,18
        breq dskSecSel
        cpi temp2,20
        breq dskDmaL
        cpi temp2,21
        breq dskDmaH
+
        cpi temp2,22
        breq dskDoIt
        
@@ -713,203 +798,382 @@ conOut:
 
 
 
-dskTrackSel:
-       mov dsk_trk,temp
+dskTrackSel_l:
+       sts seektrk,temp
+       sts seektrk+1,_0
+       ret
+
+dskTrackSel_h:
+       sts seektrk+1,temp
        ret
 
 dskSecSel:
-       mov dsk_sec,temp
+       sts seeksec,temp
        ret
 
 dskDmal:
-       mov dsk_dmal,temp
+       sts dmaadr,temp
        ret
 
 dskDmah:
-       mov dsk_dmah,temp
+       sts dmaadr+1,temp
        ret
 
 dskDoIt:                       ;port 22 handler: dispatch on the function bits in temp
+       ;See what has to be done. Bits are tested in the order read, write, home, boot; the first one set wins.
+       sbrc    temp,READ_FUNC          ;skip the jump if the read bit is clear
+        rjmp   dsk_read
+       sbrc    temp,WRITE_FUNC         ;skip the jump if the write bit is clear
+        rjmp   dsk_write
+       sbrc    temp,HOME_FUNC          ;skip the jump if the home bit is clear
+        rjmp   dsk_home
+       sbrc    temp,BOOT_FUNC          ;skip the jump if the boot bit is clear
+        rjmp   dsk_boot
+
+       rcall   printstr                ;no function bit set: report the error
+       .db "DISK I/O: Invalid Function code: ",0
+       rcall   printhex                ;NOTE(review): presumably prints temp, the value written - confirm printstr preserves it
+       rjmp    haltinv                 ;does not return to the caller
+
+dsk_boot:                      ;reset the (de)blocking state; also called once after the IPL load
+       cbi     flags,hostact           ;host buffer inactive (hostwrt is not changed here)
+       sts     unacnt,_0               ;clear unalloc count
+       ret
+
+dsk_home:                      ;home function: drop the host buffer unless a write is still pending
+       sbis    flags,hostwrt           ;pending write? then skip the cbi and keep the buffer active
+       cbi     flags,hostact           ;no pending write: clear host active flag
+       ret
+
+
+dsk_read:
+
+.if 0
+       rcall   timer_quit
+       rcall printstr
+       .db " In",0
+.endif
+
 .if DISK_DEBUG
        push temp
        rcall printstr
+       .db 13,0
+       rcall printstr
        .db "Disk read: track ",0
-       mov temp,dsk_trk
+       lds temp,seektrk+1
+       rcall printhex
+       lds temp,seektrk
        rcall printhex
        rcall printstr
-       .db " sector ",0
-       mov temp,dsk_sec
+       .db " sector ",0,0
+       lds temp,seeksec
        rcall printhex
        rcall printstr
-       .db " dma-addr ",0
-       mov temp,dsk_dmah
+       .db " dma-addr ",0,0
+       lds temp,dmaadr+1
        rcall printhex
-       mov temp,dsk_dmal
+       lds temp,dmaadr
        rcall printhex
-       rcall printstr
-       .db ".",13,0
+;      rcall printstr
+;      .db ".",13,0,0
        pop temp
 .endif
+       sts     unacnt,_0
+       sbi     flags,readop            ;read operation
+       sbi     flags,rsflag            ;must read data
+       ldi     temp,WRUAL              ;write type
+       sts     wrtype,temp             ;treat as unalloc
+       rjmp    dsk_rwoper              ;to perform the read
 
-       ;First, convert track/sector to an LBA address (in 128byte blocks)
-       push temp
-       mov adrl,dsk_sec
-       ldi adrh,0
-       mov temp2,dsk_trk
-dskXlateLoop:
-       cpi temp2,0
-       breq dskXlateLoopEnd
-       ldi temp,26
-       add adrl,temp
-       ldi temp,0
-       adc adrh,temp
-       dec temp2
-       rjmp dskXlateLoop
-dskXlateLoopEnd:
-       pop temp
 
-       ;Now, see what has to be done.
-       cpi temp,1
-       breq dskDoItRead
-       cpi temp,2
-       breq dskDoItWrite
-
-dskDoItRead:
-       push adrl
-       ;Convert from 128-byte LBA blocks to 512-byte LBA blocks
-       lsr adrh
-       ror adrl
-       lsr adrh
-       ror adrl
-       ;Read 512-byte sector
-       rcall mmcReadSect
-       pop adrl
-
-       ;Now, move the correct portion of the sector from AVR ram to Z80 ram
-       ldi zl,low(sectbuff)
-       ldi zh,high(sectbuff)
-       ldi temp,128
-       ldi temp2,0
-       sbrc adrl,0
-        add zl,temp
-       sbrc adrl,0
-        adc zh,temp2
-       sbrc adrl,1
-        inc zh
-
-       mov adrh,dsk_dmah
-       mov adrl,dsk_dmal
-
-       ldi temp2,128
-dskDoItReadMemLoop:
-       push temp2
-       ld temp,z+
-       push zh
-       push zl
-       rcall memWriteByte
-       pop zl
-       pop zh
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
-       pop temp2
-       dec temp2
-       brne dskDoItReadMemLoop
-       ret
+dsk_write:
+       ;write the selected CP/M sector
 
-dskDoItWrite:
-;The write routines is a bit naive: it'll read the 512-byte sector the 128byte CPM-sector
-;resides in into memory, will overwrite the needed 128 byte with the Z80s memory buffer
-;and will then write it back to disk. In theory, this would mean that every 512 bytes
-;written will take 4 write cycles, while theoretically the writes could be deferred so we
-;would only have to do one write cycle.
+       andi    temp,WRTMSK
+       sts     wrtype,temp             ;save write type
+       cbi     flags,readop            ;not a read operation
 
 .if DISK_DEBUG
        push temp
        rcall printstr
-       .db "Disk write: track ",0
-       mov temp,dsk_trk
+       .db 13,0
+       rcall printstr
+       .db "Disk write: track ",0,0
+       lds temp,seektrk+1
+       rcall printhex
+       lds temp,seektrk
        rcall printhex
        rcall printstr
-       .db " sector ",0
-       mov temp,dsk_sec
+       .db " sector ",0,0
+       lds temp,seeksec
        rcall printhex
        rcall printstr
-       .db " dma-addr ",0
-       mov temp,dsk_dmah
+       .db " dma-addr ",0,0
+       lds temp,dmaadr+1
        rcall printhex
-       mov temp,dsk_dmal
+       lds temp,dmaadr
        rcall printhex
        rcall printstr
-       .db ".",13,0
+       .db " wrtype ",0,0
+       lds temp,wrtype
+       rcall printhex
+;      rcall printstr
+;      .db ".",13,0,0
        pop temp
 .endif
 
+       cpi     temp,WRUAL              ;write unallocated?
+       brne    dsk_chkuna              ;check for unalloc
+
+;      write to unallocated, set parameters
+       ldi     temp,blksize/128        ;next unalloc recs
+       sts     unacnt,temp
+       lds     temp,seekdsk            ;disk to seek
+       sts     unadsk,temp             ;unadsk = sekdsk
+       lds     temp,seektrk
+       sts     unatrk,temp             ;unatrk = sectrk
+       lds     temp,seektrk+1
+       sts     unatrk+1,temp           ;unatrk = sectrk
+       lds     temp,seeksec
+       sts     unasec,temp             ;unasec = seksec
+;
+dsk_chkuna:
+       ;check for write to unallocated sector
+       lds     temp,unacnt             ;any unalloc remain?
+       tst     temp
+       breq    dsk_alloc               ;skip if not
+
+;      more unallocated records remain
+       dec     temp                    ;unacnt = unacnt-1
+       sts     unacnt,temp
+       lds     temp,seekdsk            ;same disk?
+       lds     temp2,unadsk
+       cp      temp,temp2              ;seekdsk = unadsk?
+       brne    dsk_alloc               ;skip if not
+
+;      disks are the same
+       lds     temp,unatrk
+       lds     temp2,unatrk+1
+       lds     temp3,seektrk
+       lds     temp4,seektrk+1
+       cp      temp,temp3              ;seektrk = unatrk?
+       cpc     temp2,temp4
+       brne    dsk_alloc               ;skip if not
+
+;      tracks are the same
+       lds     temp,seeksec            ;same sector?
+       lds     temp2,unasec
+       cp      temp,temp2              ;seeksec = unasec?
+       brne    dsk_alloc               ;skip if not
+
+;      match, move to next sector for future ref
+       inc     temp2                   ;unasec = unasec+1
+       sts     unasec,temp2
+       cpi     temp2,CPMSPT            ;end of track? (count CP/M sectors)
+       brlo    dsk_noovf               ;skip if no overflow
+
+;      overflow to next track
+       sts     unasec,_0               ;unasec = 0
+       lds     temp,unatrk
+       lds     temp2,unatrk+1
+       subi    temp, low(-1)           ;unatrk = unatrk+1
+       sbci    temp2,high(-1)
+       sts     unatrk,temp
+       sts     unatrk+1,temp2
+;
+dsk_noovf:
+       cbi     flags,rsflag            ;rsflag = 0
+       rjmp    dsk_rwoper              ;to perform the write
+;
+dsk_alloc:
+       ;not an unallocated record, requires pre-read
+       sts     unacnt,_0               ;unacnt = 0
+       sbi     flags,rsflag            ;rsflag = 1
+
+;*****************************************************
+;*     Common code for READ and WRITE follows       *
+;*****************************************************
+
+dsk_rwoper:
+       ;enter here to perform the read/write
+       sts     erflag,_0       ;no errors (yet)
+
+       ;Convert track/sector to an LBA address (in 128byte blocks)
+
+       lds     adrl,seeksec            ;
+       ldi     adrh,0                  ;
+       lds     temp3,seektrk           ;
+       lds     temp4,seektrk+1         ;
+       ldi     temp,CPMSPT             ;
+       mul     temp3,temp              ;
+       add     adrl,r0                 ;
+       adc     adrh,r1                 ;
+       mul     temp4,temp              ;
+       add     adrh,r0                 ;adrh:adrl := sec + trk * SectorsPerTrack
+       clr     _0
+
+       mov     temp,adrl
+       andi    temp,SECMSK             ;mask buffer number
+       push    temp                    ;save for later
+.if DISK_DEBUG
+       rcall printstr
+       .db "; bufnr: ",0,0
+       rcall printhex
+.endif
+
+       ;Convert from CP/M LBA blocks to host LBA blocks
+       ldi temp,SECSHF
+dsk_sh1:
+       lsr     adrh
+       ror     adrl
+       dec     temp
+       brne    dsk_sh1
+                                       ;adrh:adrl = host block to seek
+;      active host sector?
+       sbis    flags,hostact           ;host active?
+        rjmp   dsk_filhst              ;fill host if not
+
+;      host buffer active, same as seek buffer?
+       lds     temp,seekdsk
+       lds     temp3,hostdsk           ;same disk?
+       cp      temp,temp3              ;seekdsk = hostdsk?
+       brne    dsk_nomatch
+
+;      same disk, same block?
+       lds     temp3,hostlba
+       lds     temp4,hostlba+1
+       cp      adrl,temp3
+       cpc     adrh,temp4      
+       breq    dsk_match
+;
+dsk_nomatch:
+       ;proper disk, but not correct sector
+       sbic    flags,hostwrt           ;host written?
+        rcall  dsk_writehost           ;clear host buff
+
+dsk_filhst:
+       ;may have to fill the host buffer
+       lds     temp,seekdsk
+       sts     hostdsk,temp
+       sts     hostlba,adrl
+       sts     hostlba+1,adrh
+
+       sbic    flags,rsflag            ;need to read?
+        rcall  dsk_readhost            ;yes, if 1
+       cbi     flags,hostwrt           ;no pending write
+
+dsk_match:
+       sbi     flags,hostact           ;host buffer active now
+
+       ;copy data to or from buffer
+       ldi     zl,low(hostbuf)
+       ldi     zh,high(hostbuf)
+       ldi     temp,128
+       pop     temp2                   ;get buffer number (which part of hostbuf)
+       mul     temp2,temp
+       add     zl,r0                   ;offset in hostbuf
+       adc     zh,r1
+.if DISK_DEBUG
+       push    r0
+       push    r1
+       clr     _0
+       rcall printstr
+       .db "; host buf adr: ",0,0
+       pop     temp
+       rcall printhex
+       pop     temp
+       rcall printhex
+.endif
+       clr     _0
 
-       push adrl
-       push adrh
-       ;Convert from 128-byte LBA blocks to 512-byte LBA blocks
-       lsr adrh
-       ror adrl
-       lsr adrh
-       ror adrl
-       ;Read 512-byte sector
-       rcall mmcReadSect
-       pop adrh
-       pop adrl
-
-       push adrl
-       push adrh
-
-;Copy the data from the Z80 DMA buffer in external memory to the right place in the
-;sector buffer.
-       ;Now, move the correct portion of the sector from AVR ram to Z80 ram
-       ldi zl,low(sectbuff)
-       ldi zh,high(sectbuff)
-       ldi temp,128
-       ldi temp2,0
-       sbrc adrl,0
-        add zl,temp
-       sbrc adrl,0
-        adc zh,temp2
-       sbrc adrl,1
-        inc zh
-       mov adrh,dsk_dmah
-       mov adrl,dsk_dmal
-       ldi temp2,128
-dskDoItWriteMemLoop:
-       push temp2
+       lds     adrl,dmaadr
+       lds     adrh,dmaadr+1
+       push    yl
+       ldi     yl,128                  ;length of move
+       sbic    flags,readop            ;which way?
+        rjmp   dsk_rmove               ;skip if read
 
-       push zh
-       push zl
+;      mark write operation
+       sbi     flags,hostwrt           ;hostwrt = 1
+dsk_wmove:
        rcall memReadByte
-       pop zl
-       pop zh
        st z+,temp
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
+       dec yl
+       brne dsk_wmove
+       rjmp    dsk_rwmfin
+       
+dsk_rmove:
+       ld      temp,z+
+       rcall   memWriteByte
+       adiw    adrl,1
+       dec     yl
+       brne    dsk_rmove
+dsk_rwmfin:
+       pop     yl
+;      data has been moved to/from host buffer
+       lds     temp,wrtype     ;write type
+       cpi     temp,WRDIR      ;to directory?
+       breq    dsk_wdir
+       lds     temp,erflag
+.if 0
+       rcall   timer_quit
+       rcall printstr
+       .db " Out",0
+.endif
+       ret                     ;no further processing
+dsk_wdir:
+;      clear host buffer for directory write
+       lds     temp,erflag
+       tst     temp            ;errors?
+       breq    dsk_wdir1
+       ret                     ;skip if so
+dsk_wdir1:
+       rcall   dsk_writehost   ;clear host buff
+       cbi     flags,hostwrt   ;buffer written
+       lds     temp,erflag
+       ret
+
+
+;*****************************************************
+;*     WRITEhost performs the physical write to     *
+;*     the host disk, READhost reads the physical   *
+;*     disk.                                        *
+;*****************************************************
+
+dsk_writehost:
+       ;hostlba = host block # to write (hostdsk is not read here).
+       ;Write "hostsize" bytes from hostbuf via mmcWriteSect; adrh:adrl are preserved.
+       ;erflag is cleared unconditionally before returning: no status from
+       ;mmcWriteSect is checked here, so erflag never reports a write error.
+       
+       push    adrh                    ;save the caller's address registers
+       push    adrl
+       lds     adrl,hostlba            ;adrh:adrl = host block number
+       lds     adrh,hostlba+1
+       rcall   mmcWriteSect
+       pop     adrl
+       pop     adrh
+       sts     erflag,_0               ;always reports "no error"
+       ret
 
-       pop temp2
-       dec temp2
-       brne dskDoItWriteMemLoop
+dsk_readhost:
+       ;hostdsk = host disk #, hostlba = host block #.
+       ;Read "hostsize" bytes into hostbuf and return
+       ;error flag in erflag (currently always cleared to 0).
 
-       pop adrh
-       pop adrl
+       push    adrh
+       push    adrl
+       lds     adrl,hostlba
+       lds     adrh,hostlba+1
+       rcall   mmcReadSect
+       pop     adrl
+       pop     adrh
+       sts     erflag,_0
+       ret
 
-       ;Convert from 128-byte LBA blocks to 512-byte LBA blocks
-       lsr adrh
-       ror adrl
-       lsr adrh
-       ror adrl
-       ;Write the sector back.
-       rcall mmcWriteSect
 
-       ;All done :)
-       ret
-       
+;***************************************************************************
 
 ; ----------------- MMC/SD routines ------------------
 
@@ -974,30 +1238,31 @@ mmcInit:
        out SPCR,temp
        
        ;Init start: send 80 clocks with cs disabled
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs
 
-       ldi temp2,20
+;      ldi temp2,20
+       ldi temp2,10     ; exactly 80 clocks
 mmcInitLoop:
        mov temp,temp2
        rcall mmcByte
        dec temp2
        brne mmcInitLoop
 
-       cbi PORTD,mmc_cs
+       cbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
        rcall mmcByteNoSend
 
        ;Send init command
-       cbi PORTD,mmc_cs
+       cbi P_MMC_CS,mmc_cs
        ldi temp,0xff   ;dummy
        rcall mmcByte
        ldi temp,0xff   ;dummy
@@ -1017,19 +1282,19 @@ mmcInitLoop:
        ldi temp,0xff   ;return byte
        rcall mmcByte
 
-       ldi temp2,0
-       rcall mmcWaitResp
+       ldi temp2,0                     ;Error Code 0
+       rcall mmcWaitResp               ;Test on CMD0 is OK
 
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs             ;disable /CS
        rcall mmcByteNoSend
 
 
 ;Read OCR till card is ready
-       ldi temp2,150
+       ldi temp2,20                    ;repeat counter
 mmcInitOcrLoop:        
        push temp2
 
-       cbi PORTD,mmc_cs
+       cbi P_MMC_CS,mmc_cs             ;enable /CS
        ldi temp,0xff   ;dummy
        rcall mmcByte
        ldi temp,0x41   ;cmd
@@ -1042,33 +1307,38 @@ mmcInitOcrLoop:
        rcall mmcByte
        ldi temp,0      ;pyl
        rcall mmcByte
-       ldi temp,0x95   ;crc
+;      ldi temp,0x95                   ;crc
+       ldi temp,0x01                   ;crc
        rcall mmcByte
        rcall mmcByteNoSend
 
        ldi temp2,1
-       rcall mmcWaitResp
+       rcall mmcWaitResp               ;wait until mmc-card send a byte <> 0xFF
+                                                       ;the first answer must be 0x01 (Idle-Mode)
        cpi temp,0
-       breq mmcInitOcrLoopDone
+       breq mmcInitOcrLoopDone ;second answer is 0x00 (Idle-Mode leave) CMD1 is OK
 
-       sbi PORTD,mmc_cs
-       rcall mmcByteNoSend
+       sbi P_MMC_CS,mmc_cs             ;disable /CS
+
+;      rcall mmcByteNoSend     ;unnecessary
+
+       ldi     temp,10
+       rcall   delay_ms
        
        pop temp2
        dec temp2
        cpi temp2,0
-       brne mmcInitOcrLoop
+       brne mmcInitOcrLoop             ;repeat 
 
-       ldi temp,4
+       ldi temp2,4  
        rjmp mmcWaitErr
 
 mmcInitOcrLoopDone:
        pop temp2
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs             ;disable /CS
        rcall mmcByteNoSend
 
-       ldi temp,0
-       out SPCR,temp
+       out SPCR,_0
        ret
 
 
@@ -1078,7 +1348,7 @@ mmcReadSect:
        ldi temp,0x50
        out SPCR,temp
 
-       cbi PORTD,mmc_cs
+       cbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
        ldi temp,0x51   ;cmd (read sector)
        rcall mmcByte
@@ -1107,25 +1377,24 @@ mmcReadSect:
        rcall mmcWaitResp
 
        ;Read sector to AVR RAM
-       ldi zl,low(sectbuff)
-       ldi zh,high(sectbuff)
+       ldi zl,low(hostbuf)
+       ldi zh,high(hostbuf)
 mmcreadloop:
        rcall mmcByteNoSend
        st z+,temp
-       cpi zl,low(sectbuff+512)
+       cpi zl,low(hostbuf+512)
        brne mmcreadloop
-       cpi zh,high(sectbuff+512)
+       cpi zh,high(hostbuf+512)
        brne mmcreadloop
 
        ;CRC
        rcall mmcByteNoSend
        rcall mmcByteNoSend
 
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
 
-       ldi temp,0
-       out SPCR,temp
+       out SPCR,_0
        ret
 
 
@@ -1135,7 +1404,7 @@ mmcWriteSect:
        ldi temp,0x50
        out SPCR,temp
 
-       cbi PORTD,mmc_cs
+       cbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
 
        ldi temp,0x58   ;cmd (write sector)
@@ -1165,14 +1434,14 @@ mmcWriteSect:
        rcall mmcByte
 
        ;Write sector from AVR RAM
-       ldi zl,low(sectbuff)
-       ldi zh,high(sectbuff)
+       ldi zl,low(hostbuf)
+       ldi zh,high(hostbuf)
 mmcwriteloop:
        ld temp,z+
        rcall mmcByte
-       cpi zl,low(sectbuff+512)
+       cpi zl,low(hostbuf+512)
        brne mmcwriteloop
-       cpi zh,high(sectbuff+512)
+       cpi zh,high(hostbuf+512)
        brne mmcwriteloop
 
        ;CRC
@@ -1188,11 +1457,10 @@ mmcwaitwritten:
        cpi temp,0xff
        brne mmcwaitwritten
 
-       sbi PORTD,mmc_cs
+       sbi P_MMC_CS,mmc_cs
        rcall mmcByteNoSend
 
-       ldi temp,0
-       out SPCR,temp
+       out SPCR,_0
        ret
 
 
@@ -1213,466 +1481,237 @@ resetAVR:
 resetwait:
        rjmp resetwait
 
-
 ; ------------------ DRAM routines -------------
 
-; TODO: 
-
-#if DRAM_DQ_ORDER == 1
- #define CLASSIC_DRAM 0
-#else
- #define CLASSIC_DRAM 1                /* Change manualy, if you want new hw w/ old sw */
-#endif
-
-
-#if DRAM_DQ_ORDER == 0
- #if CLASSIC_DRAM == 1
-   #error "Old harware can not work with new software!"
- #endif
-#endif
-
-; ****************************************************************************
-
-#if CLASSIC_DRAM
-
-; ********************** DRAM routines from Sprite_tm ************************
-
-;Sends the address in zh:zl to the ram
-dram_setaddr:
-       push temp
-       in temp,portd
-       andi temp,0x17
-       out portd,temp
-       in temp,portb
-       andi temp,0xE0
-       out portb,temp
-       sbrc zl,0
-        sbi portb,ram_a0
-       sbrc zl,1
-        sbi portb,ram_a1
-       sbrc zl,2
-        sbi portb,ram_a2
-       sbrc zl,3
-        sbi portb,ram_a3
-       sbrc zl,4
-        sbi portb,ram_a4
-       sbrc zl,5
-        sbi portd,ram_a5
-       sbrc zl,6
-        sbi portd,ram_a6
-       sbrc zl,7
-        sbi portd,ram_a7
-       sbrc zh,0
-        sbi portd,ram_a8
-       pop temp
-       ret
-
-dram_getnibble:
-       andi temp,0xf0
-       sbic pinc,ram_d0
-        ori temp,0x1
-       sbic pinc,ram_d1
-        ori temp,0x2
-       sbic pinc,ram_d2
-        ori temp,0x4
-       sbic pinc,ram_d3
-        ori temp,0x8
-       ret
-
-dram_sendnibble:
-       push temp2
-       in temp2,portc
-       andi temp2,~RAM_DQ_MASK
-
-       sbrc temp,0
-        ori temp2,(1<<ram_d0)
-       sbrc temp,1
-        ori temp2,(1<<ram_d1)
-       sbrc temp,2
-        ori temp2,(1<<ram_d2)
-       sbrc temp,3
-        ori temp2,(1<<ram_d3)
-
-       out portc,temp2
-       pop temp2
-       ret
-
+; DRAM_SETADDR val, low_and_mask, low_or_mask, high_and_mask, high_or_mask -- writes (val & low_and_mask) | low_or_mask to P_AL, then (val & high_and_mask) | high_or_mask | (1<<mmc_cs) to P_AH; clobbers temp
+.macro DRAM_SETADDR
+       mov temp,@0             ; temp = val (first macro argument)
+.if low(@1) != 0xff            ; an all-ones AND mask changes nothing, so the andi is omitted
+       andi temp,@1
+.endif
+.if  low(@2) != 0              ; an all-zero OR mask changes nothing, so the ori is omitted
+       ori temp, @2
+.endif
+       out P_AL,temp           ; whole port written at once, no read-modify-write
+       
+       mov temp,@0             ; reload val: the same byte also supplies the P_AH bits
+.if low(@3) != 0xff
+       andi temp,@3
+.endif
+       ori temp, @4 | (1<<mmc_cs)      ; the mmc_cs bit is always written as 1 by this macro
+       out P_AH,temp
+.endm
 
 ;Loads the byte on address adrh:adrl into temp.
+;must not alter adrh:adrl
+
 dram_read:
        cli
-       mov zl,adrh
-       ldi zh,0
-       mov temp2,adrl
-       lsl temp2
-       rol zl
-       rol zh
-       ;z=addr[15-7]
-       rcall dram_setaddr
-       cbi portb,ram_ras
-
-       ldi zh,0
-       mov zl,adrl
-       andi zl,0x7F
-       rcall dram_setaddr
-       nop
-       cbi portc,ram_cas
-       nop
-       nop
-       cbi portd,ram_oe
-       nop
-       rcall dram_getnibble    
-       sbi portd,ram_oe
+       DRAM_SETADDR adrh, ~0,(1<<ram_ras), ~(1<<ram_a8), (1<<ram_oe)
+       cbi P_RAS,ram_ras
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_oe)), (1<<ram_a8)
+       cbi P_CAS,ram_cas
+       cbi P_A8,ram_a8
+       in  temp,P_DQ-2         ; PIN
+       sbi P_CAS,ram_cas
+
+       cbi P_CAS,ram_cas
+       andi temp,0x0f
        swap temp
-       sbi portc,ram_cas
-
-       ldi zh,0
-       mov zl,adrl
-       ori zl,0x80
-       rcall dram_setaddr
-       nop
-       cbi portc,ram_cas
-       nop
-       cbi portd,ram_oe
-       nop
-       nop
-       rcall dram_getnibble    
-
-       sbi portd,ram_oe
-       sbi portc,ram_cas
-       sbi portb,ram_ras
-       sei
-       ret
-
-;Writes the byte in temp to  adrh:adrl
-dram_write:
-       cli
-
-       in temp2,ddrc
-       ori temp2,RAM_DQ_MASK
-       out ddrc,temp2
-
-       rcall dram_sendnibble
-
-       mov zl,adrh
-       ldi zh,0
-       mov temp2,adrl
-       lsl temp2
-       rol zl
-       rol zh
-       ;z=addr[15-7]
-       rcall dram_setaddr
-       nop
-       nop
-       cbi portb,ram_ras
-
-       ldi zh,0
-       mov zl,adrl
-       ori zl,0x80
-       rcall dram_setaddr
-       nop
-       nop
-       cbi portc,ram_cas
-       nop
-       nop
-       cbi portc,ram_w
-       nop
-       nop
-       nop
-       sbi portc,ram_w
-       sbi portc,ram_cas
-
+       in  temp2,P_DQ-2        ; PIN
+       andi temp2,0x0f
+       or  temp,temp2
 
-       ldi zh,0
-       mov zl,adrl
-       andi zl,0x7F
-       rcall dram_setaddr
-       swap temp
-       rcall dram_sendnibble
-       cbi portc,ram_cas
-       nop
-       nop
-       cbi portc,ram_w
-       nop
-       nop
-       sbi portc,ram_w
-       nop
-       nop
-       sbi portc,ram_cas
-       sbi portb,ram_ras
-
-       in temp,ddrc
-       andi temp,~RAM_DQ_MASK
-       out ddrc,temp
-       in temp,portc
-       andi temp,~RAM_DQ_MASK
-       out portc,temp
+       sbi P_OE,ram_oe
+       sbi P_CAS,ram_cas
+       sbi P_RAS,ram_ras
        sei
        ret
-#endif  /* CLASSIC_DRAM == 1 */
-
-; ****************************************************************************
-
-#if ! CLASSIC_DRAM
-
-; ***************************** New DRAM routines ****************************
-
-; Defines how the dram nibbles are arganized.
-; RAMORG == 0 : A7 == 0: low nibble, A7 == 1: high nibble (Original Sprite_tm design)
-; RAMORG == 1 : A8 == 0: low nibble, A8 == 1: high nibble (faster)
-; 
-#define RAMORG 1
 
-#if RAMORG == 0
-;Sends the address in zh:zl to the ram
-dram_setaddr:
-       push temp
-       in temp,PORTB
-       andi temp,~RAM_AL_MASK
-       sbrc zl,0
-        ori temp,(1<<ram_a0)
-       sbrc zl,1
-        ori temp,(1<<ram_a1)
-       sbrc zl,2
-        ori temp,(1<<ram_a2)
-       sbrc zl,3
-        ori temp,(1<<ram_a3)
-       sbrc zl,4
-        ori temp,(1<<ram_a4)
-       out PORTB,temp
-
-       in temp,PORTD
-       andi temp,~RAM_AH_MASK
-       sbrc zl,5
-        ori temp,(1<<ram_a5)
-       sbrc zl,6
-        ori temp,(1<<ram_a6)
-       sbrc zl,7
-        ori temp,(1<<ram_a7)
-       sbrc zh,0
-        ori temp,(1<<ram_a8)
-       out PORTD,temp
-       pop temp
-#else /* RAMORG == 1 */
-.macro DRAM_SETADDR
+#if DRAM_WORD_ACCESS
+dram_read_w:
+       cpi adrl,255
+       brne dram_read_w1
+       
+       rcall dram_read
        push temp
-       in temp,PORTB
-       andi temp,~RAM_AL_MASK
-       sbrc @0,0
-        ori temp,(1<<ram_a0)
-       sbrc @0,1
-        ori temp,(1<<ram_a1)
-       sbrc @0,2
-        ori temp,(1<<ram_a2)
-       sbrc @0,3
-        ori temp,(1<<ram_a3)
-       sbrc @0,4
-        ori temp,(1<<ram_a4)
-       out PORTB,temp
-
-       in temp,PORTD
-       andi temp,~RAM_AH_MASK
-       sbrc @0,5
-        ori temp,(1<<ram_a5)
-       sbrc @0,6
-        ori temp,(1<<ram_a6)
-       sbrc @0,7
-        ori temp,(1<<ram_a7)
-       out PORTD,temp
+       adiw adrl,1
+       rcall dram_read
+       mov temp2,temp
        pop temp
-.endm
-       ret
-#endif /* RAMORG */
-
-.macro DRAM_SENDNIBBLE
-       in temp2,PORTC
-       andi temp2,~RAM_DQ_MASK
-       andi temp,RAM_DQ_MASK
-       or  temp2,temp
-       out PORTC,temp2
-.endm
-
-
-;Loads the byte on address adrh:adrl into temp.
-;must not alter adrh:adrl
+       ret     
 
-dram_read:
+dram_read_w1:
        cli
-#if RAMORG == 0
-       mov zl,adrh
-       ldi zh,0
-       mov temp2,adrl
-       lsl temp2
-       rol zl
-       rol zh
-       ;z=addr[15-7]
-       rcall dram_setaddr
-       cbi PORTB,ram_ras
-
-       ldi zh,0
-       mov zl,adrl
-       andi zl,0x7F
-       rcall dram_setaddr
-       cbi PORTC,ram_cas
-       cbi PORTD,ram_oe
-       ldi zh,0
-       in  temp,PINC
+       DRAM_SETADDR adrh, ~0,(1<<ram_ras), ~(1<<ram_a8),(1<<ram_oe)
+       cbi P_RAS,ram_ras
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_oe)), (1<<ram_a8)
+       cbi P_CAS,ram_cas
+       cbi P_A8,ram_a8
+       in  temp,P_DQ-2         ; PIN
+       sbi P_CAS,ram_cas
+       cbi P_CAS,ram_cas
        andi temp,0x0f
        swap temp
-       sbi PORTC,ram_cas
-
-       mov zl,adrl
-       ori zl,0x80
-       rcall dram_setaddr
-       cbi PORTC,ram_cas
-       nop
-       in  temp2,PINC
+       in  temp2,P_DQ-2        ; PIN
+       sbi P_CAS,ram_cas
        andi temp2,0x0f
        or  temp,temp2
-
-       sbi PORTD,ram_oe
-       sbi PORTC,ram_cas
-       sbi PORTB,ram_ras
-#else
-       cbi PORTD,ram_a8
-       DRAM_SETADDR adrh
-       cbi PORTB,ram_ras
-
-       DRAM_SETADDR adrl
-       cbi PORTC,ram_cas
-       cbi PORTD,ram_oe
-       nop
-       in  temp,PINC
+       
+;      push temp
+       mov _wl,temp
+       inc adrl
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_oe)), (1<<ram_a8)
+       cbi P_CAS,ram_cas
+       cbi P_A8,ram_a8
+       in  temp,P_DQ-2         ; PIN
+       sbi P_CAS,ram_cas
+       cbi P_CAS,ram_cas
        andi temp,0x0f
        swap temp
-       sbi PORTC,ram_cas
-
-       sbi PORTD,ram_a8
-       cbi PORTC,ram_cas
-       nop
-       in  temp2,PINC
+       in  temp2,P_DQ-2        ; PIN
+       sbi P_CAS,ram_cas
        andi temp2,0x0f
-       or  temp,temp2
-       swap temp
+       or  temp2,temp
+;      pop temp
+       mov temp,_wl
 
-       sbi PORTD,ram_oe
-       sbi PORTC,ram_cas
-       sbi PORTB,ram_ras
-#endif
+       sbi P_OE,ram_oe
+       sbi P_RAS,ram_ras
        sei
        ret
-
+#endif
 
 ;Writes the byte in temp to  adrh:adrl
 ;must not alter adrh:adrl
 
 dram_write:
        cli
-#if RAMORG == 0
-       in temp2,DDRC               ;DRAM data ports as outputs
-       ori temp2,RAM_DQ_MASK
+       ldi temp2,RAM_DQ_MASK | (1<<ram_w) | (1<<ram_cas)
        out DDRC,temp2
 
-       push temp
-       DRAM_SENDNIBBLE
-       pop temp
-
-       mov zl,adrh
-       ldi zh,0
-       mov temp2,adrl
-       lsl temp2
-       rol zl
-       rol zh
-       ;z=addr[15-7]
-       rcall dram_setaddr
-       cbi PORTB,ram_ras
-
-       ldi zh,0
-       mov zl,adrl
-       ori zl,0x80
-       cbi PORTC,ram_w             ;early write
-       rcall dram_setaddr
+       mov  temp2,temp
+       andi temp,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp,(1<<ram_cas)
+       out PORTC,temp
+       DRAM_SETADDR adrh, ~0,(1<<ram_ras), ~(1<<ram_a8),(1<<ram_oe)
+       cbi P_RAS,ram_ras
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_a8)),(1<<ram_oe)
        cbi PORTC,ram_cas
        sbi PORTC,ram_cas
 
-       ldi zh,0
-       mov zl,adrl
-       andi zl,0x7F
-       rcall dram_setaddr
-       swap temp
+       sbi PORTD,ram_a8
+       swap temp2
 
-       DRAM_SENDNIBBLE
+       andi temp2,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp2,(1<<ram_cas)
+       out PORTC,temp2
 
        cbi PORTC,ram_cas
-       sbi PORTC,ram_cas
-       sbi PORTC,ram_w
-       sbi PORTB,ram_ras
+       sbi P_RAS,ram_ras
 
-       in temp,DDRC
-       andi temp,~RAM_DQ_MASK
+       ldi temp,~RAM_DQ_MASK | (1<<ram_w) | (1<<ram_cas)
        out DDRC,temp
-       in temp,PORTC
-       andi temp,~RAM_DQ_MASK
        out PORTC,temp
-#else /* RAMORG == 1 */
-       in temp2,DDRC               ;DRAM data ports as outputs
-       ori temp2,RAM_DQ_MASK
-       out DDRC,temp2
+       sei
+       ret
 
-       push temp
-       DRAM_SENDNIBBLE
+#if DRAM_WORD_ACCESS
+dram_write_w:                  ; word write: temp -> [adrh:adrl], temp2 -> [adrh:adrl + 1]
+       cpi adrl,255            ; low address byte 0xFF: the second byte needs a different adrh
+       brne dram_write_w1      ; otherwise take dram_write_w1, which only steps adrl for the second byte
+       
+       push temp2              ; keep the second byte: dram_write overwrites temp2
+       rcall dram_write        ; first byte
        pop temp                ; temp = second byte
+       adiw adrl,1             ; next address, carry goes into adrh; the register pair is left incremented
+       rcall dram_write        ; second byte
+       ret     
+
+dram_write_w1:
+       cli
+       push temp2
+       ldi temp2,RAM_DQ_MASK | (1<<ram_w) | (1<<ram_cas)
+       out DDRC,temp2
 
-       cbi PORTD,ram_a8
-       DRAM_SETADDR adrh
-       cbi PORTB,ram_ras
+       mov  temp2,temp
+       andi temp,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp,(1<<ram_cas)
+       out PORTC,temp
 
-       cbi PORTC,ram_w             ;early write
-       DRAM_SETADDR adrl
+       DRAM_SETADDR adrh, ~0,(1<<ram_ras), ~(1<<ram_a8),(1<<ram_oe)
+       cbi P_RAS,ram_ras
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_a8)),(1<<ram_oe)
        cbi PORTC,ram_cas
        sbi PORTC,ram_cas
 
        sbi PORTD,ram_a8
-       swap temp
+       swap temp2
+
+       andi temp2,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp2,(1<<ram_cas)
+       out PORTC,temp2
+
+       cbi PORTC,ram_cas
+       sbi PORTC,ram_cas
 
-       DRAM_SENDNIBBLE
+       pop temp
+       inc adrl
+       mov  temp2,temp
+       andi temp,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp,(1<<ram_cas)
+       out PORTC,temp
 
+       DRAM_SETADDR adrl, ~(1<<ram_ras),0, ~((1<<ram_a8)),(1<<ram_oe)
        cbi PORTC,ram_cas
-       nop
        sbi PORTC,ram_cas
-       sbi PORTC,ram_w
-       sbi PORTB,ram_ras
 
-       in temp,DDRC
-       andi temp,~RAM_DQ_MASK
+       sbi PORTD,ram_a8
+       swap temp2
+
+       andi temp2,RAM_DQ_MASK & ~(1<<ram_w)
+       ori temp2,(1<<ram_cas)
+       out PORTC,temp2
+       cbi PORTC,ram_cas
+
+       sbi P_RAS,ram_ras
+
+       ldi temp,~RAM_DQ_MASK | (1<<ram_w) | (1<<ram_cas)
        out DDRC,temp
-       in temp,PORTC
-       andi temp,~RAM_DQ_MASK
        out PORTC,temp
-#endif /* RAMORG */
        sei
        ret
-
-#endif  /* CLASSIC_DRAM == 0 */
+#endif
 
 ; ****************************************************************************
 
 ; refresh interrupt; executes 2 CBR (CAS-before-RAS) refresh cycles
 refrint:
-       cbi PORTC,ram_cas
-       cbi PORTB,ram_ras
-       nop
-       sbi PORTB,ram_ras
-       cbi PORTB,ram_ras
-       sbi PORTC,ram_cas
-       sbi PORTB,ram_ras
-       reti
+                               ;4      CAS  RAS  
+       cbi P_CAS,ram_cas       ;2       1|   1|  
+                               ;        1|   1|  
+       cbi P_RAS,ram_ras       ;2      |0    1|  
+                               ;       |0    1|  
+       nop                     ;1      |0   |0   
+;      nop                     ;1      |0   |0   
+       sbi P_RAS,ram_ras       ;2      |0   |0   
+                               ;       |0   |0   
+;      nop                     ;1      |0   |0   
+       cbi P_RAS,ram_ras       ;2      |0    1|  
+                               ;       |0    1|  
+       sbi P_CAS,ram_cas       ;2      |0   |0   
+                               ;       |0   |0   
+       sbi P_RAS,ram_ras       ;2       1|  |0   
+                               ;        1|   1|  
+       reti                    ;4  --> 21 cycles
 
 ; ****************************************************************************
 
 ; ------------- system timer 10ms ---------------
     .dseg
 
+delay_timer:                   ; down-counter loaded by delay_ms; sysclockint decrements it while it is non-zero
+       .byte   1
 timer_base:
 timer_ms:
        .byte   2
@@ -1700,6 +1739,11 @@ sysclockint:
        push    zl
        push    zh
        
+       lds     zl,delay_timer
+       subi    zl,1
+       brcs    syscl1
+       sts     delay_timer,zl
+syscl1:        
        lds     zl,cnt_1ms
        lds     zh,cnt_1ms+1
        adiw    z,1
@@ -1711,9 +1755,8 @@ sysclockint:
        cpc     zh,zl
        brlo    syscl_end
        
-       ldi     zl,0
-       sts     cnt_1ms,zl
-       sts     cnt_1ms+1,zl
+       sts     cnt_1ms,_0
+       sts     cnt_1ms+1,_0
 
        lds     zl,uptime+0
        inc     zl
@@ -1738,6 +1781,17 @@ syscl_end:
        pop     zl
        reti
 
+; delay_ms: wait for temp ms by loading delay_timer and polling it until it reads zero; clobbers temp
+
+delay_ms:
+       sts     delay_timer,temp        ; load the down-counter that sysclockint decrements
+dly_loop:
+       lds     temp,delay_timer        ; poll the counter
+       cpi     temp,0
+       brne    dly_loop                ; spins until the counter is 0 -- NOTE(review): never returns unless the timer interrupt is running; confirm at call sites
+       ret
+
+; 
 
 clockget:
        ldi     temp,0xFF
@@ -1842,74 +1896,66 @@ ts_loop:
 ;
        
 timer_print:
-       push    adrh
-       push    adrl
-       push    oph
-       push    opl
+       push    yh
+       push    yl
        ldi     zl,low(timer_ms)
        ldi     zh,high(timer_ms)
 
 ; put ms on stack (16 bit)
 
        cli
-       ldd     adrl,z+timerofs
+       ldd     yl,z+timerofs
        ld      temp2,z+
-       sub     adrl,temp2
-       ldd     adrh,z+timerofs
+       sub     yl,temp2
+       ldd     yh,z+timerofs
        ld      temp2,z+
-       sbc     adrh,temp2
+       sbc     yh,temp2
        brsh    tp_s
        
-       subi    adrl,low(-1000)
-       sbci    adrh,high(-1000)
+       subi    yl,low(-1000)
+       sbci    yh,high(-1000)
        sec     
 tp_s:
-       push    adrh
-       push    adrl
+       push    yh
+       push    yl
 
-; 
-       
        ldd     temp,z+timerofs
-       ld      adrl,z+
-       sbc     temp,adrl
+       ld      yl,z+
+       sbc     temp,yl
 
        ldd     temp2,z+timerofs
-       ld      adrh,z+
-       sbc     temp2,adrh
+       ld      yh,z+
+       sbc     temp2,yh
 
-       ldd     opl,z+timerofs
-       ld      adrl,z+
-       sbc     opl,adrl
+       ldd     temp3,z+timerofs
+       ld      yl,z+
+       sbc     temp3,yl
 
        sei
-       ldd     oph,z+timerofs
-       ld      adrh,z+
-       sbc     oph,adrh
+       ldd     temp4,z+timerofs
+       ld      yh,z+
+       sbc     temp4,yh
        
        rcall printstr
        .db 13,"Timer running. Elapsed: ",0
        rcall   print_ultoa
 
        rcall printstr
-       .db ",",0
-       ldi     opl,0
-       ldi     oph,0
+       .db ".",0
        pop     temp
        pop     temp2
+       ldi     temp3,0
+       ldi     temp4,0
        rcall   print_ultoa
        rcall printstr
-       .db "s. ",0,0
+       .db "s.",0,0
 
-       pop     opl
-       pop     oph
-       pop     adrl
-       pop     adrh
+       pop     yl
+       pop     yh
        ret
        
 uptime_print:
 
-       push    oph
-       push    opl
        ldi     zl,low(cnt_1ms)
        ldi     zh,high(cnt_1ms)
        
@@ -1921,9 +1967,9 @@ uptime_print:
        
        ld      temp,z+
        ld      temp2,z+
-       ld      opl,z+
+       ld      temp3,z+
        sei
-       ld      oph,z+
+       ld      temp4,z+
        
        rcall printstr
        .db 13,"Uptime: ",0
@@ -1932,16 +1978,14 @@ uptime_print:
        rcall printstr
        .db ",",0
 
-       ldi     opl,0
-       ldi     oph,0
+       ldi     temp3,0
+       ldi     temp4,0
        pop     temp2
        pop     temp
        rcall print_ultoa
        rcall printstr
        .db "s.",0,0
 
-       pop     opl
-       pop     oph
        ret
 
 
@@ -1949,47 +1993,53 @@ uptime_print:
 ; --------------- Debugging stuff ---------------
 
 ;Print an unsigned long value to the uart
-; oph:opl:temp2:temp = value
+; temp4:temp3:temp2:temp = value
 
 print_ultoa:                   ; in: temp4:temp3:temp2:temp = value (clobbered); printed in decimal, zero-padded to at least 3 digits
-       push    adrh
-       push    adrl
+       push    yh
        push    yl
+       push    z_flags         ; z_flags is borrowed as the bit counter of the division loop
                                
-       clr     adrl            ;adrl = stack level
+       clr     yl              ;yl = stack level (number of digits pushed so far)
 
-ultoa1:        ldi     yl, 32          ;adrh = oph:temp % 10
-       clr     adrh            ;oph:temp /= 10
+ultoa1:        ldi     z_flags, 32     ;32 shift steps: yh = temp4:temp % 10
+       clr     yh              ;temp4:temp /= 10
 ultoa2:        lsl     temp    
        rol     temp2   
-       rol     opl     
-       rol     oph     
-       rol     adrh    
-       cpi     adrh,10 
+       rol     temp3   
+       rol     temp4   
+       rol     yh      
+       cpi     yh,10   
        brcs    ultoa3  
-       subi    adrh,10 
+       subi    yh,10   
        inc     temp            ; set the quotient bit that the lsl just shifted in as 0
-ultoa3:        dec     yl      
+ultoa3:        dec     z_flags 
        brne    ultoa2
-       cpi     adrh, 10        ;adrh is a numeral digit '0'-'9'
-       subi    adrh, -'0'
-       push    adrh            ;Stack it
-       inc     adrl    
-       ldi     yl,0
-       cp      temp,yl         ;Repeat until oph:temp gets zero
-       cpc     temp2,yl
-       cpc     opl,yl  
-       cpc     oph,yl  
+       cpi     yh, 10  ;yh is the remainder 0..9 here; NOTE(review): the flags of this compare are overwritten by the subi below before anything reads them
+       subi    yh, -'0'        ;convert the remainder to its ASCII digit
+       push    yh              ;Stack it
+       inc     yl      
+       cp      temp,_0         ;Repeat until temp4:temp gets zero
+       cpc     temp2,_0
+       cpc     temp3,_0
+       cpc     temp4,_0
        brne    ultoa1  
+       
+       ldi     temp, '0'
+ultoa5:        cpi     yl,3            ; at least 3 digits (ms): push leading '0' characters until 3 are stacked
+       brge    ultoa6
+       push    temp    
+       inc     yl
+       rjmp    ultoa5
 
 ultoa6:        pop     temp            ;Flush stacked digits (most significant digit comes off first)
        rcall   uartputc
-       dec     adrl    
+       dec     yl      
        brne    ultoa6  
 
+       pop     z_flags
        pop     yl
-       pop     adrl
-       pop     adrh
+       pop     yh
        ret
 
 
@@ -2017,15 +2067,21 @@ printhex:
        rcall printhexn
        ret
 
-;Prints the zero-terminated string following the call statement. WARNING: Destroys temp.
+;Prints the zero-terminated string following the call statement. 
+
 printstr:
-       pop zh
-       pop zl
-       push temp
+       push    zh
+       push    zl
+       push    r29
+       push    r28
+       push    temp
+       in      r29,sph
+       in      r28,spl
+       ldd     zl,y+7
+       ldd     zh,y+6
 
        lsl zl
        rol zh
-
 printstr_loop:
        lpm temp,z+
        cpi temp,0
@@ -2042,16 +2098,23 @@ printstr_end:
        lsr zh
        ror zl
 
-       pop temp
-       push zl
-       push zh
+       std     y+7,zl
+       std     y+6,zh
+       pop     temp
+       pop     r28
+       pop     r29
+       pop     zl
+       pop     zh
        ret
        
-
 ; --------------- AVR HW <-> Z80 periph stuff ------------------
 
 .equ memReadByte       =       dram_read
 .equ memWriteByte      =       dram_write
+#if DRAM_WORD_ACCESS
+.equ memReadWord       =       dram_read_w
+.equ memWriteWord      =       dram_write_w
+#endif
 
 ; --------------------------------------------------------------
 
@@ -2084,23 +2147,23 @@ rxint:
 #else
        lds     temp,UDR0
 #endif
-       lds     zh,rxcount
-       cpi     zh,RXBUFSIZE
-       brsh    rxi_ov
-       inc     zh
-       sts     rxcount,zh
-
-       ldi     zl,low(rxfifo)
-       lds     zh,rxidx_w
-       add     zl,zh
-       inc     zh
-       andi    zh,RXBUFMASK
-       sts     rxidx_w,zh
-       ldi     zh,high(rxfifo)
-       brcc    PC+2
-       inc     zh
-       st      z,temp
-rxi_ov:
+       lds     zh,rxcount              ;if rxcount < RXBUFSIZE
+       cpi     zh,RXBUFSIZE            ;   (room for at least 1 char?)
+       brsh    rxi_ov                  ; 
+       inc     zh                      ;
+       sts     rxcount,zh              ;   rxcount++
+
+       ldi     zl,low(rxfifo)          ;  
+       lds     zh,rxidx_w              ;
+       add     zl,zh                   ;
+       inc     zh                      ;
+       andi    zh,RXBUFMASK            ;
+       sts     rxidx_w,zh              ;   rxidx_w = ++rxidx_w % RXBUFSIZE
+       ldi     zh,high(rxfifo)         ;
+       brcc    PC+2                    ;
+       inc     zh                      ;
+       st      z,temp                  ;   rxfifo[rxidx_w] = char
+rxi_ov:                                        ;endif
        pop     zl
        pop     zh
        pop     temp
@@ -2183,26 +2246,26 @@ uartputc_l:
 
 ;Jump table for fetch routines. Make sure to keep this in sync with the .equs!
 fetchjumps:
-.dw do_fetch_nop
-.dw do_fetch_a
-.dw do_fetch_b
-.dw do_fetch_c
-.dw do_fetch_d
-.dw do_fetch_e
-.dw do_fetch_h
-.dw do_fetch_l
-.dw do_fetch_af
-.dw do_fetch_bc
-.dw do_fetch_de
-.dw do_fetch_hl
-.dw do_fetch_sp
-.dw do_fetch_mbc
-.dw do_fetch_mde
-.dw do_fetch_mhl
-.dw do_fetch_msp
-.dw do_fetch_dir8
-.dw do_fetch_dir16
-.dw do_fetch_rst
+       rjmp do_fetch_nop
+       rjmp do_fetch_a
+       rjmp do_fetch_b
+       rjmp do_fetch_c
+       rjmp do_fetch_d
+       rjmp do_fetch_e
+       rjmp do_fetch_h
+       rjmp do_fetch_l
+       rjmp do_fetch_af
+       rjmp do_fetch_bc
+       rjmp do_fetch_de
+       rjmp do_fetch_hl
+       rjmp do_fetch_sp
+       rjmp do_fetch_mbc
+       rjmp do_fetch_mde
+       rjmp do_fetch_mhl
+       rjmp do_fetch_msp
+       rjmp do_fetch_dir8
+       rjmp do_fetch_dir16
+       rjmp do_fetch_rst
 
 do_fetch_nop:
        ret
@@ -2241,84 +2304,77 @@ do_fetch_af:
        ret
 
 do_fetch_bc:
-       mov opl,z_c
-       mov oph,z_b
+       movw opl,z_c
        ret
 
 do_fetch_de:
-       mov opl,z_e
-       mov oph,z_d
+       movw opl,z_e
        ret
 
 do_fetch_hl:
-       mov opl,z_l
-       mov oph,z_h
+       movw opl,z_l
        ret
 
 do_fetch_sp:
-       mov opl,z_spl
-       mov oph,z_sph
+       movw opl,z_spl
        ret
 
 do_fetch_mbc:
-       mov adrh,z_b
-       mov adrl,z_c
+       movw adrl,z_c
        rcall memReadByte
        mov opl,temp
        ret
 
 do_fetch_mde:
-       mov adrh,z_d
-       mov adrl,z_e
+       movw adrl,z_e
        rcall memReadByte
        mov opl,temp
        ret
 
 do_fetch_mhl:
-       mov adrh,z_h
-       mov adrl,z_l
+       movw adrl,z_l
        rcall memReadByte
        mov opl,temp
        ret
 
 do_fetch_msp:
-       mov adrh,z_sph
-       mov adrl,z_spl
+       movw adrl,z_spl
+#if DRAM_WORD_ACCESS
+       rcall memReadWord
+       movw opl,temp
+#else
        rcall memReadByte
        mov opl,temp
-
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
        rcall memReadByte
        mov oph,temp
+#endif 
        ret
 
 do_fetch_dir8:
-       mov adrl,z_pcl
-       mov adrh,z_pch
+       movw adrl,z_pcl
        rcall memReadByte
        adiw z_pcl,1
        mov opl,temp
        ret
 
 do_fetch_dir16:
-       mov adrl,z_pcl
-       mov adrh,z_pch
+       movw adrl,z_pcl
+#if DRAM_WORD_ACCESS
+       rcall memReadWord
+       movw opl,temp
+#else
        rcall memReadByte
        mov opl,temp
-       adiw z_pcl,1
-       mov adrl,z_pcl
-       mov adrh,z_pch
+       adiw adrl,1
        rcall memReadByte
-       adiw z_pcl,1
        mov oph,temp
+#endif 
+       adiw z_pcl,2
        ret
 
 do_fetch_rst:
-       mov adrl,z_pcl
-       mov adrh,z_pch
+       movw adrl,z_pcl
        subi adrl,1
        sbci adrh,0
        rcall memReadByte
@@ -2355,27 +2411,27 @@ do_fetch_rst:
 
 ;Jump table for store routines. Make sure to keep this in sync with the .equs!
 storejumps:
-.dw do_store_nop
-.dw do_store_a
-.dw do_store_b
-.dw do_store_c
-.dw do_store_d
-.dw do_store_e
-.dw do_store_h
-.dw do_store_l
-.dw do_store_af
-.dw do_store_bc
-.dw do_store_de
-.dw do_store_hl
-.dw do_store_sp
-.dw do_store_pc
-.dw do_store_mbc
-.dw do_store_mde
-.dw do_store_mhl
-.dw do_store_msp
-.dw do_store_ret
-.dw do_store_call
-.dw do_store_am
+       rjmp do_store_nop
+       rjmp do_store_a
+       rjmp do_store_b
+       rjmp do_store_c
+       rjmp do_store_d
+       rjmp do_store_e
+       rjmp do_store_h
+       rjmp do_store_l
+       rjmp do_store_af
+       rjmp do_store_bc
+       rjmp do_store_de
+       rjmp do_store_hl
+       rjmp do_store_sp
+       rjmp do_store_pc
+       rjmp do_store_mbc
+       rjmp do_store_mde
+       rjmp do_store_mhl
+       rjmp do_store_msp
+       rjmp do_store_ret
+       rjmp do_store_call
+       rjmp do_store_am
 
 
 do_store_nop:
@@ -2430,70 +2486,61 @@ do_store_hl:
        ret
 
 do_store_mbc:
-       mov adrh,z_b
-       mov adrl,z_c
+       movw adrl,z_c
        mov temp,opl
        rcall memWriteByte
        ret
 
 do_store_mde:
-       mov adrh,z_d
-       mov adrl,z_e
+       movw adrl,z_e
        mov temp,opl
        rcall memWriteByte
        ret
 
 do_store_mhl:
-       mov adrh,z_h
-       mov adrl,z_l
+       movw adrl,z_l
        mov temp,opl
        rcall memWriteByte
        ret
 
 do_store_msp:
-       mov adrh,z_sph
-       mov adrl,z_spl
+       movw adrl,z_spl
+#if DRAM_WORD_ACCESS
+       movw temp,opl
+       rcall memWriteWord
+#else
        mov temp,opl
        rcall memWriteByte
-
-       ldi temp,1
-       ldi temp2,0
-       add adrl,temp
-       adc adrh,temp2
+       adiw adrl,1
        mov temp,oph
        rcall memWriteByte
-
+#endif
        ret
 
 do_store_sp:
-       mov z_sph,oph
-       mov z_spl,opl
+       movw z_spl,opl
        ret
 
 do_store_pc:
-       mov z_pch,oph
-       mov z_pcl,opl
+       movw z_pcl,opl
        ret
 
 do_store_ret:
        rcall do_op_pop16
-       mov z_pcl,opl
-       mov z_pch,oph
+       movw z_pcl,opl
        ret
 
 do_store_call:
        push opl
        push oph
-       mov opl,z_pcl
-       mov oph,z_pch
+       movw opl,z_pcl
        rcall do_op_push16
        pop z_pch
        pop z_pcl
        ret
 
 do_store_am:
-       mov adrh,oph
-       mov adrl,opl
+       movw adrl,opl
        mov temp,z_a
        rcall memWriteByte
        ret
@@ -2544,46 +2591,46 @@ do_store_am:
 .equ OP_INV            = (39<<10)
 
 opjumps:
-.dw do_op_nop
-.dw do_op_inc
-.dw do_op_dec
-.dw do_op_inc16
-.dw do_op_dec16
-.dw do_op_rlc
-.dw do_op_rrc
-.dw do_op_rr
-.dw do_op_rl
-.dw do_op_adda
-.dw do_op_adca
-.dw do_op_subfa
-.dw do_op_sbcfa
-.dw do_op_anda
-.dw do_op_ora
-.dw do_op_xora
-.dw do_op_addhl
-.dw do_op_sthl
-.dw do_op_rmem16
-.dw do_op_rmem8
-.dw do_op_da
-.dw do_op_scf
-.dw do_op_cpl
-.dw do_op_ccf
-.dw do_op_pop16
-.dw do_op_push16
-.dw do_op_ifnz
-.dw do_op_ifz
-.dw do_op_ifnc
-.dw do_op_ifc
-.dw do_op_ifpo
-.dw do_op_ifpe
-.dw do_op_ifp
-.dw do_op_ifm
-.dw do_op_outa
-.dw do_op_in
-.dw do_op_exhl
-.dw do_op_di
-.dw do_op_ei
-.dw do_op_inv
+       rjmp do_op_nop
+       rjmp do_op_inc
+       rjmp do_op_dec
+       rjmp do_op_inc16
+       rjmp do_op_dec16
+       rjmp do_op_rlc
+       rjmp do_op_rrc
+       rjmp do_op_rr
+       rjmp do_op_rl
+       rjmp do_op_adda
+       rjmp do_op_adca
+       rjmp do_op_subfa
+       rjmp do_op_sbcfa
+       rjmp do_op_anda
+       rjmp do_op_ora
+       rjmp do_op_xora
+       rjmp do_op_addhl
+       rjmp do_op_sthl
+       rjmp do_op_rmem16
+       rjmp do_op_rmem8
+       rjmp do_op_da
+       rjmp do_op_scf
+       rjmp do_op_cpl
+       rjmp do_op_ccf
+       rjmp do_op_pop16
+       rjmp do_op_push16
+       rjmp do_op_ifnz
+       rjmp do_op_ifz
+       rjmp do_op_ifnc
+       rjmp do_op_ifc
+       rjmp do_op_ifpo
+       rjmp do_op_ifpe
+       rjmp do_op_ifp
+       rjmp do_op_ifm
+       rjmp do_op_outa
+       rjmp do_op_in
+       rjmp do_op_exhl
+       rjmp do_op_di
+       rjmp do_op_ei
+       rjmp do_op_inv
 
 
 ;How the flags are supposed to work:
@@ -2707,6 +2754,66 @@ opjumps:
 ;|SUB s     |***V1*|Subtract             |A=A-s                 |
 ;|XOR s     |**0P00|Logical Exclusive OR |A=Axs                 |
 ;|----------+------+--------------------------------------------|
+;| F        |-*01? |Flag unaffected/affected/reset/set/unknown  |
+;| S        |S     |Sign flag (Bit 7)                           |
+;| Z        | Z    |Zero flag (Bit 6)                           |
+;| HC       |  H   |Half Carry flag (Bit 4)                     |
+;| P/V      |   P  |Parity/Overflow flag (Bit 2, V=overflow)    |
+;| N        |    N |Add/Subtract flag (Bit 1)                   |
+;| CY       |     C|Carry flag (Bit 0)                          |
+;|-----------------+--------------------------------------------|
+;| n               |Immediate addressing                        |
+;| nn              |Immediate extended addressing               |
+;| e               |Relative addressing (PC=PC+2+offset)        |
+;| [nn]            |Extended addressing                         |
+;| [xx+d]          |Indexed addressing                          |
+;| r               |Register addressing                         |
+;| [rr]            |Register indirect addressing                |
+;|                 |Implied addressing                          |
+;| b               |Bit addressing                              |
+;| p               |Modified page zero addressing (see RST)     |
+;|-----------------+--------------------------------------------|
+;|DEFB n(,...)     |Define Byte(s)                              |
+;|DEFB 'str'(,...) |Define Byte ASCII string(s)                 |
+;|DEFS nn          |Define Storage Block                        |
+;|DEFW nn(,...)    |Define Word(s)                              |
+;|-----------------+--------------------------------------------|
+;| A  B  C  D  E   |Registers (8-bit)                           |
+;| AF  BC  DE  HL  |Register pairs (16-bit)                     |
+;| F               |Flag register (8-bit)                       |
+;| I               |Interrupt page address register (8-bit)     |
+;| IX IY           |Index registers (16-bit)                    |
+;| PC              |Program Counter register (16-bit)           |
+;| R               |Memory Refresh register                     |
+;| SP              |Stack Pointer register (16-bit)             |
+;|-----------------+--------------------------------------------|
+;| b               |One bit (0 to 7)                            |
+;| cc              |Condition (C,M,NC,NZ,P,PE,PO,Z)             |
+;| d               |One-byte expression (-128 to +127)          |
+;| dst             |Destination s, ss, [BC], [DE], [HL], [nn]   |
+;| e               |One-byte expression (-126 to +129)          |
+;| m               |Any register r, [HL] or [xx+d]              |
+;| n               |One-byte expression (0 to 255)              |
+;| nn              |Two-byte expression (0 to 65535)            |
+;| pp              |Register pair BC, DE, IX or SP              |
+;| qq              |Register pair AF, BC, DE or HL              |
+;| qq'             |Alternative register pair AF, BC, DE or HL  |
+;| r               |Register A, B, C, D, E, H or L              |
+;| rr              |Register pair BC, DE, IY or SP              |
+;| s               |Any register r, value n, [HL] or [xx+d]     |
+;| src             |Source s, ss, [BC], [DE], [HL], nn, [nn]    |
+;| ss              |Register pair BC, DE, HL or SP              |
+;| xx              |Index register IX or IY                     |
+;|-----------------+--------------------------------------------|
+;| +  -  *  /  ^   |Add/subtract/multiply/divide/exponent       |
+;| &  ~  v  x      |Logical AND/NOT/inclusive OR/exclusive OR   |
+;| <-  ->          |Rotate left/right                           |
+;| [ ]             |Indirect addressing                         |
+;| [ ]+  -[ ]      |Indirect addressing auto-increment/decrement|
+;| { }             |Combination of operands                     |
+;| #               |Also BC=BC-1,DE=DE-1                        |
+;| ##              |Only lower 4 bits of accumulator A used     |
+;----------------------------------------------------------------
 
 
 .equ AVR_T = 6
@@ -2736,14 +2843,10 @@ opjumps:
 ; (3 words, 5 cycles)
 
 .macro ldpmx
-       ldi     zl,low (@1*2)
-       ldi     zh,high(@1*2)
-       add     zl,@2                  
-       brcc    PC+2
-       inc     zh                     
+       ldi     zh,high(@1*2)   ; zh = high byte of the table's byte address; table must be page aligned (low byte = 0)
+       mov     zl,@2                   ; zl = index register @2; no add/carry needed because the low address byte is 0
        lpm     @0,z    
 .endm
-
 .macro do_z80_flags_HP
 #if EM_Z80
        bmov    z_flags, ZFL_P, temp, AVR_V
@@ -2846,10 +2949,8 @@ do_op_dec:
 ;
 ; 
 do_op_inc16:
-       inc     opl
-       brne    op_i16x
-       inc     oph
-op_i16x:
+       subi    opl,low(-1)     ; oph:opl -= 0xFFFF, i.e. += 1 modulo 65536, without a branch
+       sbci    oph,high(-1)    ; borrow from the low byte carries the increment into oph
        ret
 
 ;----------------------------------------------------------------
@@ -3085,20 +3186,17 @@ do_op_addhl:
 ;
 ;
 do_op_sthl: ;store hl to mem loc in opl:h
-       mov adrl,opl
-       mov adrh,oph
+       movw adrl,opl           ; adr pair = destination address taken from the op pair
+#if DRAM_WORD_ACCESS
+       movw temp,z_l           ; word path: temp pair = pair starting at z_l (presumably z_h:z_l - confirm register layout)
+       rcall memWriteWord      ; single call stores both bytes
+#else
        mov temp,z_l
        rcall memWriteByte
-
-       inc     opl
-       brne    op_sthlx
-       inc     oph
-op_sthlx:
-       mov adrl,opl
-       mov adrh,oph
+       adiw adrl,1             ; next byte address; op pair is no longer modified (assumes memWriteByte preserves adrh:adrl)
        mov temp,z_h
        rcall memWriteByte
-
+#endif
        ret
 
 ;----------------------------------------------------------------
@@ -3108,16 +3206,17 @@ op_sthlx:
 ;
 ; 
 do_op_rmem16:
-       mov adrl,opl
-       mov adrh,oph
+       movw adrl,opl           ; adr pair = source address taken from the op pair
+#if DRAM_WORD_ACCESS
+       rcall memReadWord       ; word path: one call fetches both bytes
+       movw opl,temp           ; op pair = pair starting at temp (presumably low byte in temp - confirm)
+#else
        rcall memReadByte
        mov opl,temp
-       ldi temp,1
-       add adrl,temp
-       ldi temp,0
-       adc adrh,temp
+       adiw adrl,1             ; 16-bit address + 1 without using temp as scratch (assumes memReadByte preserves adrh:adrl)
        rcall memReadByte
        mov oph,temp
+#endif 
        ret
 
 ;----------------------------------------------------------------
@@ -3127,8 +3226,7 @@ do_op_rmem16:
 ;
 ;
 do_op_rmem8:
-       mov adrl,opl
-       mov adrh,oph
+       movw adrl,opl           ; adrh:adrl = oph:opl; the byte read from that address replaces opl below
        rcall memReadByte
        mov opl,temp
        ret
@@ -3313,42 +3411,19 @@ do_op_cpl:
 ;
 ;
 do_op_push16:
-#if 1
-       ldi temp,1
-       ldi temp2,0
-       sub z_spl,temp
-       sbc z_sph,temp2
-
-       mov adrl,z_spl
-       mov adrh,z_sph
-       mov temp,oph
-       rcall memWriteByte
-
-       ldi temp,1
-       ldi temp2,0
-       sub z_spl,temp
-       sbc z_sph,temp2
-
-       mov adrl,z_spl
-       mov adrh,z_sph
+       movw adrl,z_spl         ; work on a copy of the Z80 SP in the adr pair
+       subi adrl,2             ; SP - 2 in one step (old code decremented by 1 twice)
+       sbci adrh,0             ; propagate the borrow into the high byte
+       movw z_spl,adrl         ; write the decremented SP back
+#if DRAM_WORD_ACCESS   
+       movw temp,opl           ; word path: temp pair = op pair, stored with one call
+       rcall memWriteWord
+#else
        mov temp,opl
        rcall memWriteByte
-#else
-       subi z_spl,1
-       sbci z_sph,0
-
-       mov adrl,z_spl
-       mov adrh,z_sph
+       adiw adrl,1             ; high byte goes to new SP+1; low byte is now written first, memory layout unchanged
        mov temp,oph
        rcall memWriteByte
-
-       subi z_spl,1
-       sbci z_sph,0
-
-       mov adrl,z_spl
-       mov adrh,z_sph
-       mov temp,opl
-       rcall memWriteByte
 #endif
 
 .if STACK_DBG
@@ -3378,25 +3453,22 @@ do_op_push16:
 ;
 ;
 do_op_pop16:
-       mov adrl,z_spl
-       mov adrh,z_sph
+       movw adrl,z_spl         ; read address = current Z80 SP
+#if DRAM_WORD_ACCESS
+       rcall memReadWord       ; word path: one call fetches both bytes
+       movw opl,temp           ; op pair = pair starting at temp
+#else
        rcall memReadByte
        mov opl,temp
-
-       ldi temp,1
-       ldi temp2,0
-       add z_spl,temp
-       adc z_sph,temp2
-
-       mov adrl,z_spl
-       mov adrh,z_sph
+       adiw adrl,1             ; step the address copy; SP itself is adjusted once at the end
        rcall memReadByte
        mov oph,temp
+#endif 
 
-       ldi temp,1
-       ldi temp2,0
+       ldi temp,2              ; SP += 2 in one step; temp is free here, the result is already in the op pair
        add z_spl,temp
-       adc z_sph,temp2
+       adc z_sph,_0            ; _0: presumably a register held at zero, replacing temp2 - confirm its definition
+
 
 .if STACK_DBG
        rcall printstr
@@ -3459,8 +3531,8 @@ do_op_ei:
 do_op_ifnz:
        sbrs z_flags, ZFL_Z
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when Z is set (NZ false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3474,8 +3546,8 @@ do_op_ifnz:
 do_op_ifz:
        sbrc z_flags, ZFL_Z
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when Z is clear (Z false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3489,8 +3561,8 @@ do_op_ifz:
 do_op_ifnc:
        sbrs z_flags, ZFL_C
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when C is set (NC false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3504,8 +3576,8 @@ do_op_ifnc:
 do_op_ifc:
        sbrc z_flags, ZFL_C
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when C is clear (C false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3519,8 +3591,8 @@ do_op_ifc:
 do_op_ifpo:
        sbrs z_flags, ZFL_P
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when P is set (PO false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3534,8 +3606,8 @@ do_op_ifpo:
 do_op_ifpe:
        sbrc z_flags, ZFL_P
        ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when P is clear (PE false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3549,8 +3621,8 @@ do_op_ifpe:
 do_op_ifp: ;sign positive, aka s=0
        sbrs z_flags, ZFL_S
         ret
-       ldi insdech,0
-       ldi insdecl,0
+       clr insdech             ; reached only when S is set (P false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3564,8 +3636,8 @@ do_op_ifp: ;sign positive, aka s=0
 do_op_ifm: ;sign negative, aka s=1
        sbrc z_flags, ZFL_S
         ret
-       ldi insdech, 0
-       ldi insdecl, 0
+       clr insdech             ; reached only when S is clear (M false): zero insdech:insdecl
+       clr insdecl             ; clr, unlike ldi, alters SREG but accepts any of r0..r31
        ret
 
 ;----------------------------------------------------------------
@@ -3620,30 +3692,6 @@ do_op_in:        ; in a,(opl)
 .endif
        ret
 
-;----------------------------------------------------------------
-
-#if 0
-do_op_calcparity:
-       ldi temp2,1
-       sbrc parityb,0
-        inc temp2
-       sbrc parityb,1
-        inc temp2
-       sbrc parityb,2
-        inc temp2
-       sbrc parityb,3
-        inc temp2
-       sbrc parityb,4
-        inc temp2
-       sbrc parityb,5
-        inc temp2
-       sbrc parityb,6
-        inc temp2
-       sbrc parityb,7
-        inc temp2
-       andi temp2,1
-       ret
-#endif
 
 ;----------------------------------------------------------------
 do_op_inv:
@@ -3663,6 +3711,7 @@ haltinv:
 ; http://z80ex.sourceforge.net/
 
 ; The S, Z, 5 and 3 bits and the parity of the lookup value 
+.org (PC+255) & 0xff00 ; round PC up to the next 256-word boundary so the following table's byte address has a zero low byte
 sz53p_tab:
        .db 0x44,0x00,0x00,0x04,0x00,0x04,0x04,0x00
        .db 0x08,0x0c,0x0c,0x08,0x0c,0x08,0x08,0x0c
@@ -3705,28 +3754,28 @@ sz53p_tab:
 ; The table is made of 256 words. These 16-bit words consist of 
 ; the fetch operation (bit 0-4), the processing operation (bit 10-15) and the store 
 ; operation (bit 5-9).
-
+.org (PC+255) & 0xff00 ; round PC up to the next 256-word boundary before the 256-entry instruction table
 inst_table:
-.dw (FETCH_NOP  | OP_NOP   | STORE_NOP)         ; 00               NOP
-.dw (FETCH_DIR16| OP_NOP   | STORE_BC )         ; 01 nn nn     LD BC,nn
-.dw (FETCH_A    | OP_NOP   | STORE_MBC ) ; 02              LD (BC),A
-.dw (FETCH_BC   | OP_INC16 | STORE_BC )         ; 03               INC BC
-.dw (FETCH_B    | OP_INC   | STORE_B )  ; 04       INC B
-.dw (FETCH_B    | OP_DEC   | STORE_B )  ; 05       DEC B
-.dw (FETCH_DIR8        | OP_NOP        | STORE_B  )     ; 06 nn    LD B,n
-.dw (FETCH_A    | OP_RLC       | STORE_A  )     ; 07       RLCA
-.dw (FETCH_NOP | OP_INV        | STORE_NOP)     ; 08       EX AF,AF'   (Z80)
-.dw (FETCH_BC   | OP_ADDHL     | STORE_HL ) ; 09       ADD HL,BC
-.dw (FETCH_MBC | OP_NOP        | STORE_A  )     ; 0A       LD A,(BC)
-.dw (FETCH_BC   | OP_DEC16     | STORE_BC ) ; 0B       DEC BC
-.dw (FETCH_C    | OP_INC       | STORE_C  )     ; 0C       INC C
-.dw (FETCH_C    | OP_DEC       | STORE_C  )     ; 0D       DEC C
-.dw (FETCH_DIR8 | OP_NOP       | STORE_C  )     ; 0E nn    LD C,n
-.dw (FETCH_A    | OP_RRC       | STORE_A  )     ; 0F       RRCA
-.dw (FETCH_NOP  | OP_INV       | STORE_NOP)     ; 10 oo    DJNZ o              (Z80)
+.dw (FETCH_NOP  | OP_NOP       | STORE_NOP)     ; 00           NOP
+.dw (FETCH_DIR16| OP_NOP       | STORE_BC )     ; 01 nn nn     LD BC,nn
+.dw (FETCH_A    | OP_NOP       | STORE_MBC)     ; 02           LD (BC),A
+.dw (FETCH_BC   | OP_INC16     | STORE_BC )     ; 03           INC BC
+.dw (FETCH_B    | OP_INC       | STORE_B  )     ; 04           INC B
+.dw (FETCH_B    | OP_DEC       | STORE_B  )     ; 05           DEC B
+.dw (FETCH_DIR8        | OP_NOP        | STORE_B  )     ; 06 nn        LD B,n
+.dw (FETCH_A    | OP_RLC       | STORE_A  )     ; 07           RLCA
+.dw (FETCH_NOP | OP_INV        | STORE_NOP)     ; 08           EX AF,AF'       (Z80)
+.dw (FETCH_BC   | OP_ADDHL     | STORE_HL )     ; 09           ADD HL,BC
+.dw (FETCH_MBC | OP_NOP        | STORE_A  )     ; 0A           LD A,(BC)
+.dw (FETCH_BC   | OP_DEC16     | STORE_BC )     ; 0B           DEC BC
+.dw (FETCH_C    | OP_INC       | STORE_C  )     ; 0C           INC C
+.dw (FETCH_C    | OP_DEC       | STORE_C  )     ; 0D           DEC C
+.dw (FETCH_DIR8 | OP_NOP       | STORE_C  )     ; 0E nn        LD C,n
+.dw (FETCH_A    | OP_RRC       | STORE_A  )     ; 0F           RRCA
+.dw (FETCH_NOP  | OP_INV       | STORE_NOP)     ; 10 oo        DJNZ o          (Z80)
 .dw (FETCH_DIR16| OP_NOP       | STORE_DE )     ; 11 nn nn     LD DE,nn
 .dw (FETCH_A    | OP_NOP       | STORE_MDE)     ; 12           LD (DE),A
-.dw (FETCH_DE  | OP_INC16  | STORE_DE )         ; 13           INC DE
+.dw (FETCH_DE  | OP_INC16      | STORE_DE )     ; 13           INC DE
 .dw (FETCH_D   | OP_INC        | STORE_D  )     ; 14           INC D
 .dw (FETCH_D   | OP_DEC        | STORE_D  )     ; 15           DEC D
 .dw (FETCH_DIR8        | OP_NOP        | STORE_D  )     ; 16 nn        LD D,n
@@ -3924,7 +3973,7 @@ inst_table:
 .dw (FETCH_DIR8        | OP_SUBFA      | STORE_A  )     ; D6 nn        SUB n
 .dw (FETCH_RST | OP_NOP        | STORE_CALL)    ; D7           RST 10H
 .dw (FETCH_NOP | OP_IFC        | STORE_RET)     ; D8           RET C
-.dw (FETCH_NOP | OP_INV        | STORE_NOP)     ; D9           EXX                     (Z80)
+.dw (FETCH_NOP | OP_INV        | STORE_NOP)     ; D9           EXX             (Z80)
 .dw (FETCH_DIR16| OP_IFC       | STORE_PC )     ; DA nn nn     JP C,nn
 .dw (FETCH_DIR8        | OP_IN         | STORE_A  )     ; DB nn        IN A,(n)
 .dw (FETCH_DIR16| OP_IFC       | STORE_CALL)    ; DC nn nn     CALL C,nn
@@ -3965,4 +4014,3 @@ inst_table:
 .dw (FETCH_RST | OP_NOP        | STORE_CALL)    ; FF           RST 38H
 
 ; vim:set ts=8 noet nowrap
-