git @ Cat's Eye Technologies Shelta / master src / shelta86.s
master

Tree @master (Download .tar.gz)

shelta86.s @masterraw · history · blame

;  shelta86.s
;  v1.2-2013.1130
;  (c)2009-2013 Chris Pressey, Cat's Eye Technologies.  All rights reserved.

;  Implements an assembler/compiler for the Shelta language, in 8086 machine
;  language, in the format of the NASM assembler.

;  * Special thanks to Ben Olmstead (BEM) for his suggestions for how to
;    reduce SHELTA86.COM's size even further.

org             0100h
bits            16
cpu             8086

;-------------- Code

; Main program.

WhileFile:

; ----- begin scanning token

                call    word ScanChar   ; get char -> al
                cmp     al, 0bh         ; is it a vertical tab?
                je      EndFile
                cmp     al, 32
                jbe     WhileFile       ; repeat if char is whitespace

                mov     di, token
                cld

.TokenLoop:     stosb                   ; put char in token
                call    word ScanChar   ; get char
                cmp     al, 32
                ja      .TokenLoop      ; repeat if char is not whitespace

                mov     byte [di], 0    ; null-terminate the token

; ----- end scanning token

                mov     si, token + 1

                mov     al, [token]
                sub     al, '['
                cmp     al, 5
                ja      .Unroll
                xor     ah, ah
                shl     ax, 1
                xchg    bx, ax
                mov     ax, [ttable + bx]
                call    ax              ; call handler as listed in ttable
                jmp     short WhileFile

.Unroll:        dec     si              ; start at first character of token
                call    word LookupSymbol ; destroys DI & SI, but that's OK

                ; copy cx bytes from ax to codeh

                xchg    ax, si
                mov     di, [codeh]     ; use di to track codeh
                rep     movsb

UpCodeH:        mov     [codeh], di
                jmp     short WhileFile

EndFile:        ; put in a jump over the safe area

                mov     di, token       ; re-use token
                mov     al, 0e9h
                stosb
                mov     ax, [safeh]
                sub     ax, safe - 1
                stosw
                mov     al, 090h
                stosb

                mov     cx, 4
                mov     dx, token
                call    word WriteIt

                ; make the first word of the safe area an offset
                ; to just past the last word of the code 

                mov     cx, [safeh]
                mov     dx, safe
                sub     cx, dx
                mov     ax, cx
                add     ax, [codeh]
                sub     ax, codeadj
                mov     [safe], ax

                call    word WriteIt

                mov     cx, [codeh]
                mov     dx, code
                sub     cx, dx
                call    word WriteIt

                xor     al, al

GlobalExit:     mov     ah, 4ch         ; exit to DOS
                int     21h

WriteIt:
                mov     ah, 40h         ; write data to file
                mov     bx, 1           ; filehandle #1 = stdout
                int     21h
                jnc     .OK
                mov     al, 32
                jmp     short GlobalExit
.OK:            ret

; -------------------------------- HANDLERS --------------------------- ;
; When coming into any handler, di will equal the address of the null
; (that is, the number of characters in the token + offset token)

; ==== [ ==== BEGIN BLOCK ==== ;

BeginBlock:     mov     di, [stach]     ; push [ onto stack
                mov     ax, [codeh]
                stosw                   ; mov   [bx], ax
                mov     [stach], di
                ret

; ==== ] ==== END BLOCK ==== ;

EndBlock:       dec     di              ; di left over from scanning token

                mov     bx, di          ; di now free to hold something until .WName
                sub     bx, si          ; get length of token
                mov     [toklength], bx ; store it for later

                mov     ax, [safeh]
                mov     [safestart], ax
                mov     dx, ax          ; dx = namestart initially = safestart = safeh
                xchg    ax, di          ; di now holds safe area head location

                sub     word [stach], byte 2
                mov     bx, [stach]     ; pop [ from stack
                mov     ax, [bx]        ; ax = codeh when [ happened

                mov     bp, [codeh]     ; find length
                sub     bp, ax          ; bp = length of code between [ ... ] (codeh - old codeh)

                cmp     word [stach], stac
                je      .StackEmpty

                mov     cx, [bx - 2]     ; cx = contents popped from stack

                ; namestart:dx = namestart:dx - (contents:cx - tokenlength:ax)

                sub     cx, ax
                sub     dx, cx

.StackEmpty:    cmp     byte [si], ':'  ; si still = offset token + 1
                jne     .PreCopy

                mov     di, [macrh]     ; copy into macro area instead of safe area if :
                mov     dx, di

                ; copy everything from ax to codeh into the di area

.PreCopy:       push    ax
                mov     cx, bp
                push    si
                xchg    si, ax
                rep     movsb
                pop     si
                pop     ax

                ; restore codeh back to old codeh before [

                mov     [codeh], ax
                cmp     byte [si], ':'  ; si still = offset token + 1
                jne     .UpdateSafe

                mov     [macrh], di
                jmp     short .NameIt

.UpdateSafe:    mov     [safeh], di

                ; write push instruction if '=' or ':' not used

                cmp     byte [si], '='  ; si still = offset token + 1
                je      .NameIt

                mov     ax, [safestart]
                sub     ax, safeadj

                mov     di, [codeh]      ; di no longer contains macrh/safeh
                jmp     short WritePush

                ; insert namestart into dictionary

.NameIt:        mov     cx, dx
                mov     ax, [toklength]

                inc     si

.WName:         ; Insert token into the symbol table.
                ; DESTROYS: DI
                ; INPUT:    si = pointer to token text
                ;           ax = length of token text
                ;           cx = pointer to data associated with token
                ;           bp = length of data associated with token

                mov     di, [symth]     ; di no longer contains macrh/safeh
                add     ax, 6           ; 1 word for length, 1 for ptr, 1 for data length

                stosw                   ; place ax length in symt

                sub     ax, 6
                xchg    cx, ax          ; cx <- ax; ax <- cx
                stosw                   ; place cx (ptr to data)
                xchg    ax, bp          
                stosw                   ; place bp (ptr length)

                rep     movsb

                mov     [symth], di

                ret

; ==== ^ ==== PUSH POINTER ==== ;

PushPointer:    call    LookupSymbol    ; destroys di & si, should be OK

                sub     ax, safeadj
                mov     di, [codeh]
                jmp     short WritePush

; ==== ` ==== STRING ==== ;

String:         mov     di, [codeh]
.Loop:          mov     al, [si]
                stosb
                inc     si
                cmp     byte [si], 0
                jne     .Loop
                mov     [codeh], di
                ret

; ==== _ ==== LITERAL BYTE ==== ;

LiteralByte:    cmp     byte [si], '_'
                je      LiteralWord
                cmp     byte [si], '^'
                je      LiteralSymbol
                call    DecipherDecimal ; sets DI to [codeh]
                jmp     short GnarlyTrick

; ==== __ ==== LITERAL WORD ==== ;

LiteralWord:    inc     si
                call    DecipherDecimal ; sets DI to [codeh]
FunkyTrick:     stosw
                jmp     short CheapTrick

; ==== _^ ==== LITERAL SYMBOL ==== ;

LiteralSymbol:  inc     si
                call    LookupSymbol    ; destroys DI & SI, that's OK

                sub     ax, safeadj

                mov     di, [codeh]
                jmp     short FunkyTrick

; ==== \ ==== PUSH WORD ==== ;

PushWord:       call    DecipherDecimal ; sets DI to [codeh]

WritePush:      mov     byte [di], 0b8h ; B8h, low byte, high byte, 50h
                inc     di
                stosw
                mov     al, 50h
GnarlyTrick:    stosb
CheapTrick:     mov     [codeh], di
                ret

; -------------------------------- SUBROUTINES --------------------------- ;

DecipherDecimal:
                ; INPUT: si = address of token
                ; OUTPUT: ax = value, di = codeh
                ; uses and destroys DI

                xor     di, di

.Loop:          lodsb

                mov     bx, di
                mov     cl, 3
                shl     bx, cl
                mov     cx, di
                shl     cx, 1
                add     bx, cx

                sub     al, '0'
                cbw
                add     bx, ax
                mov     di, bx

                cmp     byte [si], '0'
                jae     .Loop

                xchg    ax, di
                mov     di, [codeh]
                ret

; Scans a single character from the input file, placing
; it in register al, which will be 0 upon error
; or eof (so don't embed nulls in the Shelta source...)

ScanChar:
                mov     ah, 7           ; read from stdin one byte
                int     21h
                cmp     al, ';'         ; check for comment
                je      .Comment
                ret
.Comment:       mov     ah, 7           ; read from stdin one byte
                int     21h
                cmp     al, ';'         ; check for comment
                jne     .Comment
                jmp     short ScanChar

LookupSymbol:
                ; INPUT:  si = address of symbol to find, di = address of null termination
                ; OUTPUT: ds:ax = pointer to contents or zero if not found
                ; cx = length of contents

                mov     bx, symt        ; bx starts at symbol table
                mov     bp, si
                sub     di, si

.Loop:          mov     ax, [bx]        ; first word = token size

                mov     dx, bx          ; keep track of start of this symt entry

                sub     ax, 6
                cmp     ax, di
                jne     .Exit           ; if it doesn't fit, you must acquit

;   exit if right token

                xor     si, si          ; reset si to token
.Inner:         mov     al, [bx + 6]    ; get byte from bx+6=pointer to token text
                cmp     [bp + si], al   ; compare to si=token
                jne     .Exit
                inc     bx
                inc     si
                cmp     si, di          ; hit the length yet?
                jb      .Inner          ; no, repeat

                ;   a match!

                mov     bx, dx
                mov     cx, [bx + 4]    ; third word = data length
                mov     ax, [bx + 2]    ; second word = data ptr 
                ret

.Exit:          mov     bx, dx
                mov     ax, [bx]
                add     bx, ax
                cmp     bx, [symth]
                jb      .Loop

                mov     al, 16          ; return 16 if unknown identifier
                jmp     GlobalExit

;-------------- Initialized Data

symth:          dw      symt
codeh:          dw      code
stach:          dw      stac
safeh:          dw      safe + 2
macrh:          dw      macr

ttable:         dw      BeginBlock, PushWord, EndBlock, PushPointer, LiteralByte, String
;                       [           \         ]         ^            _            `

;-------------- Uninitialized Data

section .bss

token:          resb    128

safestart:      resw    1
toklength:      resw    1

safe:           resb    16384
symt:           resb    16384   ; 16K + 16K = 32K
code:           resb    4096
macr:           resb    4096    ; + 8K = 40K
stac:           resb    256

;-------------- Equates

safeadj         equ     (safe - 0104h)
codeadj         equ     (code - 0104h)