IDEAL
; shelta86.asm v1999.10.20 (c)1999 Chris Pressey, Cat's-Eye Technologies.
; Implements an assembler/compiler for the Shelta language, in 8086 assembly.
; * Special thanks to Ben Olmstead (BEM) for his suggestions for how to
; reduce SHELTA86.COM's size even further.
MODEL tiny
P8086
DATASEG
symth dw symt
codeh dw code
stach dw stac
safeh dw safe + 2
macrh dw macr
ttable dw BeginBlock, PushWord, EndBlock, PushPointer, LiteralByte ; , String
; [ \ ] ^ _ `
UDATASEG
token db 128 dup (?)
safestart dw ?
namestart dw ?
toklength dw ?
safe db 16384 dup (?)
symt db 16384 dup (?) ; 16K + 16K = 32K
code db 4096 dup (?) ;
macr db 4096 dup (?) ; + 8K = 40K
stac db 256 dup (?)
CODESEG
ORG 0100h
; EQUATES
safeadj EQU (offset safe - 0104h)
codeadj EQU (offset code - 0104h)
; Main program.
PROC Main
WhileFile:
; ----- begin scanning token
call ScanChar ; get char -> al
or al, al
jz @@EndFile
cmp al, 32
jbe WhileFile ; repeat if char is whitespace
mov di, offset token
cld
@@TokenLoop: stosb ; put char in token
call ScanChar ; get char
cmp al, 32
ja @@TokenLoop ; repeat if char is not whitespace
@@Terminate: mov [byte di], 0 ; return null-terminated token
; ----- end scanning token
mov si, offset token + 1
mov al, [byte token]
sub al, '['
cmp al, 4
ja @@Unroll
xor ah, ah
shl ax, 1
xchg bx, ax
mov ax, [offset ttable + bx]
jmp ax ; jump to handler as listed in ttable
@@Unroll: dec si ; start at first character of token
call LookupSymbol ; destroys DI & SI, but that's OK
; copy cx bytes from ax to codeh
xchg ax, si
mov di, [codeh] ; use di to track codeh
rep movsb
UpCodeH: mov [codeh], di
jmp short WhileFile
@@EndFile: ; put in a jump over the safe area
mov ax, [safeh]
sub ax, offset safe - 1
mov bx, offset token ; re-use token
mov [byte bx], 0e9h
mov [word bx + 1], ax
mov [byte bx + 3], 90h
mov cx, 4
mov dx, offset token
call WriteIt
; make the first word of the safe area an offset
; to just past the last word of the code
mov cx, [safeh]
mov dx, offset safe
sub cx, dx
mov ax, cx
add ax, [codeh]
sub ax, codeadj
mov [word safe], ax
call WriteIt
mov cx, [codeh]
mov dx, offset code
sub cx, dx
call WriteIt
xor al, al
GlobalExit: mov ah, 4ch ; exit to DOS
int 21h
ENDP Main
PROC WriteIt
mov ah, 40h
mov bx, 1
int 21h
jnc @@OK
mov al, 32
jmp short GlobalExit
@@OK: ret
ENDP WriteIt
; -------------------------------- HANDLERS --------------------------- ;
; When coming into any handler, di will equal the address of the null
; (that is, the number of characters in the token + offset token)
; ==== [ ==== BEGIN BLOCK ==== ;
BeginBlock: mov di, [stach] ; push [ onto stack
mov ax, [codeh]
stosw ; mov [bx], ax
mov [stach], di
jmp WhileFile
; ==== ] ==== END BLOCK ==== ;
EndBlock: ;mov si, offset token + 1 ; si = token + 1 until...
;cmp [byte ds:si], '='
;je @@Smaller
;cmp [byte ds:si], ':'
;je @@Smaller
;jmp short @@CarryOn
; remove : or = from length
@@Smaller: dec di ; di left over from scanning token
@@CarryOn: mov bx, di ; di now free to hold something until @@WName
sub bx, si ; get length
mov ax, [safeh]
mov [safestart], ax
mov [namestart], ax
xchg ax, di ; di now holds safe area head location
mov [toklength], bx ; length of token
sub [stach], 2
mov bx, [stach] ; pop [ from stack
mov ax, [bx] ; ax = codeh when [ happened
mov bp, [codeh] ; find length
sub bp, ax
; mov bp, bx ; bp = length of data between [ ... ]
; until @@WName below... ugh
cmp [stach], offset stac
je @@StackEmpty
mov bx, [stach]
sub bx, 2
mov cx, [bx]
; namestart = [namestart] - (cx - ax)
sub cx, ax
sub [namestart], cx
; if dlength > 0,
@@StackEmpty: ;or bp, bp
;jz @@Empty
cmp [byte si], ':' ; si still = offset token + 1
jne @@PreCopyLoop
mov di, [macrh] ; use macro area instead of safe if :
mov [namestart], di
; copy everything from ax to codeh into the di area
@@PreCopyLoop: mov dx, ax
mov cx, bp ; [codeh] sub cx, ax
push si
xchg si, ax
rep movsb
pop si
; change codeh back to dx (old codeh before [)
mov [codeh], dx
;mov si, offset token + 1
cmp [byte si], ':' ; si still = offset token + 1
je @@UpdateMacr
mov [safeh], di
jmp short @@Empty
@@UpdateMacr: mov [macrh], di
;jmp short @@NameIt
; write push instruction if '=' or ':' not used
@@Empty: ;cmp [byte si], '=' ; si still = offset token + 1
;je @@NameIt
;mov ax, [safestart]
;sub ax, safeadj
;mov bx, [word codeh]
;mov [byte bx], 0b8h
;mov [word bx + 1], ax
;mov [byte bx + 3], 50h
;add [codeh], 4
;cmp [byte si], 0 ; still offset token + 1!
;je @@Anonymous
; insert namestart into dictionary
@@NameIt: mov cx, [namestart]
mov ax, [toklength]
;cmp [byte si], '='
;je @@Bigger
;cmp [byte si], ':'
;je @@Bigger
;jmp short @@WName
@@Bigger: inc si
@@WName: ; Destroys DI but that's OK.
; INPUT: bx = ADDRESS of token to insert, ax = length of symbol,
; cx = pointer to data, dx = length of data
; OUTPUT: ds:bx = pointer to newly allocated symbol
mov di, [symth] ; di no longer contains macrh/safeh
add ax, 6 ; 1 word for length, 1 for ptr, 1 for data length
add [symth], ax
stosw ; mov [word di], ax ; place ax length in symt
sub ax, 6
xchg cx, ax ; cx <- ax; ax <- cx
stosw ; mov [word di], cx ; place cx (ptr to data)
xchg ax, bp
stosw ; mov [word di], bp ; place bp (ptr length)
rep movsb
mov [symth], di
@@Anonymous: jmp WhileFile
; ==== ^ ==== PUSH POINTER ==== ;
PushPointer: ;mov si, offset token + 1
call LookupSymbol ; destroys di & si, should be OK
sub ax, safeadj
mov di, [word codeh]
jmp short WritePush
; ==== ` ==== STRING ==== ;
;
;String: ;mov si, offset token + 1
; mov di, [codeh]
;@@Loop: mov al, [byte ds:si]
; stosb
; inc si
; cmp [byte ds:si], 0
; jne @@Loop
; jmp UpCodeH
; ==== _ ==== LITERAL BYTE ==== ;
LiteralByte: ;mov si, offset token + 1
cmp [byte si], '_'
je LiteralWord
cmp [byte si], '^'
je LiteralSymbol
call DecipherDecimal ; destroys DI, that's OK
stosb ; mov [byte bx], al
CheapTrick: mov [codeh], di
jmp WhileFile
; ==== __ ==== LITERAL WORD ==== ;
LiteralWord: inc si
call DecipherDecimal ; destroys DI, that's OK
FunkyTrick: stosw ; mov [word bx], ax
jmp short CheapTrick
; ==== _^ ==== LITERAL SYMBOL ==== ;
LiteralSymbol: inc si
call LookupSymbol ; destroys DI & SI, that's OK
sub ax, safeadj
mov di, [word codeh]
jmp short FunkyTrick
;mov [word bx], ax
;inc [codeh]
;jmp short CheapTrick
; ==== \ ==== PUSH WORD ==== ;
PushWord: ;mov si, offset token + 1
call DecipherDecimal ; destroys di, that's OK
WritePush: mov [byte di], 0b8h ; B8h, low byte, high byte, 50h
inc di
stosw ; mov [word di + 1], ax
mov al, 50h
stosb
mov [codeh], di
jmp WhileFile
; -------------------------------- SUBROUTINES --------------------------- ;
PROC DecipherDecimal ; uses and destroys DI
; INPUT: si = address of token
; OUTPUT: ax = value, di = codeh
xor di, di
@@Loop: lodsb ; mov al, [byte ds:si], inc si
mov bx, di
mov cl, 3
shl bx, cl
mov cx, di
shl cx, 1
add bx, cx
sub al, '0'
cbw
add bx, ax
mov di, bx
cmp [byte ds:si], '0'
jae @@Loop
xchg ax, di
mov di, [word codeh]
ret
ENDP DecipherDecimal
PROC ScanChar
; Scans a single character from the input file, placing
; it in register al, which will be 0 upon error
; or eof (so don't embed nulls in the Shelta source...)
mov ah, 7 ; read from stdin one byte
int 21h
cmp al, ';' ; check for comment
je @@Comment
ret
@@Comment: mov ah, 7 ; read from stdin one byte
int 21h
cmp al, ';' ; check for comment
jne @@Comment
jmp short ScanChar
ENDP ScanChar
PROC LookupSymbol
; INPUT: si = address of symbol to find, di = address of null termination
; OUTPUT: ds:ax = pointer to contents or zero if not found
; cx = length of contents
mov bx, offset symt ; bx starts at symbol table
mov bp, si
sub di, si
@@Loop: mov ax, [word bx] ; first word = token size
mov dx, bx ; keep track of start of this symt entry
sub ax, 6
cmp ax, di
jne @@Exit ; if it doesn't fit, you must acquit
add bx, 6 ; bx now points to token in symbol table
; exit if right token
xor si, si ; reset si to token
@@Inner: mov al, [byte ds:bx] ; get byte from bx=symt
cmp [byte bp + si], al ; compare to si=token
jne @@Exit
inc bx
inc si
cmp si, di ; hit the length yet?
jb @@Inner ; no, repeat
; a match!
mov bx, dx
mov cx, [word bx + 4] ; third word = data length
mov ax, [word bx + 2] ; second word = data ptr
ret
@@Exit: mov bx, dx
mov ax, [word bx]
add bx, ax
cmp bx, [symth]
jb @@Loop
mov al, 16 ; return 16 if unknown identifier
jmp GlobalExit
ENDP LookupSymbol
END Main