git @ Cat's Eye Technologies Shelta / master src / shelta86.asm
master

Tree @master (Download .tar.gz)

shelta86.asm @masterraw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
IDEAL

;  shelta86.asm v1999.10.20 (c)1999 Chris Pressey, Cat's-Eye Technologies.
;  Implements an assembler/compiler for the Shelta language, in 8086 assembly.

;  * Special thanks to Ben Olmstead (BEM) for his suggestions for how to
;    reduce SHELTA86.COM's size even further.

MODEL	tiny
P8086

DATASEG

symth		dw	symt
codeh		dw	code
stach		dw	stac
safeh		dw	safe + 2
macrh		dw	macr

ttable		dw	BeginBlock, PushWord, EndBlock, PushPointer, LiteralByte ; , String
;			[           \         ]         ^            _               `

UDATASEG

token		db	128 dup (?)

safestart	dw	?
namestart	dw	?
toklength	dw	?

safe		db	16384 dup (?)
symt		db	16384 dup (?)	; 16K + 16K = 32K
code		db	4096 dup (?)	; 
macr		db	4096 dup (?)	; + 8K = 40K
stac		db	256 dup (?)

CODESEG
ORG 0100h

; EQUATES

safeadj		EQU	(offset safe - 0104h)
codeadj		EQU	(offset code - 0104h)

; Main program.
PROC		Main

WhileFile:

; ----- begin scanning token

		call	ScanChar	; get char -> al
		or	al, al
		jz	@@EndFile
		cmp	al, 32
		jbe	WhileFile	; repeat if char is whitespace

		mov	di, offset token
		cld

@@TokenLoop:	stosb			; put char in token
		call	ScanChar	; get char
		cmp	al, 32
		ja	@@TokenLoop	; repeat if char is not whitespace

@@Terminate:	mov	[byte di], 0  ; return null-terminated token

; ----- end scanning token

		mov	si, offset token + 1

		mov	al, [byte token]
		sub	al, '['
		cmp	al, 4
		ja	@@Unroll

		xor	ah, ah
		shl	ax, 1
		xchg	bx, ax
		mov	ax, [offset ttable + bx]
		jmp	ax		; jump to handler as listed in ttable

@@Unroll:	dec	si		; start at first character of token
		call	LookupSymbol	; destroys DI & SI, but that's OK

		; copy cx bytes from ax to codeh

		xchg	ax, si
		mov	di, [codeh]		; use di to track codeh
		rep	movsb

UpCodeH:	mov	[codeh], di
		jmp	short WhileFile

@@EndFile:	; put in a jump over the safe area

		mov	ax, [safeh]
		sub	ax, offset safe - 1
		mov	bx, offset token	; re-use token
		mov	[byte bx], 0e9h
		mov	[word bx + 1], ax
		mov	[byte bx + 3], 90h

		mov	cx, 4
		mov	dx, offset token
		call	WriteIt

		; make the first word of the safe area an offset
		; to just past the last word of the code 

		mov	cx, [safeh]
		mov	dx, offset safe
		sub	cx, dx
		mov	ax, cx
		add	ax, [codeh]
		sub	ax, codeadj
		mov	[word safe], ax

		call	WriteIt

		mov	cx, [codeh]
		mov	dx, offset code
		sub	cx, dx
		call	WriteIt
		
		xor	al, al

GlobalExit:	mov     ah, 4ch		; exit to DOS
		int     21h
ENDP		Main

PROC		WriteIt

		mov	ah, 40h
		mov	bx, 1
		int	21h
		jnc	@@OK
		mov	al, 32
		jmp	short GlobalExit
@@OK:		ret
ENDP		WriteIt

; -------------------------------- HANDLERS --------------------------- ;
; When coming into any handler, di will equal the address of the null
; (that is, the number of characters in the token + offset token)

; ==== [ ==== BEGIN BLOCK ==== ;

BeginBlock:	mov	di, [stach]			; push [ onto stack
		mov	ax, [codeh]
		stosw					; mov	[bx], ax
		mov	[stach], di
                jmp     WhileFile

; ==== ] ==== END BLOCK ==== ;

EndBlock:	;mov	si, offset token + 1	; si = token + 1 until...
		;cmp	[byte ds:si], '='
		;je	@@Smaller
		;cmp	[byte ds:si], ':'
		;je	@@Smaller
		;jmp	short @@CarryOn
						; remove : or = from length
@@Smaller:	dec	di			; di left over from scanning token

@@CarryOn:	mov	bx, di			; di now free to hold something until @@WName
		sub	bx, si			; get length

		mov	ax, [safeh]
		mov	[safestart], ax
		mov	[namestart], ax
		xchg	ax, di			; di now holds safe area head location

		mov	[toklength], bx		; length of token
		sub	[stach], 2
		mov	bx, [stach]		; pop [ from stack

		mov	ax, [bx]		; ax = codeh when [ happened

		mov	bp, [codeh]		; find length
		sub	bp, ax
		; mov	bp, bx			; bp = length of data between [ ... ]
						; until @@WName below... ugh

		cmp	[stach], offset stac
		je	@@StackEmpty


		mov	bx, [stach]
		sub	bx, 2
		mov	cx, [bx]

		; namestart = [namestart] - (cx - ax)

		sub	cx, ax
		sub	[namestart], cx

		; if dlength > 0,

@@StackEmpty:	;or	bp, bp
		;jz	@@Empty

		cmp	[byte si], ':'		; si still = offset token + 1
		jne	@@PreCopyLoop

		mov	di, [macrh]		; use macro area instead of safe if :
		mov	[namestart], di

		; copy everything from ax to codeh into the di area

@@PreCopyLoop:	mov	dx, ax
		mov	cx, bp 	; 		[codeh]		sub	cx, ax
		push	si
		xchg	si, ax
		rep	movsb
		pop	si

		; change codeh back to dx (old codeh before [)

		mov	[codeh], dx

		;mov	si, offset token + 1
		cmp	[byte si], ':'		; si still = offset token + 1
		je	@@UpdateMacr

		mov	[safeh], di
		jmp	short @@Empty
@@UpdateMacr:	mov	[macrh], di
		;jmp	short @@NameIt

		; write push instruction if '=' or ':' not used

@@Empty:	;cmp	[byte si], '='			; si still = offset token + 1
		;je	@@NameIt

		;mov	ax, [safestart]
		;sub	ax, safeadj
		;mov	bx, [word codeh]
		;mov	[byte bx], 0b8h
		;mov	[word bx + 1], ax
		;mov	[byte bx + 3], 50h
		;add	[codeh], 4

		;cmp	[byte si], 0			; still offset token + 1!
                ;je      @@Anonymous

		; insert namestart into dictionary

@@NameIt:	mov	cx, [namestart]
		mov	ax, [toklength]

		;cmp	[byte si], '='
		;je	@@Bigger
		;cmp	[byte si], ':'
		;je	@@Bigger
		;jmp	short @@WName

@@Bigger:	inc	si

@@WName:	; Destroys DI but that's OK.
		; INPUT:  bx = ADDRESS of token to insert, ax = length of symbol,
		; cx = pointer to data, dx = length of data
		; OUTPUT: ds:bx = pointer to newly allocated symbol

		mov	di, [symth]		; di no longer contains macrh/safeh
		add	ax, 6			; 1 word for length, 1 for ptr, 1 for data length
		add	[symth], ax

		stosw	; mov	[word di], ax	; place ax length in symt

		sub	ax, 6
		xchg	cx, ax			; cx <- ax; ax <- cx
		stosw	; mov	[word di], cx	; place cx (ptr to data)
		xchg	ax, bp		
		stosw	; mov	[word di], bp	; place bp (ptr length)

		rep	movsb

		mov	[symth], di

@@Anonymous:    jmp     WhileFile

; ==== ^ ==== PUSH POINTER ==== ;

PushPointer:	;mov	si, offset token + 1
		call	LookupSymbol		; destroys di & si, should be OK

		sub	ax, safeadj
		mov	di, [word codeh]
		jmp	short WritePush

; ==== ` ==== STRING ==== ;
;
;String:		;mov	si, offset token + 1
;		mov	di, [codeh]
;@@Loop:		mov	al, [byte ds:si]
;		stosb
;		inc	si
;		cmp	[byte ds:si], 0
;		jne	@@Loop
;                jmp     UpCodeH

; ==== _ ==== LITERAL BYTE ==== ;

LiteralByte:	;mov	si, offset token + 1
		cmp	[byte si], '_'
		je	LiteralWord
		cmp	[byte si], '^'
		je	LiteralSymbol
		call	DecipherDecimal		; destroys DI, that's OK
		stosb	; mov	[byte bx], al
CheapTrick:	mov	[codeh], di
                jmp     WhileFile

; ==== __ ==== LITERAL WORD ==== ;

LiteralWord:	inc	si
		call	DecipherDecimal		; destroys DI, that's OK
FunkyTrick:	stosw	; mov	[word bx], ax
		jmp	short CheapTrick

; ==== _^ ==== LITERAL SYMBOL ==== ;

LiteralSymbol:	inc	si
		call	LookupSymbol		; destroys DI & SI, that's OK

		sub	ax, safeadj

		mov	di, [word codeh]
		jmp	short FunkyTrick
		;mov	[word bx], ax
		;inc	[codeh]
		;jmp	short CheapTrick

; ==== \ ==== PUSH WORD ==== ;

PushWord:	;mov	si, offset token + 1
		call	DecipherDecimal		; destroys di, that's OK

WritePush:	mov	[byte di], 0b8h	; B8h, low byte, high byte, 50h
		inc	di
		stosw   ;	mov	[word di + 1], ax
		mov	al, 50h
		stosb
		mov	[codeh], di
                jmp     WhileFile

; -------------------------------- SUBROUTINES --------------------------- ;

PROC		DecipherDecimal   ; uses and destroys DI
		; INPUT: si = address of token
		; OUTPUT: ax = value, di = codeh


		xor	di, di

@@Loop:		lodsb	; mov	al, [byte ds:si], inc si

		mov	bx, di
		mov	cl, 3
		shl	bx, cl
		mov	cx, di
		shl	cx, 1
		add	bx, cx

		sub	al, '0'
		cbw
		add	bx, ax
		mov	di, bx

		cmp	[byte ds:si], '0'
		jae	@@Loop

		xchg	ax, di
		mov	di, [word codeh]
		ret
ENDP		DecipherDecimal

PROC            ScanChar
; Scans a single character from the input file, placing
; it in register al, which will be 0 upon error
; or eof (so don't embed nulls in the Shelta source...)

		mov	ah, 7		; read from stdin one byte
		int	21h
		cmp	al, ';'		; check for comment
		je	@@Comment
		ret
@@Comment:	mov	ah, 7		; read from stdin one byte
		int	21h
		cmp	al, ';'		; check for comment
		jne	@@Comment
		jmp	short ScanChar

ENDP            ScanChar

PROC		LookupSymbol
		; INPUT:  si = address of symbol to find, di = address of null termination
		; OUTPUT: ds:ax = pointer to contents or zero if not found
		; cx = length of contents

		mov	bx, offset symt		; bx starts at symbol table
		mov	bp, si
		sub	di, si

@@Loop:		mov	ax, [word bx]		; first word = token size

		mov	dx, bx			; keep track of start of this symt entry

		sub	ax, 6
		cmp	ax, di
		jne	@@Exit			; if it doesn't fit, you must acquit

		add	bx, 6			; bx now points to token in symbol table

;   exit if right token

		xor	si, si			; reset si to token
@@Inner:	mov	al, [byte ds:bx]	; get byte from bx=symt
		cmp	[byte bp + si], al	; compare to si=token
		jne	@@Exit
		inc	bx
		inc	si
		cmp	si, di			; hit the length yet?
		jb	@@Inner			; no, repeat

		;   a match!

		mov	bx, dx
		mov	cx, [word bx + 4]	; third word = data length
		mov	ax, [word bx + 2]	; second word = data ptr 
		ret

@@Exit:		mov	bx, dx
		mov	ax, [word bx]
		add	bx, ax
		cmp	bx, [symth]
		jb	@@Loop

		mov	al, 16		; return 16 if unknown identifier
		jmp	GlobalExit

ENDP		LookupSymbol

END		Main