[colorforth] AHA compiler prototype
- Subject: [colorforth] AHA compiler prototype
- From: Adam Marquis <adam.marquis@xxxxxxxxxxxx>
- Date: Wed, 18 Jun 2003 19:14:53 -0400
Hi everyone,
Sorry for the big message, but here's a little project
I'm working on, in assembly source.
Maybe not truly in the spirit of forth, but anyway:
It's a 32 bit, byte oriented AHA style compiler for x86
implementing dictionary unification (I just read your comments
about it) and 5 token types.
-Compile a relative call using index of dictionary (7 bits index)
-Jump to absolute address written in dictionary (6 bits index)
-Copy following bytes (5 bits count)
-Definition (4 bits count)
-Comment (4 bits count)
The dictionary array is 128 addresses long. The first 64
entries are executable at compile time.
Dictionary searches are resolved in the editor, the AHA way of
doing things. Tokens are shifted one bit at a time (Huffman-style
encoding; the listing below shifts right) and a branch on carry selects the token type.
A typical program starts with definitions and bincopy tokens
(no macro/forth words and definition conflicts) and then has
compileword tokens, along with comments spread anywhere.
Literals are handled by a special macro, resolved at edit time;
see the code below.
So there's at least one mandatory macro in every program, "end".
The others are the one needed by the program only.
Or maybe in a complete system they could be written
in a static dictionary.
It's all there; it's far from finished, but it's on its way.
I got ?dup and ?lit working, and that's the trickiest part.
Look carefully at the register usage before reading,
and look at the "Cbinary" label; it's almost straight from
the colorforth source taken at merlintec.
I have yet to write the editor, but before I want a top notch compiler
so feel free to comment.
Adam
=============================
;Written in FASM v1.46 GUI
;
;Register usage @ Compile time
;EAX: Main scratch register
;EBX: Most recent optimizable instruction
;ECX: scratch for "rep movsb"
;EDX: Second optimizable instruction
;ESI: next source code byte pointer
;EDI: next compiled byte
;EBP: Next dictionary entry pointer
;ESP: Top of return stack pointer
use32
macro align value { rb (value-1)-($+value-1) mod value }
CompileCall:
mov [EDI], byte 0E8h ;call rel32
mov EDX, EBX ;Needed only for push2 jump1
mov EBX, EDI ;Semicolon optimization
mov EAX, [EAX*4+Dictionary]
lea EDI, [EDI+5]
sub EAX, EDI
mov [EDI-4], EAX
ret
ExecuteWord:
jmp dword [EAX*4+Dictionary]
BinaryRecord:
inc EAX
mov ECX, EAX
rep movsb ;cld
ret
Detoken:
and EAX, 0FFh ;Tokens are 1 byte wide
shr EAX, 1
jc CompileCall
shr EAX, 1
jc ExecuteWord
shr EAX, 1
jc BinaryRecord
shr EAX, 1
jc SkipComment ;forward reference
WriteDefinition:
mov [EBP], EDI
lea EBP, [EBP+4]
mov EBX, ESP ;Optimization, see below
SkipComment:
inc ESI
add ESI, EAX
ret
Compiler:
push ESI
mov ESI, EAX ;start of source code in Top of stack
mov EBP, Dictionary ;Start defining macros at first
@@: mov EAX, [ESI]
inc ESI ;thanks KC5TJA ;pp
call Detoken
jmp @b
pop ESI ;Reenable the data stack
lodsd ;Runtime drop
ret
align 4
Dictionary:
times 128 dd 0
;The compiler code ends here. The rest is macros
;To end compilation, the end word is mandatory.
;==========
; end
;End of source code, stop the compiler.
Stopcompiler:
add byte [ESP], 2
ret
;==========
; dup
; ?dup
;Optimization used inside primitive
;definitions, get rid of "drop dup"
;in compiled code.
;* lodsd could be replaced with its longer
; version to gain speed over size
Cdup:
mov dword [EDI], 89FC768Dh
mov byte [EDI+4], 06h
add EDI, 5
ret
Qdup:
lea EAX, [EDI-1]
cmp EBX, EAX
jnz Cdup
cmp byte [EAX], 0ADh
jnz Cdup
mov EDI, EAX
ret
; ; (semicolon)
; ret
; nop
;Compiles a ret or, if preceded by a call,
;transforms "call ret" into "jmp" and
;"call1 jmp2" into "push2 jmp1".
;nop is used to prevent the lookback optimization
;and for alignment purposes.
Semicolon:
lea EAX, [EDI-5]
cmp EBX, EAX
jnz Cret
cmp byte [EAX], 0E8h
jnz Cret
inc byte [EAX] ;jmp
lea EAX, [EAX-5]
cmp EDX, EAX
jnz @f
cmp byte [EAX], 0E8h
jnz @f
mov byte [EAX], 68h ;push
inc EAX
push dword [EAX] ;\
push dword [EAX+5] ; Swap the 2
pop dword [EAX] ; addresses
pop dword [EAX+5] ;/
ret
Cret:
mov byte [EDI], 0C3h ;ret
inc EDI
ret
Cnop:
mov byte [EDI], 90h ;nop
inc EDI
ret
; lit
; ?lit
;?lit is used by primitives (as ?dup is). It returns with the zero flag
;SET if there is no literal value right behind the primitive being compiled,
;and with the zero flag CLEAR (literal in EAX) if there is one.
;The code handles the ?dup optimization.
Clit:
call Qdup
mov EDX, EBX
mov EBX, EDI
mov byte [EDI], 0B8h ;mov EAX, imm32
inc EDI
movsd
ret
Qlit:
lea EAX, [EDI-5]
cmp EBX, EAX
jnz @f
cmp byte [EAX], 0B8h
jnz @f
mov EBX, EDX
mov EAX, dword [EDI+1]
cmp dword [EDI-5], 89FC768Dh ;dup
jz nodup
add EDI, -5
Cdrop:
mov byte [EDI], 0ADh ;lodsd
mov EBX, EDI ;Optimization
inc EDI ;Zero flag clear
ret
nodup:
add EDI, -10 ;Zero flag clear
ret
@@: xor EAX, EAX ;Zero flag set
ret
; push
; pop
; swap
; over
;The other missing stack manipulation word definitions.
Cpush:
call Qlit
jz @f
mov byte [EDI], 68h ;push imm32
inc EDI
stosd
ret
@@: mov byte [EDI], 50h ;push EAX
inc EDI
jmp Cdrop
Cpop:
call Qdup
mov byte [EDI], 58h ;pop EAX
inc EDI
ret
Cswap:
mov dword [EDI], 5836FF50h ;push EAX, push [ESI], pop EAX
add ESI, 4
mov word [EDI], 068Fh ;pop [ESI]
inc EDI
inc EDI
ret
Cover:
call Qdup
mov dword [EDI], 04468Bh ;mov EAX, [ESI+4]
add EDI, 3
ret
; +
; binary
; nip
; and
; xor
; -
; 2*
; 2/
; *
; /mod
; /
; mod
; */
;Those are the main words for binary arithmetic.
Cadd:
mov word [EDI], 603h ;add EAX, [ESI]
Cbinary:
call Qlit
jz @f
add byte [EDI], 2 ;*** EAX, imm32
inc EDI ;overwrite 06h byte
stosd
ret
@@:
inc EDI ;\keep the written code
inc EDI ;/and add a nip
Cnip:
mov dword [EDI], 04768Dh ;lea ESI, [ESI+4]
add EDI, 3
ret
Cand:
mov word [EDI], 623h ;and EAX, [ESI]
jmp Cbinary
Cxor:
mov word [EDI], 633h ;xor EAX, [ESI]
jmp Cbinary
Cnot:
mov word [EDI], 0D0F7h ;not EAX
inc EDI
inc EDI
ret
C2star:
mov word [EDI], 0E0D1h ;shr EAX, 1
inc EDI
inc EDI
ret
C2slash:
mov word [EDI], 0E8D1h ;shl EAX, 1
inc EDI
inc EDI
ret
Cstar: mov dword [EDI], 06AF0Fh ;imul EAX, [ESI]
add EDI, 3
ret
Cdivmod: ;nd - rq
call Cswap
mov byte [EDI], 99h ;cdq
inc EDI
mov dword [EDI], 16893EF7h ;idiv [ESI] | mov [ESI], EDX
add ESI, 4
ret
Cslash: call Cdivmod
call Cnip
ret
Cmod: call Cdivmod
call Cdrop
ret
CStarSlash:
; @
; a@
; a!
; !
; !+
; @+
;These words use dword addressing.
;Store use the EDI register as Address register
;! use ?lit for the two operands.
Cfetch:
call Qlit
jz @f
call Qdup
mov word [EDI], 0A1h ;mov EAX, [imm32]
inc EDI
shl EAX, 1 ;\Dword adressing
shl EAX, 1 ;/optimized at compile time
stosd
ret
@@:
mov dword [EDI],0E0D1E0D1h ;2x"shl EAX, 1"
add EDI, 4
mov word [EDI], 008Bh ;mov EAX, [EAX]
add EDI, 2
ret
CAfetch:
call Qdup
mov word [EDI], 0F889h ;mov EAX, EDI
add ESI, 2
ret
CAstore:
call Qlit
jz @f
mov byte [EDI], 0BFh ;mov EDI, imm32
inc EDI
stosd
ret
@@:
mov word [EDI], 0C789h ;mov EDI, EAX
add ESI, 2
jmp Cdrop
Cstore:
call Qlit
jz Cstorenolit
push EAX
call Qlit
jz @f
mov word [EDI], 05C7h ;mov [imm32], imm32
inc EDI
inc EDI
pop dword [EDI]
shl dword [EDI], 2
add EDI, 4
stosd
ret
@@:
pop EAX
mov word [EDI], 0589h ;mov [imm32], eax
inc EDI
inc EDI
stosd
ret
Cstorenolit:
call CAstore
mov dword [EDI], 0BD0489h ;mov [EDI*4]
add ESI, 3
stosd ;0
jmp Cdrop
---------------------------------------------------------------------
To unsubscribe, e-mail: colorforth-unsubscribe@xxxxxxxxxxxxxxxxxx
For additional commands, e-mail: colorforth-help@xxxxxxxxxxxxxxxxxx
Main web page - http://www.colorforth.com