\ compiler.fs -- a simplified Prolog -> Warren Abstract Machine (WAM) compiler \ \ Copyright (C) December 2008 and January 2009 \ Adrian Prantl and Gergö Barany \ \ This program is free software: you can redistribute it and/or modify \ it under the terms of the GNU General Public License as published by \ the Free Software Foundation, either version 3 of the License, or \ (at your option) any later version. \ \ This program is distributed in the hope that it will be useful, \ but WITHOUT ANY WARRANTY; without even the implied warranty of \ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the \ GNU General Public License for more details. \ \ You should have received a copy of the GNU General Public License \ along with this program. If not, see \ \ Features: \ \ This file contains a recursive-descent parser for a subset of Prolog \ that generates WAM instructions on-the-fly, using a lookahead of 1 goal. \ It copes with facts, clauses and queries. Supported primitives are \ atoms, lists, named and anonymous variables, the :- operator and structures. \ Definitely NOT supported are integers, DCGs and infix operators apart from \ the ":-". \ The generated code tries to follow the one described in [Warren 1983] as \ closely as possible. The most significant difference is that the machine \ code ist output in postfix notation. This makes it possible to directly \ evaluate the generated code as forth words. The virtual machine \ implementation can be found in the file 'wam.fs'. \ This file also contains a toplevel shell that allows for readline-like \ command editing. \ \ Things to try out: \ \ Start the prolog shell with \ $ gforth wam.fs compiler.fs -e queries \ ... \ Enter queries, one per line; empty line when you're done. \ ?- concatenate(A,B,[a,b]). \ B = [a, b] \ A = [] \ ; \ B = [b] \ A = [a] \ ; \ B = [] \ A = [a, b] \ ; \ No (further) solutions \ ?- \ \ Type ';' to ask the system for more solutions. \ \ \ Literature: \ \ [Warren 1983] David H. D. Warren. "An abstract Prolog instruction set". \ Technical Note 309, SRI International, Menlo Park, CA, October 1983. Create VAR-Dict table , \  Create a case-sensitive wordlist Create X 0 , \ FIXME: use a struct instead Create Y 0 , Create A 0 , Create cur-mode 0 , Create compile-buf-start 4096 chars allot Create compile-buf compile-buf-start , Create last-clause 256 chars allot Create last-clause-u 0 , Create last-nargs 0 , Create clause# 1 , Create tailcall-flag 0 , Create tracing -1 , defer term defer structure defer cons defer cons, defer bind-new-var : not ( b b ) invert ; : nand ( b b ) invert and ; : set? ( u bit -- b ) tuck and = ; \ check whether bit is set : 3drop 2drop drop ; : 3dup { a b c } a b c a b c ; : inc ( addr -- u ) dup @ dup 1+ ( addr old new ) rot ! ; : dec ( addr -- ) dup @ 1- ( addr new ) swap ! ; : newA ( -- u ) A inc ; : newX ( -- u ) X inc ; : lastX ( -- u ) X @ 1- ; : x-mode 1 ; \ Compile into X-regs : a-mode 2 ; \ Compile into A-regs : c-mode 4 ; \ Compile against A-regs (clause mode) : FutureArg 256 ; \ Register assigned by the regalloc prepass : DirectArg 512 ; \ Register was assigned in the head : head? ( -- b ) cur-mode @ c-mode set? ; : query? ( -- b ) s" (toplevel)" last-clause last-clause-u @ compare 0 = ; : tailcall? ( -- b ) tailcall-flag @ ; \  Compilation buffer handling : uc->c+str { u c -- "cuuu(decimal)" } \ works only for u in 0-99 noname create here { addr } addr 3 chars allot c over ! 1 chars + u 10 / dup 0 > if [Char] 0 + over ! 1 chars + else drop endif u 10 mod [Char] 0 + over ! 1 chars + addr - addr swap ; : cb-next ( -- ) compile-buf @ 1 chars + compile-buf ! ; : push-ws ( -- ) ( ." " ) 32 compile-buf @ ! cb-next ; : push-cr ( -- ) ( cr ) 10 compile-buf @ ! cb-next ; : push-xt, ( addr u -- ) \  push the xt^H^H the string on the compile-buf \ 2dup type \ sadly doesn't work... \ nextname ' ( xt ) { u } compile-buf @ u cmove compile-buf @ u chars + compile-buf ! ; : push-xt ( addr u -- ) \  push the xt^H^H the string on the compile-buf push-xt, push-ws \ debug.. \ compile-buf @ u chars - u type cr ; : push-xt; ( addr u -- ) push-xt, push-cr ; : yreg ( u -- ) [Char] Y uc->c+str push-xt ; : xreg ( u -- ) [Char] X uc->c+str push-xt ; : areg ( u -- ) [Char] A uc->c+str push-xt ; : push-functor { addrF uF nargs -- } \ build and push 'f/3' addrF uF push-xt, nargs [Char] / uc->c+str push-xt, ; : push-comment; ( addr u -- ) s" (" push-xt push-xt s" )" push-xt; ; : push-clause-name ( addr u nargs ) push-functor s" -clause" push-xt, clause# @ [Char] - uc->c+str push-xt, ; : end-last-clause last-clause-u @ 0 > if \ let the last one fail s" : " push-xt, last-clause last-clause-u @ last-nargs @ push-clause-name s" pl-fail ;" push-xt; endif ; : clause-header { addr u nargs } addr u last-clause last-clause-u @ compare 0 <> if \ cr \ addr u type cr \ last-clause last-clause-u @ type cr \ cr end-last-clause nargs last-nargs ! \ inc 1 clause# ! addr last-clause u cmove u last-clause-u ! endif clause# inc drop \ s" Create " push-xt, addr u nargs push-clause-name push-cr clause# dec s" : " push-xt, addr u nargs push-clause-name s" " push-xt; ( debugging ) s\" tracing flag-set? if .\" \" cr endif" push-xt; clause# inc drop ( Backtracking ) s" (toplevel)" addr u cr ( 2dup type cr cr ) compare 0 = if s\" s\" ' pl-fail\" try_me_else" push-xt; else s\" s\" '" push-xt addr u nargs push-clause-name s\" \" try_me_else" push-xt; endif ( Trail stack management ) query? not if s" wam-allocate" push-xt; endif ; \ ---------------------------------------------------------------------- \ SCANNING \ ---------------------------------------------------------------------- : between ( c c1 c2 -- b ) rot tuck ( c1 c c2 c ) >= -rot ( b c1 c ) <= and ; : peek ( addr u -- addr u c ) \ dup invert throw \ assert u>0 over c@ \ dup emit .\" peeked\n" ; : peek-next ( addr u -- addr u c ) over 1+ c@ ; : expect { addr u c -- addr u } u 0 <= throw addr c@ c <> throw addr chars 1+ u 1- ; : alpha? { c -- b } c [Char] a [Char] z between c [Char] . = or c [Char] _ = or ; : vALPHA? ( c -- b ) [Char] A [Char] Z between ; : Num? ( c -- b ) [Char] 0 [Char] 9 between ; : (? ( c -- b ) [Char] ( = ; : )? ( c -- b ) [Char] ) = ; : [? ( c -- b ) [Char] [ = ; : ]? ( c -- b ) [Char] ] = ; : |? ( c -- b ) [Char] | = ; : ,? ( c -- b ) [Char] , = ; : .? ( c -- b ) [Char] . = ; : _? ( c -- b ) [Char] _ = ; : ws? 32 = ; : AlphaNum? ( c -- b ) dup ( c c ) alpha? ( c b ) swap ( b c ) dup ( b c c ) vALPHA? ( b c b ) swap ( b b c ) Num? ( b b c ) or or ; : Arrow? ( addr u -- addr u b ) peek [Char] : = >r peek-next [Char] - = r> and ; : Nil? ( addr u -- addr u b ) peek [Char] [ = >r peek-next [Char] ] = r> and ; : next-char ( addr u -- addr u ) swap chars 1+ swap 1- ; : prev-char ( addr u -- addr u ) swap chars 1- swap 1+ ; : skip-ws ( addr u -- addr u ) dup 0 <> if begin dup 0 <> >r peek ws? r> and while next-char repeat endif ; : scan-tok ( addr u -- addrRest u addrTok u ) 2dup dup 1+ 1 +do ( addr u addr u ) drop i peek ( addr u addr i c ) AlphaNum? not if 1- leave endif swap chars 1+ swap loop { addrTok u addrRest tok_length } \ ." TOK: " addrTok tok_length type cr addrRest u tok_length - addrTok tok_length ; : until) ( addr u -- addr u ) \ skip til matching ') dup 0= throw \ eof 0 begin ( addr u cnt ) >r \ 2dup type cr peek (? if r> 1+ >r else peek )? if r> 1- >r endif endif next-char r@ r> 0 = until drop ; : until] ( addr u -- addr u ) \ skip til matching '] dup 0= throw \ eof 0 begin ( addr u cnt ) >r \ 2dup type cr peek [? if r> 1+ >r else peek ]? if r> 1- >r endif endif next-char r@ r> 0 = until drop ; : IfxExp? ( addr u -- addr u b ) 2dup scan-tok 2drop peek (? if until) endif skip-ws Arrow? -rot 2drop ; \ Prepass: count #arguments, allocate registers for variables. : regalloc ( i addr u -- ) peek vAlpha? if rot FutureArg or -rot bind-new-var else 3drop endif ; : num-args-regalloc ( addr u -- addr u nargs ) \ count the number of args of a functor 2dup next-char 1 >r begin skip-ws scan-tok 2drop peek (? if until) endif skip-ws peek [? if until] endif skip-ws peek ,? if r> 1+ >r next-char skip-ws endif peek )? until \ register allocation for VAR-Dict next-char skip-ws dup 0 > if \ eof? Arrow? dup if >r next-char r> endif >r peek ,? r> or if next-char skip-ws scan-tok 2drop peek (? if \ functor next-char 1 >r begin skip-ws scan-tok r@ -rot regalloc peek (? if until) endif skip-ws peek [? if until] endif skip-ws peek ,? if r> 1+ >r next-char skip-ws endif peek )? until r> drop endif endif endif 2drop r> ; \ \\\\\\\\\\\\\\\ : drop3rd rot drop ; \ collect all arguments of a functor : args { mode addr u -- ... mode count addr u } 0 >r addr u prev-char begin r> 1+ >r mode r@ 8 lshift or -rot next-char term drop3rd peek ,? not until mode r> 2swap ; : paren ( addr u xt -- addr u ) >r next-char r> execute skip-ws peek )? not throw next-char ; : lowermode drop3rd x-mode -rot ; : nil { mode addr u -- addr u mode addr u } s" []" mode addr u ; \ cons for the [a,b,c] syntax \ the difference is that ']' generates an implicit '|[]' : cons,' { mode addr u -- ... mode addr u } mode addr u next-char peek ]? if next-char nil else \ Head lowermode term skip-ws \ Tail peek ]? if next-char nil else \ end of list peek ,? if cons, else cr ." SYNTAX ERROR: expected ']' or ',' before " type cr bye endif endif 2>r drop mode 2 s" ." structure mode 2r> endif ; ' cons,' is cons, : cons' { mode addr u -- ... mode addr u } mode addr u next-char peek ]? if next-char nil else \ Head lowermode term skip-ws \ Tail peek ]? if next-char nil else \ end of list peek ,? if cons, else peek |? if next-char term peek ]? not throw next-char else \ if next-char Nil? if next-char else prev-char endif cons else cr ." SYNTAX ERROR: expected ']', '|' or ',' before " type cr bye endif endif endif 2>r drop mode 2 s" ." structure mode 2r> endif ; ' cons' is cons \ ---------------------------------------------------------------------- \ COMPILATION \ ---------------------------------------------------------------------- \ data types : atom? peek alpha? ; : var? peek vALPHA? ; : nil? ( addr u -- addr u b ) dup 1 > if peek [? -rot peek-next ]? and else 0 endif ; : reg? over 0 = ; : void? over 1 = ; \ register allocator : find-var ( addr u -- xt? b ) \ 2dup ." searching for " type cr VAR-Dict search-wordlist ; : bind-new-var' ( val addr u -- ) 2dup find-var if \ update nip nip execute ! else \ bind new get-current { old } VAR-Dict set-current \ ." --> binding " 2dup type ." to " rot dup . -rot cr nextname create , \ alloc and initialize old set-current endif ; ' bind-new-var' is bind-new-var : ->ar ( u -- u ) FutureArg nand ; : unify-temporary ( addr u ) 2dup newX -rot bind-new-var lastX Yreg s" unify_variable" push-xt push-comment; ; : future-arg? ( u -- u b ) dup FutureArg set? ; : direct-arg? ( u -- u b ) dup DirectArg set? ; : compile-var-struct { i addr u } addr u find-var if \ already alloc'd execute @ future-arg? if \ needed in future Ax dup ->ar i < if \ Ax is not live any more \ use the target Ax register directly dup ->ar Areg s" unify_variable" push-xt addr u push-comment; \ mark as directly alloc'd DirectArg or addr u bind-new-var \ s" direct " push-comment; else drop addr u unify-temporary endif else ->ar Yreg s" unify_value" push-xt; endif else \   create new addr u unify-temporary endif ; : getput ( -- ) head? if s" get_" else s" put_" endif push-xt, ; : getunify ( -- ) head? if s" get_" else s" unify_" endif push-xt, ; : getput-variable ( -- ) head? if s" get_variable" else s" put_new_variable" endif push-xt, ; : getput-value ( -- ) getput s" value" push-xt ; : getput-list ( -- ) getput s" list" push-xt ; : getput-atom ( -- ) getput s" atom" push-xt ; : getput-nil ( -- ) getput s" nil" push-xt ; : getput-structure ( -- ) getput s" structure" push-xt ; : getunify-value ( -- ) getunify s" value" push-xt ; : getunify-nil ( -- ) getunify s" nil" push-xt ; : getunify-void ( -- ) s" 1 unify_void ( _ )" push-xt ; : copy-temporary { i addr u -- } query? if \ head? newX addr u bind-new-var lastX yreg i areg getput-variable addr u lastX register-query-var addr u push-comment; else newX addr u bind-new-var lastX yreg i areg getput-variable addr u push-comment; endif ; : compile-var-arg { i addr u -- } \ addr u push-comment; addr u find-var if \  already alloc'd execute @ direct-arg? if \ skip (already unified) ->ar addr u bind-new-var else future-arg? head? nand if drop \ ignore i addr u copy-temporary else dup ->ar i = if \ skip (identical) copy and mark as directly alloc'd ->ar DirectArg or addr u bind-new-var \ s" direct " push-comment; else future-arg? if ->ar addr u copy-temporary else Yreg i Areg getput-value endif addr u push-comment; endif endif endif else \  create/use new temporary i addr u copy-temporary endif ; : compile-structval ( i addr u -- ) reg? if Xreg getunify-value 2drop push-cr else void? if getunify-void 3drop push-cr else var? if compile-var-struct else nil? if 3drop getunify-nil push-cr else -1 throw endif endif endif endif ; : compile-arg ( i addr u -- ) \ 3dup ." \ compiling " type ." , #" . cr reg? if 3drop ( ." set_value A" A @ . cr ) else void? if 3drop else atom? if throw else \ 0 -rot structure var? if compile-var-arg else nil? if 2drop Areg getput-nil push-cr else -1 throw endif endif endif endif endif ; : 2roll ( ... n -- ... ) \ roll n pairs of arguments on the stack dup + 1- dup >r roll r> roll ; : for-each-arg ( compilation: xt -- ; run-time: addr u ... nargs -- ) { xt } 1 postpone literal postpone A postpone ! \  FIXME do something better 0 postpone literal postpone swap postpone u-do postpone i postpone 2roll \ reverse argument list postpone newA postpone -rot \ narg xt compile, 1 postpone literal postpone -loop ; : isList? { nargs addr u } nargs 2 = u 1 = addr c@ [Char] . = and and ; \ Argument order: \ head? -> outer .. inner \ query,clause? -> inner .. outer : quote-atom ( addr u -- ) s\" s\"" push-xt push-xt, s\" \"" push-xt, ; : structure' ( ... ) { mode nargs addrF uF -- 0 u } mode cur-mode ! nargs 0 = if addrF uF quote-atom s" " push-xt endif mode x-mode set? if newX Xreg else mode 8 rshift Areg endif nargs 0 = if getput-atom else nargs addrF uF isList? if getput-list else addrF uF quote-atom nargs 32 uc->c+str push-xt getput-structure endif endif push-cr nargs [ ' compile-structval for-each-arg ] 0 lastX ; ' structure' is structure : head ( ... ) { mode nargs addrF uF } mode cur-mode ! nargs [ ' compile-arg for-each-arg ] ; : push-call ( addrF uF nargs -- ) tailcall? if query? not if s" wam-deallocate" push-xt; endif s\" s\" '" push-xt push-functor s\" -clause-1\" pl-execute" push-xt; else s\" s\" '" push-xt push-functor s\" -clause-1\" pl-call" push-xt; endif ; : query ( ... ) { mode nargs addrF uF } mode nargs addrF uF head addrF uF nargs push-call ; : goal ( ... ) { mode nargs addrF uF } mode cur-mode ! nargs [ ' compile-arg for-each-arg ] addrF uF nargs push-call ; : functor|atom ( mode addr u -- ... mode addr u ) scan-tok { mode addrR uR addrF uF } addrR uR peek (? if \ functor x-mode -rot ['] args paren ( args... ) { _mode nargs addrR1 uR1 } mode nargs addrF uF structure \ push reg mode addrR1 uR1 else \ atom 2drop mode 0 addrF uF structure \ push reg mode addrR uR endif ; : void { mode addr u -- 1 u mode addr u } 1 u mode addr u next-char ; : var ( mode addr u -- addr u mode addr u ) scan-tok { mode addr u addrV uV } \ 2dup ." set_variable " type cr addrV uV mode addr u \ push VAR ; : term' ( mode addr u -- ... mode addr u ) skip-ws peek _? if void else peek dup alpha? swap Num? or if functor|atom else var? if var else peek (? if ['] term paren else peek [? if cons else throw endif endif endif endif endif \ IfxOp? if operator endif \ throw \ "if u != 0" throw ; ' term' is term : init ( nargs -- ) 2 + X ! 0 A ! ; : freshdict \ set the marker get-current { old } VAR-Dict set-current s" new-dict" nextname marker old set-current ; : cleardict \ clear VAR-Dict dictionary get-current { old } VAR-Dict set-current s" new-dict" VAR-Dict search-wordlist if execute endif old set-current ; : set-tailcall-flag ( addr u -- ) 2dup skip-ws peek .? tailcall-flag ! 2drop ; : compile-query ( addr u -- ) s" (toplevel)" 0 clause-header 1 clause# ! clear-query-vars a-mode cur-mode ! skip-ws scan-tok { addrF uF } peek (? if \ functor num-args-regalloc init cur-mode @ -rot ['] args paren set-tailcall-flag { addrR uR } addrF uF query addrR uR \ push X else \ atom 0 init cur-mode @ 0 addrF uF query \ push ATOM endif s" ;" push-xt; ; : compile-head ( addr u -- addr u ) c-mode cur-mode ! skip-ws scan-tok { addrF uF } peek (? if \ functor num-args-regalloc dup { nargs } init addrF uF nargs clause-header cur-mode @ -rot ['] args paren { addrR uR } addrF uF head addrR uR \ push X else \ atom 0 init addrF uF 0 clause-header cur-mode @ 0 addrF uF head \ push ATOM compile-arg endif ; : compile-body ( addr u -- addr u ) a-mode cur-mode ! skip-ws scan-tok { addrF uF } peek (? if \ functor cur-mode @ -rot ['] args paren \  process args set-tailcall-flag { addrR uR } addrF uF goal \  compile.. addrR uR \ push X else \ atom 0 init cur-mode @ 0 addrF uF query \ push ATOM endif ; : compile-clause ( addr u -- ) freshdict skip-ws Arrow? if \ query next-char next-char compile-query else IfxExp? if \ clause compile-head skip-ws Arrow? not throw next-char next-char begin skip-ws compile-body skip-ws peek ,? while next-char repeat else \ fact compile-head query? not if s" wam-deallocate" push-xt; endif s" proceed" push-xt; endif [Char] . expect s" ; " push-xt; clause# dec \ s" '" push-xt last-clause last-clause-u @ last-nargs @ push-clause-name s" . cr" push-xt; clause# inc drop type endif cleardict ; : eval compile-buf-start compile-buf @ over - ( Debug print ) \ 2dup type ( let gforth compile the clause ) evaluate ( run, if query ) query? if cr \ so we are not on the same line as the "redefined" messages s" ' (toplevel)/0-clause-1 prolog-shell" evaluate endif ; : compile ( addr u -- ) ( echo ) 2dup cr type cr ( Clear buffer ) compile-buf-start compile-buf ! ( Compile ) compile-clause ( Debug print ) compile-buf-start compile-buf @ over - type clearstack ; \ ---------------------------------------------------------------------- \ SHELL \ ---------------------------------------------------------------------- 256 constant max-line create clause-input-buf max-line chars allot : clauses ( -- ) cr ." Enter clauses, one per line; empty line when you're done." cr begin ." > " clause-input-buf max-line accept ( input-len ) dup 0<> while clause-input-buf swap compile eval repeat drop ; : queries ( -- ) cr ." Enter queries, one per line; empty line when you're done." cr tracing off begin ." ?- " clause-input-buf max-line accept ( input-len ) dup 0<> while interactive-mode on clause-input-buf swap compile-query eval repeat drop interactive-mode off tracing on ; \ concatenate([], L, L). \ concatenate([X|L1], L2, [X|L3]) :- concatenate(L1, L2, L3). \ \ concatenate/3: switch_on_term C1a, C1, C2, fail \ \ C1a: try_me_else C2a \ C1: get_nil A1 \ get_value A2, A3 \ proceed \ \ C2a: trust_me_else fail \ C2: get_list A1 \ unify_variable X4 \ unify_variable A1 \ get_list A3 \ unify_variable X4 \ unify_variable A3 \ execute concatenate/3 \ ---------------------------------------------------------------------- \ TESTS \ ---------------------------------------------------------------------- s" :- test" scan-tok clearstack s" :- atom1" compile s" :- f(a,b)" compile s" :- f(f(a,b),g(c,h(d,e)))" compile s" :- p(Z,h(Z,W),f(W))" compile s" :- f([])" compile s" :- f([X])" compile s" :- f(.(a,.(b,.(c,[]))))" compile s" :- f([a,b,c])" compile s" :- do(parse(s(np,vp),[birds,fly,[]]))" compile s" :- f([X|[]])" compile s" :- f([X|Xs])" compile s" missing_feature(garbage_collection)." compile eval s" missing_feature(bug_fixes)." compile eval s" concatenate([],L,L)." compile eval s" concatenate([X|L1],L2,[X|L3]) :- concatenate(L1,L2,L3)." compile eval s" :- concatenate([],[],[])." compile eval s" :- concatenate([],X,X)." compile eval s" :- concatenate([a,b],[c],X)." compile eval s" :- concatenate(A,B,[a,b])." compile eval s" split([], L, [], [])." compile eval s" split([X|Xs], X, [X|Ls], Gs) :- split(Xs, X, Ls, Gs)." compile eval s" split([L|Xs], X, [L|Ls], Gs) :- compare_terms(less, L, X), split(Xs, X, Ls, Gs)." compile eval s" split([G|Xs], X, Ls, [G|Gs]) :- compare_terms(greater, G, X), split(Xs, X, Ls, Gs)." compile eval s" :- split([], a, A, B)." compile eval s" qsort([],R,R)." compile eval s" qsort([X|L],R0,R) :- split(L,X,L1,L2), qsort(L1,R0,[X|R1]), qsort(L2,R1,R)." compile eval s" :- qsort([a], [], R)." compile eval s" :- qsort([], [a,b,c], R)." compile eval s" :- qsort([a,b,c], [], R)." compile eval s" :- qsort([b,a,c], [], R)." compile eval s" :- compare_terms(Y,a,b)." compile eval s" member(X,[X|_])." compile eval s" member(X,[_|L]) :- member(X,L)." compile eval s" :- member(X,[a,b,c])." compile eval