\ This file contains a proof-of-concept dot-parser recognizer. It is
\ modeled on a subset of the ClassVFX as described in Chapter 29 of
\ the VFX Forth manual. The subset is reduced to the minimum
\ necessary for the demonstration. This code has been implemented
\ without looking at the ClassVFX implementation.
\ This code is in the public domain
\ Tested successfully with gforth 0.7.9_20200917, invoked as follows:
\ gforth test/ttester.fs ~/forth/dot-parser/dot-parser.fs -e "cr bye"
\ You define
\
\ type:
\ \ int: is predefined as type-nme
\ ...
\ end-type
\
\ instance:
\
\ Every type has a separate name space. You can then write
\
\ ( addr ) ..
\
\ to get to the address of the final field in the sequence. The
\ sequence of fields can be arbitrarily long.
\
\ Fields for user-defined types contain addresses, not the structures
\ themselves; I think that in ClassVFX they contain the structures,
\ but implementing that would be too easy: without the address
\ indirection, you can access the final field through a single offset,
\ while with the indirection you need to deal with an arbitrary
\ number. So I demonstrate how to solve the harder problem.
\ proof-of-concept ClassVFX implementation
: type: ( "type-name" -- old-current type-addr 0 )
\ defines type; first cell contains wordlist, second the size
get-current wordlist
create dup set-current here swap , 0 , 0
does> ( type-addr uoffset1 "field-name" -- type-addr uoffset2 )
\ defines field; first cell contains offset, the second the type
create over , , cell+
does> ( addr -- addr+uoffset1 )
@ + ;
: end-type ( old-current type-addr usize -- )
\ store size of type
swap cell+ ! set-current ;
type: int:
end-type
: instance: ( "type-name" "instance-name" -- )
' >body cell+ @ create allot ;
\ Usage examples for the ClassVFX implementation above:
type: point:
int: x
int: y
end-type
type: line:
point: start
point: end
end-type
instance: point: p1
instance: point: p2
instance: line: l1
t{ 2@ drop ( ... wordlist-words ) -> 2 cells }t
t{ 2@ drop ( ... wordlist-words ) -> 2 cells }t
t{ "start" @ find-name-in name>interpret >body 2@ -> 0 }t
\ dot-parser recognizer
\ We cannot combine the offsets beforehand, so we have to store them
\ on the stack as an arbitrarily large thing. We do this by storing
\ the sequence of xts to execute for performing the dot-parsed
\ sequence.
: dp-int ( ... xt1 .. xtn n -- ... )
\ remove xt1 .. xtn n from the data stack, then execute xt1 .. xtn.
dup if
swap >r 1- recurse r> execute exit then
drop ;
t{ 2 3 4 ' + ' * 2 dp-int -> 14 }t
: dp-comp ( xt1 .. xtn n -- )
\ compile, xt1 .. xtn, in this order
dup if
swap >r 1- recurse r> compile, exit then
drop ;
t{ 2 3 4 :noname [ ' + ' * 2 dp-comp ] ; execute }t
: dp-lit1 ( x1 .. xn n -- )
\ compile x1 .. xn as literals, in this order
dup if
swap >r 1- recurse r> postpone literal exit then
drop ;
t{ :noname [ 2 3 4 3 dp-lit1 ] ; execute -> 2 3 4 }t
: dp-lit ( xt1 .. xtn n -- )
\ compile xt1 .. xtn n as literals
dup >r dp-lit1 r> postpone literal ;
t{ :noname [ 2 3 4 3 dp-lit ] ; execute -> 2 3 4 3 }t
' dp-int ' dp-comp ' dp-lit rectype: rectype-dp
: split ( c-addr1 u1 c-addr2 u2 -- c-addr3 u3 c-addr4 u4 true | c-addr1 u1 false )
\ If c-addr2 u2 is found in c-addr1 u1, return true, and c-addr3
\ u3 and caddr4 u4 are the parts to the left and the right of the
\ found string. If not return c-addr1 u1 and false
2over 2>r dup >r search if
over swap r> /string 2r> 2swap 2>r drop tuck - 2r> true
else
r> drop 2r> 2drop false
then ;
t{ "abcdefgh" "cd" split -rot "efgh" str= 2swap "ab" str= -> true true true }t
t{ "abcdefgh" "cc" split -rot "abcdefgh" str= -> false true }t
: rec-dot-parser ( c-addr u -- xt1 ... xtn n rectype-dp | rectype-null )
\ this leaves out the handling of a number of cases resulting in
\ rectype-null in the interest of showing the successful case more clearly
s" ." split 0= if
2drop rectype-null exit then
2swap find-name \ !! deal with not-found and not-
name>interpret >body @ >r 1 -rot begin ( xt1 .. xtn n c-addr1 u1 r:wid )
s" ." split while
2swap r> find-name-in \ !! deal with not-found and not-
name>interpret dup >body cell+ @ @ >r
-rot 2>r ['] @ rot 2 + 2r>
repeat
r> find-name-in \ !! deal with not-found and not-
name>interpret swap rectype-dp ;
t{ "\\" rec-dot-parser -> rectype-null }t
t{ "line:.start" rec-dot-parser rot drop -> 1 rectype-dp }t
t{ "line:.start.x" rec-dot-parser rot drop 3 roll drop -> ' @ 3 rectype-dp }t
\ use this recognizer last, because it cannot handle regular words
\ containing '.'.
' rec-dot-parser get-recognizers 1+ set-recognizers
\ dot-parser example
p1 l1 line:.start !
p2 l1 line:.end !
5 l1 line:.start.x !
8 l1 line:.start.y !
9 l1 line:.end.x !
3 l1 line:.end.y !
: foo line:.start.y @ ;
: bar postpone line:.start.x ; immediate
t{ l1 line:.end.x @ -> 9 }t
t{ l1 foo -> 8 }t
t{ :noname l1 bar @ ; execute -> 5 }t