\ This file contains a proof-of-concept dot-parser recognizer.  It is
\ modeled on a subset of the ClassVFX as described in Chapter 29 of
\ the VFX Forth manual.  The subset is reduced to the minimum
\ necessary for the demonstration.  This code has been implemented
\ without looking at the ClassVFX implementation.

\ This code is in the public domain

\ Tested successfully with gforth 0.7.9_20200917, invoked as follows:
\ gforth test/ttester.fs ~/forth/dot-parser/dot-parser.fs -e "cr bye"

\ You define
\ 
\ type: <type-name>
\   <type-name> <field-name> \ int: is predefined as type-nme
\   ...
\ end-type
\ 
\ instance: <type-name> <instance-name>
\ 
\ Every type has a separate name space.  You can then write
\ 
\ ( addr ) <type-name>.<field-name>.<field-name>
\ 
\ to get to the address of the final field in the sequence.  The
\ sequence of fields can be arbitrarily long.
\ 
\ Fields for user-defined types contain addresses, not the structures
\ themselves; I think that in ClassVFX they contain the structures,
\ but implementing that would be too easy: without the address
\ indirection, you can access the final field through a single offset,
\ while with the indirection you need to deal with an arbitrary
\ number.  So I demonstrate how to solve the harder problem.

\ proof-of-concept ClassVFX implementation

: type: ( "type-name" -- old-current type-addr 0 )
    \ defines type; first cell contains wordlist, second the size
    get-current wordlist
    create dup set-current here swap , 0 , 0
  does> ( type-addr uoffset1 "field-name" -- type-addr uoffset2 )
    \ defines field; first cell contains offset, the second the type
    create over , , cell+
  does> ( addr -- addr+uoffset1 )
    @ + ;

: end-type ( old-current type-addr usize -- )
    \ store size of type
    swap cell+ ! set-current ;

type: int:
end-type

: instance: ( "type-name" "instance-name" -- )
    ' >body cell+ @ create allot ;

\ Usage examples for the ClassVFX implementation above:

type: point:
  int: x
  int: y
end-type

type: line:
  point: start
  point: end
end-type

instance: point: p1
instance: point: p2
instance: line: l1

t{ <line:> 2@ drop ( ... wordlist-words ) -> 2 cells }t
t{ <point:> 2@ drop ( ... wordlist-words ) -> 2 cells }t
t{ "start" <line:> @ find-name-in name>interpret >body 2@ -> <point:> 0 }t

\ dot-parser recognizer

\ We cannot combine the offsets beforehand, so we have to store them
\ on the stack as an arbitrarily large thing.  We do this by storing
\ the sequence of xts to execute for performing the dot-parsed
\ sequence.

: dp-int ( ... xt1 .. xtn n -- ... )
    \ remove xt1 .. xtn n from the data stack, then execute xt1 .. xtn.
    dup if
	swap >r 1- recurse r> execute exit then
    drop ;

t{ 2 3 4 ' + ' * 2 dp-int -> 14 }t

: dp-comp ( xt1 .. xtn n -- )
    \ compile, xt1 .. xtn, in this order
    dup if
	swap >r 1- recurse r> compile, exit then
    drop ;

t{ 2 3 4 :noname [ ' + ' * 2 dp-comp ] ; execute }t

: dp-lit1 ( x1 .. xn n -- )
    \ compile x1 .. xn as literals, in this order
    dup if
	swap >r 1- recurse r> postpone literal exit then
    drop ;

t{ :noname [ 2 3 4 3 dp-lit1 ] ; execute -> 2 3 4 }t

: dp-lit ( xt1 .. xtn n -- )
    \ compile xt1 .. xtn n as literals
    dup >r dp-lit1 r> postpone literal ;

t{ :noname [ 2 3 4 3 dp-lit ] ; execute -> 2 3 4 3 }t

' dp-int ' dp-comp ' dp-lit rectype: rectype-dp

: split ( c-addr1 u1 c-addr2 u2 -- c-addr3 u3 c-addr4 u4 true | c-addr1 u1 false )
    \ If c-addr2 u2 is found in c-addr1 u1, return true, and c-addr3
    \ u3 and caddr4 u4 are the parts to the left and the right of the
    \ found string.  If not return c-addr1 u1 and false
    2over 2>r dup >r search if
	over swap r> /string 2r> 2swap 2>r drop tuck - 2r> true
    else
	r> drop 2r> 2drop false
    then ;

t{ "abcdefgh" "cd" split -rot "efgh" str= 2swap "ab" str= -> true true true }t
t{ "abcdefgh" "cc" split -rot "abcdefgh" str= -> false true }t

: rec-dot-parser ( c-addr u -- xt1 ... xtn n rectype-dp | rectype-null )
    \ this leaves out the handling of a number of cases resulting in
    \ rectype-null in the interest of showing the successful case more clearly
    s" ." split 0= if
	2drop rectype-null exit then
    2swap find-name \ !! deal with not-found and not-<typename>
    name>interpret >body @ >r 1 -rot begin ( xt1 .. xtn n c-addr1 u1 r:wid )
	s" ." split while
	    2swap r> find-name-in \ !! deal with not-found and not-<fieldname>
	    name>interpret dup >body cell+ @ @ >r
	    -rot 2>r ['] @ rot 2 + 2r>
    repeat
    r> find-name-in \ !! deal with not-found and not-<fieldname>
    name>interpret swap rectype-dp ;

t{ "\\" rec-dot-parser -> rectype-null }t
t{ "line:.start" rec-dot-parser rot drop -> 1 rectype-dp }t
t{ "line:.start.x" rec-dot-parser rot drop 3 roll drop -> ' @ 3 rectype-dp }t

\ use this recognizer last, because it cannot handle regular words
\ containing '.'.
' rec-dot-parser get-recognizers 1+ set-recognizers

\ dot-parser example

p1 l1 line:.start !
p2 l1 line:.end !
5  l1 line:.start.x !
8  l1 line:.start.y !
9  l1 line:.end.x !
3  l1 line:.end.y !

: foo line:.start.y @ ;
: bar postpone line:.start.x ; immediate

t{ l1 line:.end.x @ -> 9 }t
t{ l1 foo -> 8 }t
t{ :noname l1 bar @ ; execute -> 5 }t