: is-a ( c "name" -- ) create here 1 chars allot c! DOES> c@ = ; : match. ( c -- ) drop -1 ; : getcompxt ( c -- ) dup '. = if drop ['] match. else noname is-a latestxt endif ; : getChar ( u-addr u -- c ) chars + c@ ; : Charclass ( -- ) create here 256 chars allot 256 chars erase DOES> + c@ 1 = ; : NegCharclass ( -- ) create here 256 chars allot 256 chars erase DOES> + c@ 0 = ; : getClassXt ( rs pos rl -- pos' xt ) \ get execution token for a char class { rs pos rl } pos chars rs + c@ '^ = if noname NegCharclass latestxt pos 1+ else noname Charclass latestxt pos endif swap { xt } BEGIN { pos } pos rs getChar { c } c '- = if pos 1- rs getChar { ug } pos 1+ rs getChar { og } ug emit og emit og ug ?DO 1 i chars xt >body + c! LOOP else 1 c chars xt >body + c! endif pos 1+ c '] = UNTIL xt swap ; 0 Value regstart 0 Value NNode : allocregspace 500 cells allocate drop dup 500 cells erase ; : nodes ( n -- n ) 10 cells * ; : getNumOfEdges ( n -- n ) nodes regstart + @ ; : setNumOfEdges ( n e -- ) swap nodes regstart + ! ; : getPosOfEdge ( n e -- a-addr ) swap nodes regstart + swap 2 cells * + cell+ ; : getXtOfEdge ( n e -- xt ) getPosOfEdge @ ; : setXtOfEdge ( xt n e -- ) getPosOfEdge ! ; : getGoalOfEdge ( n e -- n ) getPosOfEdge cell+ @ ; : setGoalOfEdge ( g n e -- ) getPosOfEdge cell+ ! ; : NewNode ( -- n ) NNode 1+ dup TO NNode ; : NewEdge ( xt n g -- ) { xt n g } xt n dup getNumOfEdges getPosOfEdge ! g n dup getNumOfEdges getPosOfEdge cell+ ! \ Verweis auf naechsten Knoten n n getNumOfEdges 1+ setNumOfEdges ; : lastGroupClosed ( s e c num -- s e c num f ) dup 0 > if >r dup r> swap else 0 endif ; : dropGroupIfClosed ( s e c num -- s e c num | num-1 ) lastGroupClosed if >r 2drop drop r> 1- endif ; : NewGroup ( num b -- b -1 0 num+1 ) { b } dropGroupIfClosed 1+ >r b -1 0 r> ; : EndGroup ( s e c num en -- s en c num ) { en } dropGroupIfClosed { c num } drop en c num ; : CloseGroup ( c num -- -1 num ) dropGroupIfClosed >r drop -1 r> ; : CreateStub allocregspace to regstart \ 500 Zellen fuer Graphen reservieren -1 0 1 NewEdge \ epsilon zum 2. Node ['] match. 0 0 NewEdge \ beim 1. Node eine Kante einfuegen die . matcht 0 1 NewGroup 1 TO NNode ; : process* lastGroupClosed invert if ." Fehlerhafter Ausdruck" -1 EXIT endif >r drop { Nbeg Nend } r> -1 Nend NewNode NewEdge -1 Nbeg NNode NewEdge -1 Nend Nbeg NewEdge NNode ; : process+ lastGroupClosed invert if ." Fehlerhafter Ausdruck" -1 EXIT endif >r drop { Nbeg Nend } r> -1 Nend NewNode NewEdge -1 Nend Nbeg NewEdge NNode ; : process? lastGroupClosed invert if ." Fehlerhafter Ausdruck" -1 EXIT endif >r drop { Nbeg Nend } r> -1 Nbeg Nend NewEdge Nend ; : process( { CurNode pos } -1 CurNode NewNode NewEdge NNode NewGroup NNode pos 1+ ; : process) { CurNode pos } dropGroupIfClosed { te tc tn } te tc tn te -1 = if -1 CurNode NewNode NewEdge NNode EndGroup CloseGroup NNode pos 1+ else -1 CurNode te NewEdge CloseGroup te pos 1+ endif ; : process| { CurNode pos } dropGroupIfClosed { tb te tc tn } tb te tc tn te -1 = if -1 CurNode NewNode NewEdge NNode EndGroup tb pos 1+ else -1 CurNode te NewEdge tb pos 1+ endif ; : processchar { c CurNode pos rs rl } c '[ = if rs pos 1+ rl getClassXt else c getcompxt pos 1+ endif { xt neupos } CurNode NewGroup xt CurNode NewNode NewEdge NNode EndGroup CloseGroup NNode neupos c ; : processend { CurNode pos } \ NNode EndGroup CloseGroup dropGroupIfClosed { te tc tn } te tc tn te -1 = if CurNode EndGroup else -1 CurNode te NewEdge endif CloseGroup ; : processstep { CurNode pos rs rl } rs pos getChar CASE '* OF process* pos 1+ ENDOF '+ OF process+ pos 1+ ENDOF '? OF process? pos 1+ ENDOF '( OF CurNode pos process( ENDOF ') OF CurNode pos process) ENDOF '| OF CurNode pos process| ENDOF \ default CurNode pos rs rl processchar ENDCASE ; : build-nfa ( u-addr u -- a-addr ) \ build nfa from string at u-addr \ returns address of built nfa { rs rl } \ regex-string length CreateStub 1 0 BEGIN rs rl processstep dup rl >= UNTIL processend dropGroupIfClosed drop \ cleanup regstart \ return address of fsm ; : cleanmatchstack BEGIN -9999 = UNTIL ; : followeps \ follow an epsilon edge { n e pos } n e pos n e GetGoalOfEdge 0 pos ; : processNode \ 1 step of nfa evaluation { n e pos s l } n e getXtOfEdge -1 = if n e pos followeps else pos l 1- > if n e 1+ pos else s pos getChar n e getXtOfEdge execute if \ Zeichen matcht -> Kante verfolgen n e pos n e GetGoalOfEdge 0 pos 1+ else \ Zeichen matcht nicht -> naechste Kante? n e 1+ pos endif endif endif ; : test-nfa ( a-addr u-addr u -- f ) \ check if string at u-addr matches the nfa at a-addr { s l } TO regstart -9999 \ set mark for cleanup 0 0 0 \ node edge position BEGIN { n e pos } \ Zielknoten erreicht? -> Ende n GetNumOfEdges 0= if cleanmatchstack -1 EXIT endif e n GetNumOfEdges >= if \ Backtracking wenn keine Kanten uebrig n 0= if cleanmatchstack 0 EXIT else swap 1+ swap endif else \ Kanten durchlaufen n e pos s l processNode endif AGAIN ; : sbsregex ( u-addr u u-addr u -- ) { s l } build-nfa dup 0 <> if s l test-nfa endif ; : dumpreg \ creates human-readable output for last built nfa for debugging purposes cr 0 BEGIN { num } num nodes regstart + @ { numEdges } num . ': emit '( emit numEdges . ') emit numEdges 0 ?DO num i getXtOfEdge -1 = if s" eps" type else s" fun" type endif ', emit num i getGoalOfEdge . LOOP cr num 1+ num NNode = UNTIL drop ;