--- gforth/prims2x.fs 2002/06/02 15:46:16 1.106 +++ gforth/prims2x.fs 2003/10/09 14:15:19 1.144 @@ -1,6 +1,6 @@ \ converts primitives to, e.g., C code -\ Copyright (C) 1995,1996,1997,1998,2000 Free Software Foundation, Inc. +\ Copyright (C) 1995,1996,1997,1998,2000,2003 Free Software Foundation, Inc. \ This file is part of Gforth. @@ -51,8 +51,15 @@ \ (stack-in-index-xt and a test for stack==instruction-stream); there \ should be only one. +\ for backwards compatibility, jaw +require compat/strcomp.fs + warnings off +\ redefinitions of kernel words not present in gforth-0.6.1 +: latestxt lastcfa @ ; +: latest last @ ; + [IFUNDEF] try include startup.fs [THEN] @@ -60,10 +67,10 @@ include startup.fs : struct% struct ; \ struct is redefined in gray warnings off +\ warnings on include ./gray.fs - -32 constant max-effect \ number of things on one side of a stack effect +128 constant max-effect \ number of things on one side of a stack effect 4 constant max-stacks \ the max. number of stacks (including inst-stream). 255 constant maxchar maxchar 1+ constant eof-char @@ -78,10 +85,25 @@ variable line-start \ pointer to start o 0 line ! 2variable filename \ filename of original input file 0 0 filename 2! +2variable out-filename \ filename of the output file (for sync lines) +0 0 out-filename 2! 2variable f-comment 0 0 f-comment 2! variable skipsynclines \ are sync lines ("#line ...") invisible to the parser? -skipsynclines on +skipsynclines on +variable out-nls \ newlines in output (for output sync lines) +0 out-nls ! +variable store-optimization \ use store optimization? +store-optimization off + +variable include-skipped-insts +\ does the threaded code for a combined instruction include the cells +\ for the component instructions (true) or only the cells for the +\ inline arguments (false) +include-skipped-insts off + +variable immarg \ values for immediate arguments (to be used in IMM_ARG macros) +$12340000 immarg ! : th ( addr1 n -- addr2 ) cells + ; @@ -121,12 +143,24 @@ skipsynclines on 0 recover endtry r> to outfile-id throw - abort + 1 (bye) \ abort endif ; : quote ( -- ) [char] " emit ; +\ count output lines to generate sync lines for output + +: count-nls ( addr u -- ) + bounds u+do + i c@ nl-char = negate out-nls +! + loop ; + +:noname ( addr u -- ) + 2dup count-nls + defers type ; +is type + variable output \ xt ( -- ) of output word for simple primitives variable output-combined \ xt ( -- ) of output word for combined primitives @@ -135,6 +169,7 @@ struct% cell% 2* field stack-pointer \ stackpointer name cell% field stack-type \ name for default type of stack items cell% field stack-in-index-xt \ ( in-size item -- in-index ) + cell% field stack-access-transform \ ( nitem -- index ) end-struct stack% struct% @@ -153,8 +188,29 @@ struct% cell% field type-store \ xt of store code generator ( item -- ) end-struct type% +struct% + cell% field register-number + cell% field register-type \ pointer to type + cell% 2* field register-name \ c name +end-struct register% + +struct% + cell% 2* field ss-registers \ addr u; ss-registers[0] is TOS + \ 0 means: use memory + cell% field ss-offset \ stack pointer offset: sp[-offset] is TOS +end-struct ss% \ stack-state + +struct% + cell% field state-number + cell% max-stacks * field state-sss +end-struct state% + variable next-stack-number 0 next-stack-number ! create stacks max-stacks cells allot \ array of stacks +256 constant max-registers +create registers max-registers cells allot \ array of registers +variable nregisters 0 nregisters ! \ number of registers +variable next-state-number 0 next-state-number ! \ next state number : stack-in-index ( in-size item -- in-index ) item-offset @ - 1- ; @@ -170,9 +226,17 @@ create stacks max-stacks cells allot \ a 1 next-stack-number +! r@ stack-type ! save-mem r@ stack-pointer 2! - ['] stack-in-index r> stack-in-index-xt ! ; + ['] stack-in-index r@ stack-in-index-xt ! + ['] noop r@ stack-access-transform ! + rdrop ; : map-stacks { xt -- } + \ perform xt for all stacks + next-stack-number @ 0 +do + stacks i th @ xt execute + loop ; + +: map-stacks1 { xt -- } \ perform xt for all stacks except inst-stream next-stack-number @ 1 +do stacks i th @ xt execute @@ -226,10 +290,28 @@ end-struct prim% 0 value combined \ in combined prims the combined prim variable in-part \ true if processing a part in-part off +0 value state-in \ state on entering prim +0 value state-out \ state on exiting prim +0 value state-default \ canonical state at bb boundaries + +: prim-context ( ... p xt -- ... ) + \ execute xt with prim set to p + prim >r + swap to prim + catch + r> to prim + throw ; 1000 constant max-combined create combined-prims max-combined cells allot variable num-combined +variable part-num \ current part number during process-combined + +: map-combined { xt -- } + \ perform xt for all components of the current combined instruction + num-combined @ 0 +do + combined-prims i th @ xt execute + loop ; table constant combinations \ the keys are the sequences of pointers to primitives @@ -238,6 +320,19 @@ create current-depth max-stacks cells al create max-depth max-stacks cells allot create min-depth max-stacks cells allot +create sp-update-in max-stacks cells allot +\ where max-depth occured the first time +create max-depths max-stacks max-combined 1+ * cells allot +\ maximum depth at start of each part: array[parts] of array[stack] +create max-back-depths max-stacks max-combined 1+ * cells allot +\ maximun depth from end of the combination to the start of the each part + +: s-c-max-depth ( nstack ncomponent -- addr ) + max-stacks * + cells max-depths + ; + +: s-c-max-back-depth ( nstack ncomponent -- addr ) + max-stacks * + cells max-back-depths + ; + wordlist constant primitives : create-prim ( prim -- ) @@ -258,6 +353,13 @@ variable name-line 2variable name-filename 2variable last-name-filename Variable function-number 0 function-number ! +Variable function-old 0 function-old ! +: function-diff ( n -- ) + ." GROUPADD(" function-number @ function-old @ - 0 .r ." )" cr + function-number @ function-old ! ; +: forth-fdiff ( -- ) + function-number @ function-old @ - 0 .r ." groupadd" cr + function-number @ function-old ! ; \ a few more set ops @@ -267,25 +369,56 @@ Variable function-number 0 function-numb : complement ( set1 -- set2 ) empty ['] bit-equivalent binary-set-operation ; +\ forward declaration for inst-stream (breaks cycle in definitions) +defer inst-stream-f ( -- stack ) + \ stack access stuff -: normal-stack-access ( n stack -- ) - stack-pointer 2@ type - dup - if - ." [" 0 .r ." ]" +: normal-stack-access0 { n stack -- } + \ n has the ss-offset already applied (see ...-access1) + n stack stack-access-transform @ execute ." [" 0 .r ." ]" ; + +: state-ss { stack state -- ss } + state state-sss stack stack-number @ th @ ; + +: stack-reg { n stack state -- reg } + \ n is the index (TOS=0); reg is 0 if the access is to memory + stack state state-ss ss-registers 2@ n u> if ( addr ) \ in ss-registers? + n th @ else - drop ." TOS" + drop 0 endif ; -\ forward declaration for inst-stream (breaks cycle in definitions) -defer inst-stream-f ( -- stack ) +: .reg ( reg -- ) + register-name 2@ type ; + +: stack-offset ( stack state -- n ) + \ offset for stack in state + state-ss ss-offset @ ; + +: normal-stack-access1 { n stack state -- } + n stack state stack-reg ?dup-if + .reg exit + endif + stack stack-pointer 2@ type + n stack state stack-offset - stack normal-stack-access0 ; + +: normal-stack-access ( n stack state -- ) + over inst-stream-f = if + ." IMM_ARG(" normal-stack-access1 ." ," immarg ? ." )" + 1 immarg +! + else + normal-stack-access1 + endif ; + +: stack-depth { stack -- n } + current-depth stack stack-number @ th @ ; : part-stack-access { n stack -- } \ print _, x=inst-stream? n : maxdepth-currentdepth-n-1 ." _" stack stack-pointer 2@ type stack stack-number @ { stack# } - current-depth stack# th @ n + { access-depth } + stack stack-depth n + { access-depth } stack inst-stream-f = if access-depth else @@ -295,12 +428,44 @@ defer inst-stream-f ( -- stack ) endif 0 .r ; -: stack-access ( n stack -- ) +: part-stack-read { n stack -- } + stack stack-depth n + ( ndepth ) + stack stack-number @ part-num @ s-c-max-depth @ +\ max-depth stack stack-number @ th @ ( ndepth nmaxdepth ) + over <= if ( ndepth ) \ load from memory + stack state-in normal-stack-access + else + drop n stack part-stack-access + endif ; + +: stack-diff ( stack -- n ) + \ in-out + dup stack-in @ swap stack-out @ - ; + +: part-stack-write { n stack -- } + stack stack-depth n + + stack stack-number @ part-num @ s-c-max-back-depth @ + over <= if ( ndepth ) + stack combined ['] stack-diff prim-context - + stack state-out normal-stack-access + else + drop n stack part-stack-access + endif ; + +: stack-read ( n stack -- ) \ print a stack access at index n of stack in-part @ if - part-stack-access + part-stack-read else - normal-stack-access + state-in normal-stack-access + endif ; + +: stack-write ( n stack -- ) + \ print a stack access at index n of stack + in-part @ if + part-stack-write + else + state-out normal-stack-access endif ; : item-in-index { item -- n } @@ -316,7 +481,7 @@ defer inst-stream-f ( -- stack ) >r ." vm_" r@ item-stack-type-name type ." 2" r@ item-type @ print-type-prefix ." (" - r@ item-in-index r@ item-stack @ stack-access ." ," + r@ item-in-index r@ item-stack @ stack-read ." ," r@ item-name 2@ type ." );" cr rdrop ; @@ -327,8 +492,8 @@ defer inst-stream-f ( -- stack ) ." vm_two" r@ item-stack-type-name type ." 2" r@ item-type @ print-type-prefix ." (" - r@ item-in-index r@ item-stack @ 2dup ." (Cell)" stack-access - ." , " -1 under+ ." (Cell)" stack-access + r@ item-in-index r@ item-stack @ 2dup ." (Cell)" stack-read + ." , " -1 under+ ." (Cell)" stack-read ." , " r@ item-name 2@ type ." )" cr rdrop ; @@ -351,7 +516,7 @@ defer inst-stream-f ( -- stack ) rdrop ; : item-out-index ( item -- n ) - \ n is the index of item (in the in-effect) + \ n is the index of item (in the out-effect) >r r@ item-stack @ stack-out @ r> item-offset @ - 1- ; : really-store-single ( item -- ) @@ -360,22 +525,17 @@ defer inst-stream-f ( -- stack ) r@ item-type @ print-type-prefix ." 2" r@ item-stack-type-name type ." (" r@ item-name 2@ type ." ," - r@ item-out-index r@ item-stack @ stack-access ." );" + r@ item-out-index r@ item-stack @ stack-write ." );" rdrop ; -: store-single ( item -- ) - >r - r@ same-as-in? - if - r@ item-in-index 0= r@ item-out-index 0= xor - if - ." IF_" r@ item-stack @ stack-pointer 2@ type - ." TOS(" r@ really-store-single ." );" cr - endif - else - r@ really-store-single cr - endif - rdrop ; +: store-single { item -- } + item item-stack @ { stack } + store-optimization @ in-part @ 0= and item same-as-in? and + item item-in-index stack state-in stack-reg 0= and \ in in memory? + item item-out-index stack state-out stack-reg 0= and \ out in memory? + 0= if + item really-store-single cr + endif ; : store-double ( item -- ) \ !! store optimization is not performed, because it is not yet needed @@ -384,8 +544,8 @@ defer inst-stream-f ( -- stack ) r@ item-type @ print-type-prefix ." 2two" r@ item-stack-type-name type ." (" r@ item-name 2@ type ." , " - r@ item-out-index r@ item-stack @ 2dup stack-access - ." , " -1 under+ stack-access + r@ item-out-index r@ item-stack @ 2dup stack-write + ." , " -1 under+ stack-write ." )" cr rdrop ; @@ -493,21 +653,68 @@ does> ( item -- ) wordlist constant type-names \ this is here just to meet the requirement \ that a type be a word; it is never used for lookup +: define-type ( addr u -- xt ) + \ define single type with name addr u, without stack + get-current type-names set-current >r + 2dup nextname stack-type-name + r> set-current + latestxt ; + : stack ( "name" "stack-pointer" "type" -- ) \ define stack name { d: stack-name } name { d: stack-pointer } name { d: stack-type } - get-current type-names set-current - stack-type 2dup nextname stack-type-name - set-current - stack-pointer lastxt >body stack-name nextname make-stack ; + stack-type define-type + stack-pointer rot >body stack-name nextname make-stack ; stack inst-stream IP Cell ' inst-in-index inst-stream stack-in-index-xt ! ' inst-stream inst-stream-f \ !! initialize stack-in and stack-out +\ registers + +: make-register ( type addr u -- ) + \ define register with type TYPE and name ADDR U. + nregisters @ max-registers < s" too many registers" ?print-error + 2dup nextname create register% %allot >r + r@ register-name 2! + r@ register-type ! + nregisters @ r@ register-number ! + 1 nregisters +! + rdrop ; + +: register ( "name" "type" -- ) + \ define register + name { d: reg-name } + name { d: reg-type } + reg-type define-type >body + reg-name make-register ; + +\ stack-states + +: stack-state ( a-addr u uoffset "name" -- ) + create ss% %allot >r + r@ ss-offset ! + r@ ss-registers 2! + rdrop ; + +0 0 0 stack-state default-ss + +\ state + +: state ( "name" -- ) + \ create a state initialized with default-sss + create state% %allot { s } + next-state-number @ s state-number ! 1 next-state-number +! + max-stacks 0 ?do + default-ss s state-sss i th ! + loop ; + +: set-ss ( ss stack state -- ) + state-sss swap stack-number @ th ! ; + \ offset computation \ the leftmost (i.e. deepest) item has offset 0 \ the rightmost item has the highest offset @@ -524,12 +731,9 @@ stack inst-stream IP Cell : compute-offset-out ( addr1 addr2 -- ) ['] stack-out compute-offset ; -: clear-stack { -- } - dup stack-in off stack-out off ; - : compute-offsets ( -- ) - ['] clear-stack map-stacks - inst-stream clear-stack + prim prim-stacks-in max-stacks cells erase + prim prim-stacks-out max-stacks cells erase prim prim-effect-in prim prim-effect-in-end @ ['] compute-offset-in map-items prim prim-effect-out prim prim-effect-out-end @ ['] compute-offset-out map-items inst-stream stack-out @ 0= s" # can only be on the input side" ?print-error ; @@ -540,26 +744,42 @@ stack inst-stream IP Cell declarations compute-offsets output @ execute ; -: flush-a-tos { stack -- } - stack stack-out @ 0<> stack stack-in @ 0= and - if - ." IF_" stack stack-pointer 2@ 2dup type ." TOS(" - 2dup type ." [0] = " type ." TOS);" cr - endif ; +: stack-state-items ( stack state -- n ) + state-ss ss-registers 2@ nip ; -: flush-tos ( -- ) - ['] flush-a-tos map-stacks ; +: unused-stack-items { stack -- n-in n-out } + \ n-in are the stack items in state-in not used by prim + \ n-out are the stack items in state-out not written by prim + stack state-in stack-state-items stack stack-in @ - 0 max + stack state-out stack-state-items stack stack-out @ - 0 max ; + +: spill-stack { stack -- } + \ spill regs of state-in that are not used by prim and are not in state-out + stack state-in stack-offset { offset } + stack state-in stack-state-items ( items ) + dup stack unused-stack-items - - +do + \ loop through the bottom items + stack stack-pointer 2@ type + i offset - stack normal-stack-access0 ." = " + i stack state-in normal-stack-access1 ." ;" cr + loop ; -: fill-a-tos { stack -- } - stack stack-out @ 0= stack stack-in @ 0<> and - if - ." IF_" stack stack-pointer 2@ 2dup type ." TOS(" - 2dup type ." TOS = " type ." [0]);" cr - endif ; +: spill-state ( -- ) + ['] spill-stack map-stacks1 ; -: fill-tos ( -- ) +: fill-stack { stack -- } + stack state-out stack-offset { offset } + stack state-out stack-state-items ( items ) + dup stack unused-stack-items - + +do + \ loop through the bottom items + i stack state-out normal-stack-access1 ." = " + stack stack-pointer 2@ type + i offset - stack normal-stack-access0 ." ;" cr + loop ; + +: fill-state ( -- ) \ !! inst-stream for prefetching? - ['] fill-a-tos map-stacks ; + ['] fill-stack map-stacks1 ; : fetch ( addr -- ) dup item-type @ type-fetch @ execute ; @@ -567,20 +787,62 @@ stack inst-stream IP Cell : fetches ( -- ) prim prim-effect-in prim prim-effect-in-end @ ['] fetch map-items ; -: stack-pointer-update { stack -- } - \ stack grow downwards - stack stack-in @ stack stack-out @ - - ?dup-if \ this check is not necessary, gcc would do this for us - stack stack-pointer 2@ type ." += " 0 .r ." ;" cr +: reg-reg-move ( reg-from reg-to -- ) + 2dup = if + 2drop + else + .reg ." = " .reg ." ;" cr endif ; -: inst-pointer-update ( -- ) - inst-stream stack-in @ ?dup-if - ." INC_IP(" 0 .r ." );" cr +: stack-bottom-reg { n stack state -- reg } + stack state stack-state-items n - 1- stack state stack-reg ; + +: stack-moves { stack -- } + \ generate moves between registers in state-in/state-out that are + \ not spilled or consumed/produced by prim. + \ !! this works only for a simple stack cache, not e.g., for + \ rotating stack caches, or registers shared between stacks (the + \ latter would also require a change in interface) + \ !! maybe place this after NEXT_P1? + stack unused-stack-items 2dup < if ( n-in n-out ) + \ move registers from 0..n_in-1 to n_out-n_in..n_out-1 + over - { diff } ( n-in ) + -1 swap 1- -do + i stack state-in stack-bottom-reg ( reg-from ) + i diff + stack state-out stack-bottom-reg reg-reg-move + 1 -loop + else + \ move registers from n_in-n_out..n_in-1 to 0..n_out-1 + swap over - { diff } ( n-out ) + 0 +do + i diff + stack state-in stack-bottom-reg ( reg-from ) + i stack state-out stack-bottom-reg reg-reg-move + loop endif ; +: stack-update-transform ( n1 stack -- n2 ) + \ n2 is the number by which the stack pointer should be + \ incremented to pop n1 items + stack-access-transform @ dup >r execute + 0 r> execute - ; + +: stack-pointer-update { stack -- } + \ and moves + \ stacks grow downwards + stack stack-diff ( in-out ) + stack state-in stack-offset - + stack state-out stack-offset + ( [in-in_offset]-[out-out_offset] ) + ?dup-if \ this check is not necessary, gcc would do this for us + stack inst-stream = if + ." INC_IP(" 0 .r ." );" cr + else + stack stack-pointer 2@ type ." += " + stack stack-update-transform 0 .r ." ;" cr + endif + endif + stack stack-moves ; + : stack-pointer-updates ( -- ) - inst-pointer-update ['] stack-pointer-update map-stacks ; : store ( item -- ) @@ -625,60 +887,89 @@ stack inst-stream IP Cell endif 2drop ; -: output-c-tail ( -- ) - \ the final part of the generated C code +: output-nextp2 ( -- ) + ." NEXT_P2;" cr ; + +variable tail-nextp2 \ xt to execute for printing NEXT_P2 in INST_TAIL +' output-nextp2 tail-nextp2 ! + +: output-label2 ( -- ) + ." LABEL2(" prim prim-c-name 2@ type ." )" cr + ." NEXT_P2;" cr ; + +: output-c-tail1 { xt -- } + \ the final part of the generated C code, with xt printing LABEL2 or not. output-super-end print-debug-results ." NEXT_P1;" cr stores - fill-tos - ." NEXT_P2;" ; + fill-state + xt execute ; + +: output-c-tail1-no-stores { xt -- } + \ the final part of the generated C code for combinations + output-super-end + ." NEXT_P1;" cr + fill-state + xt execute ; + +: output-c-tail ( -- ) + tail-nextp2 @ output-c-tail1 ; + +: output-c-tail2 ( -- ) + ['] output-label2 output-c-tail1 ; + +: output-c-tail-no-stores ( -- ) + tail-nextp2 @ output-c-tail1-no-stores ; + +: output-c-tail2-no-stores ( -- ) + ['] output-label2 output-c-tail1-no-stores ; : type-c-code ( c-addr u xt -- ) - \ like TYPE, but replaces "TAIL;" with tail code produced by xt + \ like TYPE, but replaces "INST_TAIL;" with tail code produced by xt { xt } + ." {" cr + ." #line " c-line @ . quote c-filename 2@ type quote cr begin ( c-addr1 u1 ) - 2dup s" TAIL;" search + 2dup s" INST_TAIL;" search while ( c-addr1 u1 c-addr3 u3 ) 2dup 2>r drop nip over - type xt execute - 2r> 5 /string + 2r> 10 /string \ !! resync #line missing repeat - 2drop type ; + 2drop type + ." #line " out-nls @ 2 + . quote out-filename 2@ type quote cr + ." }" cr ; : print-entry ( -- ) - ." LABEL(" prim prim-c-name 2@ type ." ):" ; + ." LABEL(" prim prim-c-name 2@ type ." )" ; : output-c ( -- ) - print-entry ." /* " prim prim-name 2@ type ." ( " prim prim-stack-string 2@ type ." ) */" cr - ." /* " prim prim-doc 2@ type ." */" cr - ." NAME(" quote prim prim-name 2@ type quote ." )" cr \ debugging - ." {" cr - ." DEF_CA" cr - print-declarations - ." NEXT_P0;" cr - flush-tos - fetches - print-debug-args - stack-pointer-updates - ." {" cr - ." #line " c-line @ . quote c-filename 2@ type quote cr - prim prim-c-code 2@ ['] output-c-tail type-c-code - ." }" cr - output-c-tail - ." }" cr - cr + print-entry ." /* " prim prim-name 2@ type ." ( " prim prim-stack-string 2@ type ." ) */" cr + ." /* " prim prim-doc 2@ type ." */" cr + ." NAME(" quote prim prim-name 2@ type quote ." )" cr \ debugging + ." {" cr + ." DEF_CA" cr + print-declarations + ." NEXT_P0;" cr + spill-state + fetches + print-debug-args + stack-pointer-updates + prim prim-c-code 2@ ['] output-c-tail type-c-code + output-c-tail2 + ." }" cr + cr ; : disasm-arg { item -- } item item-stack @ inst-stream = if - ." fputc(' ', vm_out); " - \ !! change this to first convert args to the right type and - \ then print them - ." /* printarg_" item item-type @ print-type-prefix - ." ((" item item-type @ type-c-name 2@ type ." )" - ." ip[" item item-offset @ 1+ 0 .r ." ]); */" cr + ." {" cr + item print-declaration + item fetch + item print-debug-arg + ." }" cr endif ; : disasm-args ( -- ) @@ -706,14 +997,15 @@ stack inst-stream IP Cell endif ." }" cr ; +: output-profile-part ( p ) + ." add_inst(b, " quote + prim-name 2@ type + quote ." );" cr ; + : output-profile-combined ( -- ) \ generate code for postprocessing the VM block profile stuff ." if (VM_IS_INST(*ip, " function-number @ 0 .r ." )) {" cr - num-combined @ 0 +do - ." add_inst(b, " quote - combined-prims i th @ prim-name 2@ type - quote ." );" cr - loop + ['] output-profile-part map-combined ." ip += " inst-stream stack-in @ 1+ 0 .r ." ;" cr combined-prims num-combined @ 1- th @ prim-c-code 2@ s" SET_IP" search nip nip combined-prims num-combined @ 1- th @ prim-c-code 2@ s" SUPER_END" search nip nip or if @@ -723,10 +1015,14 @@ stack inst-stream IP Cell endif ." }" cr ; +: prim-branch? { prim -- f } + \ true if prim is a branch or super-end + prim prim-c-code 2@ s" SET_IP" search nip nip 0<> ; + : output-superend ( -- ) \ output flag specifying whether the current word ends a dynamic superinst - prim prim-c-code 2@ s" SET_IP" search nip nip - prim prim-c-code 2@ s" SUPER_END" search nip nip or 0<> + prim prim-branch? + prim prim-c-code 2@ s" SUPER_END" search nip nip 0<> or prim prim-c-code 2@ s" SUPER_CONTINUE" search nip nip 0= and negate 0 .r ." , /* " prim prim-name 2@ type ." */" cr ; @@ -779,7 +1075,7 @@ stack inst-stream IP Cell \ data-stack stack-used? IF ." Cell *sp=SP;" cr THEN \ fp-stack stack-used? IF ." Cell *fp=*FP;" cr THEN \ return-stack stack-used? IF ." Cell *rp=*RP;" cr THEN -\ flush-tos +\ spill-state \ fetches \ stack-pointer-updates \ fp-stack stack-used? IF ." *FP=fp;" cr THEN @@ -788,7 +1084,7 @@ stack inst-stream IP Cell \ prim prim-c-code 2@ type \ ." }" cr \ stores -\ fill-tos +\ fill-state \ ." return (sp);" cr \ ." }" cr \ cr ; @@ -799,6 +1095,9 @@ stack inst-stream IP Cell : output-alias ( -- ) ( primitive-number @ . ." alias " ) ." Primitive " prim prim-name 2@ type cr ; +: output-c-prim-num ( -- ) + ." N_" prim prim-c-name 2@ type ." ," cr ; + : output-forth ( -- ) prim prim-forth-code @ 0= IF \ output-alias @@ -900,9 +1199,7 @@ stack inst-stream IP Cell \ #line 516 "./prim" \ n = n1+n2; \ } -\ NEXT_P1; \ _x_sp0 = (Cell)n; -\ NEXT_P2; \ } \ NEXT_P1; \ spTOS = (Cell)_x_sp0; @@ -912,6 +1209,7 @@ stack inst-stream IP Cell prim to combined 0 num-combined ! current-depth max-stacks cells erase + include-skipped-insts @ current-depth 0 th ! max-depth max-stacks cells erase min-depth max-stacks cells erase prim prim-effect-in prim prim-effect-in-end ! @@ -923,24 +1221,33 @@ stack inst-stream IP Cell : min! ( n addr -- ) tuck @ min swap ! ; +: inst-stream-adjustment ( nstack -- n ) + \ number of stack items to add for each part + 0= include-skipped-insts @ and negate ; + : add-depths { p -- } \ combine stack effect of p with *-depths max-stacks 0 ?do current-depth i th @ - p prim-stacks-in i th @ + + p prim-stacks-in i th @ + i inst-stream-adjustment + dup max-depth i th max! p prim-stacks-out i th @ - dup min-depth i th min! current-depth i th ! loop ; +: copy-maxdepths ( n -- ) + max-depth max-depths rot max-stacks * th max-stacks cells move ; + : add-prim ( addr u -- ) \ add primitive given by "addr u" to combined-prims primitives search-wordlist s" unknown primitive" ?print-error execute { p } p combined-prims num-combined @ th ! + num-combined @ copy-maxdepths 1 num-combined +! - p add-depths ; + p add-depths + num-combined @ copy-maxdepths ; : compute-effects { q -- } \ compute the stack effects of q from the depths @@ -973,6 +1280,27 @@ stack inst-stream IP Cell i q prim-stacks-out i th @ q prim-effect-out-end make-effect-items loop ; +: compute-stack-max-back-depths ( stack -- ) + stack-number @ { stack# } + current-depth stack# th @ dup + dup stack# num-combined @ s-c-max-back-depth ! + -1 num-combined @ 1- -do ( max-depth current-depth ) + combined-prims i th @ { p } + p prim-stacks-out stack# th @ + + dup >r max r> + over stack# i s-c-max-back-depth ! + p prim-stacks-in stack# th @ - + stack# inst-stream-adjustment - + 1 -loop + assert( dup stack# inst-stream-adjustment negate = ) + assert( over max-depth stack# th @ = ) + 2drop ; + +: compute-max-back-depths ( -- ) + \ compute max-back-depths. + \ assumes that current-depths is correct for the end of the combination + ['] compute-stack-max-back-depths map-stacks ; + : process-combined ( -- ) combined combined-prims num-combined @ cells combinations ['] constant insert-wordlist @@ -980,14 +1308,91 @@ stack inst-stream IP Cell @ prim-c-code 2@ prim prim-c-code 2! \ used by output-super-end prim compute-effects prim init-effects + compute-max-back-depths output-combined perform ; +\ reprocessing (typically to generate versions for another cache states) +\ !! use prim-context + +variable reprocessed-num 0 reprocessed-num ! + +: new-name ( -- c-addr u ) + reprocessed-num @ 0 + 1 reprocessed-num +! + <# #s 'p hold '_ hold #> save-mem ; + +: reprocess-simple ( prim -- ) + to prim + new-name prim prim-c-name 2! + output @ execute ; + +: lookup-prim ( c-addr u -- prim ) + primitives search-wordlist 0= -13 and throw execute ; + +: state-prim1 { in-state out-state prim -- } + in-state out-state state-default dup d= ?EXIT + in-state to state-in + out-state to state-out + prim reprocess-simple ; + +: state-prim ( in-state out-state "name" -- ) + parse-word lookup-prim state-prim1 ; + +\ reprocessing with default states + +\ This is a simple scheme and should be generalized +\ assumes we only cache one stack and use simple states for that + +0 value cache-stack \ stack that we cache +2variable cache-states \ states of the cache, starting with the empty state + +: compute-default-state-out ( n-in -- n-out ) + \ for the current prim + cache-stack stack-in @ - 0 max + cache-stack stack-out @ + cache-states 2@ nip 1- min ; + +: gen-prim-states ( prim -- ) + to prim + cache-states 2@ swap { states } ( nstates ) + cache-stack stack-in @ +do + states i th @ + states i compute-default-state-out th @ + prim state-prim1 + loop ; + +: prim-states ( "name" -- ) + parse-word lookup-prim gen-prim-states ; + +: gen-branch-states ( prim -- ) + \ generate versions that produce state-default; useful for branches + to prim + cache-states 2@ swap { states } ( nstates ) + cache-stack stack-in @ +do + states i th @ state-default prim state-prim1 + loop ; + +: branch-states ( out-state "name" -- ) + parse-word lookup-prim gen-branch-states ; + +\ producing state transitions + +: gen-transitions ( "name" -- ) + parse-word lookup-prim { prim } + cache-states 2@ { states nstates } + nstates 0 +do + nstates 0 +do + i j <> if + states i th @ states j th @ prim state-prim1 + endif + loop + loop ; + \ C output : print-item { n stack -- } \ print nth stack item name stack stack-type @ type-c-name 2@ type space - ." _" stack stack-pointer 2@ type n 0 .r ; + ." MAYBE_UNUSED _" stack stack-pointer 2@ type n 0 .r ; : print-declarations-combined ( -- ) max-stacks 0 ?do @@ -1005,10 +1410,20 @@ stack inst-stream IP Cell : output-combined-tail ( -- ) part-output-c-tail - prim >r combined to prim in-part @ >r in-part off - output-c-tail - r> in-part ! r> to prim ; + combined ['] output-c-tail-no-stores prim-context + r> in-part ! ; + +: part-stack-pointer-updates ( -- ) + next-stack-number @ 0 +do + i part-num @ 1+ s-c-max-depth @ dup + i num-combined @ s-c-max-depth @ = \ final depth + swap i part-num @ s-c-max-depth @ <> \ just reached now + part-num @ 0= \ first part + or and if + stacks i th @ stack-pointer-update + endif + loop ; : output-part ( p -- ) to prim @@ -1018,20 +1433,18 @@ stack inst-stream IP Cell print-declarations part-fetches print-debug-args + combined ['] part-stack-pointer-updates prim-context + 1 part-num +! prim add-depths \ !! right place? - ." {" cr - ." #line " c-line @ . quote c-filename 2@ type quote cr prim prim-c-code 2@ ['] output-combined-tail type-c-code - ." }" cr part-output-c-tail ." }" cr ; : output-parts ( -- ) prim >r in-part on current-depth max-stacks cells erase - num-combined @ 0 +do - combined-prims i th @ output-part - loop + 0 part-num ! + ['] output-part map-combined in-part off r> to prim ; @@ -1042,12 +1455,12 @@ stack inst-stream IP Cell ." DEF_CA" cr print-declarations-combined ." NEXT_P0;" cr - flush-tos - fetches + spill-state + \ fetches \ now in parts \ print-debug-args - stack-pointer-updates + \ stack-pointer-updates now in parts output-parts - output-c-tail + output-c-tail2-no-stores ." }" cr cr ; @@ -1057,13 +1470,16 @@ stack inst-stream IP Cell \ peephole optimization rules +\ data for a simple peephole optimizer that always tries to combine +\ the currently compiled instruction with the last one. + \ in order for this to work as intended, shorter combinations for each \ length must be present, and the longer combinations must follow \ shorter ones (this restriction may go away in the future). : output-peephole ( -- ) combined-prims num-combined @ 1- cells combinations search-wordlist - s" the prefix for this combination must be defined earlier" ?print-error + s" the prefix for this superinstruction must be defined earlier" ?print-error ." {" execute prim-num @ 5 .r ." ," combined-prims num-combined @ 1- th @ prim-num @ 5 .r ." ," @@ -1071,15 +1487,94 @@ stack inst-stream IP Cell combined prim-c-name 2@ type ." */" cr ; -: output-forth-peephole ( -- ) - combined-prims num-combined @ 1- cells combinations search-wordlist - s" the prefix for this combination must be defined earlier" ?print-error - execute prim-num @ 5 .r - combined-prims num-combined @ 1- th @ prim-num @ 5 .r - combined prim-num @ 5 .r ." prim, \ " - combined prim-c-name 2@ type + +\ cost and superinstruction data for a sophisticated combiner (e.g., +\ shortest path) + +\ This is intended as initializer for a structure like this + +\ struct cost { +\ int loads; /* number of stack loads */ +\ int stores; /* number of stack stores */ +\ int updates; /* number of stack pointer updates */ +\ int offset; /* offset into super2 table */ +\ int length; /* number of components */ +\ }; + +\ How do you know which primitive or combined instruction this +\ structure refers to? By the order of cost structures, as in most +\ other cases. + +: super2-length ( -- n ) + combined if + num-combined @ + else + 1 + endif ; + +: compute-costs { p -- nloads nstores nupdates } + \ compute the number of loads, stores, and stack pointer updates + \ of a primitive or combined instruction; does not take TOS + \ caching into account + 0 max-stacks 0 +do + p prim-stacks-in i th @ + + loop + super2-length 1- - \ don't count instruction fetches of subsumed insts + 0 max-stacks 0 +do + p prim-stacks-out i th @ + + loop + 0 max-stacks 1 +do \ don't count ip updates, therefore "1 +do" + p prim-stacks-in i th @ p prim-stacks-out i th @ <> - + loop ; + +: output-num-part ( p -- ) + ." N_" prim-c-name 2@ type ." ," ; + \ prim-num @ 4 .r ." ," ; + +: output-name-comment ( -- ) + ." /* " prim prim-name 2@ type ." */" ; + +variable offset-super2 0 offset-super2 ! \ offset into the super2 table + +: output-costs-prefix ( -- ) + ." {" prim compute-costs + rot 2 .r ." ," swap 2 .r ." ," 2 .r ." , " + prim prim-branch? negate . ." ," + state-in state-number @ 2 .r ." ," + state-out state-number @ 2 .r ." ," ; + +: output-costs-gforth-simple ( -- ) + output-costs-prefix + prim output-num-part + 1 2 .r ." }," + output-name-comment + cr ; + +: output-costs-gforth-combined ( -- ) + output-costs-prefix + ." N_START_SUPER+" offset-super2 @ 5 .r ." ," + super2-length dup 2 .r ." }," offset-super2 +! + output-name-comment + cr ; + +: output-costs ( -- ) + \ description of superinstructions and simple instructions + ." {" prim compute-costs + rot 2 .r ." ," swap 2 .r ." ," 2 .r ." ," + offset-super2 @ 5 .r ." ," + super2-length dup 2 .r ." }," offset-super2 +! + output-name-comment cr ; +: output-super2 ( -- ) + \ table of superinstructions without requirement for existing prefixes + combined if + ['] output-num-part map-combined + else + prim output-num-part + endif + output-name-comment + cr ; \ the parser @@ -1105,25 +1600,25 @@ print-token ! getinput member? ; ' testchar? test-vector ! -: checksyncline ( -- ) +: checksynclines ( -- ) \ when input points to a newline, check if the next line is a \ sync line. If it is, perform the appropriate actions. - rawinput @ >r - s" #line " r@ over compare 0<> if - rdrop 1 line +! EXIT - endif - 0. r> 6 chars + 20 >number drop >r drop line ! r> ( c-addr ) - dup c@ bl = if - char+ dup c@ [char] " <> 0= s" sync line syntax" ?print-error - char+ dup 100 [char] " scan drop swap 2dup - save-mem filename 2! - char+ - endif - dup c@ nl-char <> 0= s" sync line syntax" ?print-error - skipsynclines @ if - dup char+ rawinput ! - rawinput @ c@ cookedinput @ c! - endif - drop ; + rawinput @ begin >r + s" #line " r@ over compare if + rdrop 1 line +! EXIT + endif + 0. r> 6 chars + 20 >number drop >r drop line ! r> ( c-addr ) + dup c@ bl = if + char+ dup c@ [char] " <> 0= s" sync line syntax" ?print-error + char+ dup 100 [char] " scan drop swap 2dup - save-mem filename 2! + char+ + endif + dup c@ nl-char <> 0= s" sync line syntax" ?print-error + skipsynclines @ if + char+ dup rawinput ! + rawinput @ c@ cookedinput @ c! + endif + again ; : ?nextchar ( f -- ) s" syntax error, wrong char" ?print-error @@ -1132,10 +1627,11 @@ print-token ! 1 chars rawinput +! 1 chars cookedinput +! nl-char = if - checksyncline + checksynclines rawinput @ line-start ! endif - rawinput @ c@ cookedinput @ c! + rawinput @ c@ + cookedinput @ c! endif ; : charclass ( set "name" -- ) @@ -1158,6 +1654,8 @@ bl singleton tab-char over add-member nl-char singleton eof-char over add-member complement charclass nonl nl-char singleton eof-char over add-member char : over add-member complement charclass nocolonnl +nl-char singleton eof-char over add-member + char } over add-member complement charclass nobracenl bl 1+ maxchar .. char \ singleton complement intersection charclass nowhitebq bl 1+ maxchar .. charclass nowhite @@ -1169,7 +1667,7 @@ nl-char singleton eof-char over add-memb (( letter (( letter || digit )) ** )) <- c-ident ( -- ) -(( ` # ?? (( letter || digit || ` : )) ** +(( ` # ?? (( letter || digit || ` : )) ++ )) <- stack-ident ( -- ) (( nowhitebq nowhite ** )) @@ -1191,27 +1689,33 @@ Variable c-flag )) <- c-comment ( -- ) (( ` - nonl ** {{ - forth-flag @ IF ." [ELSE]" cr THEN - c-flag @ IF ." #else" cr THEN }} + forth-flag @ IF forth-fdiff ." [ELSE]" cr THEN + c-flag @ IF + function-diff + ." #else /* " function-number @ 0 .r ." */" cr THEN }} )) <- else-comment (( ` + {{ start }} nonl ** {{ end dup IF c-flag @ - IF ." #ifdef HAS_" bounds ?DO I c@ toupper emit LOOP cr + IF + function-diff + ." #ifdef HAS_" bounds ?DO I c@ toupper emit LOOP cr THEN forth-flag @ - IF ." has? " type ." [IF]" cr THEN + IF forth-fdiff ." has? " type ." [IF]" cr THEN ELSE 2drop - c-flag @ IF ." #endif" cr THEN - forth-flag @ IF ." [THEN]" cr THEN + c-flag @ IF + function-diff ." #endif" cr THEN + forth-flag @ IF forth-fdiff ." [THEN]" cr THEN THEN }} )) <- if-comment (( (( ` g || ` G )) {{ start }} nonl ** {{ end - forth-flag @ IF ." group " type cr THEN - c-flag @ IF ." GROUP(" type ." )" cr THEN }} + forth-flag @ IF forth-fdiff ." group " type cr THEN + c-flag @ IF function-diff + ." GROUP(" type ." , " function-number @ 0 .r ." )" cr THEN }} )) <- group-comment (( (( eval-comment || forth-comment || c-comment || else-comment || if-comment || group-comment )) ?? nonl ** )) <- comment-body @@ -1232,7 +1736,10 @@ Variable c-flag (( {{ start }} c-ident {{ end prim prim-c-name 2! }} )) ?? )) ?? nleof (( ` " ` " {{ start }} (( noquote ++ ` " )) ++ {{ end 1- prim prim-doc 2! }} ` " white ** nleof )) ?? - {{ skipsynclines off line @ c-line ! filename 2@ c-filename 2! start }} (( nocolonnl nonl ** nleof white ** )) ** {{ end prim prim-c-code 2! skipsynclines on }} + {{ skipsynclines off line @ c-line ! filename 2@ c-filename 2! start }} + (( (( ` { nonl ** nleof (( (( nobracenl {{ line @ drop }} nonl ** )) ?? nleof )) ** ` } white ** nleof white ** )) + || (( nocolonnl nonl ** nleof white ** )) ** )) + {{ end prim prim-c-code 2! skipsynclines on }} (( ` : white ** nleof {{ start }} (( nonl ++ nleof white ** )) ++ {{ end prim prim-forth-code 2! }} )) ?? {{ process-simple }} @@ -1247,9 +1754,11 @@ Variable c-flag (( {{ make-prim to prim 0 to combined line @ name-line ! filename 2@ name-filename 2! function-number @ prim prim-num ! - start }} forth-ident {{ end 2dup prim prim-name 2! prim prim-c-name 2! }} white ++ + start }} [ifdef] vmgen c-ident [else] forth-ident [then] {{ end + 2dup prim prim-name 2! prim prim-c-name 2! }} white ** (( ` / white ** {{ start }} c-ident {{ end prim prim-c-name 2! }} white ** )) ?? - (( simple-primitive || combined-primitive )) {{ 1 function-number +! }} + (( simple-primitive || combined-primitive )) + {{ 1 function-number +! }} )) <- primitive ( -- ) (( (( comment || primitive || nl white ** )) ** eof )) @@ -1274,13 +1783,24 @@ warnings @ [IF] \ process the string at addr u over dup rawinput ! dup line-start ! cookedinput ! + endrawinput ! - checksyncline + checksynclines primitives2something ; +: unixify ( c-addr u1 -- c-addr u2 ) + \ delete crs from the string + bounds tuck tuck ?do ( c-addr1 ) + i c@ dup #cr <> if + over c! char+ + else + drop + endif + loop + over - ; + : process-file ( addr u xt-simple x-combined -- ) output-combined ! output ! save-mem 2dup filename 2! - slurp-file + slurp-file unixify warnings @ if ." ------------ CUT HERE -------------" cr endif primfilter ;