--- gforth/prims2x.fs 2002/08/20 07:59:01 1.110 +++ gforth/prims2x.fs 2002/10/12 19:06:37 1.120 @@ -78,10 +78,22 @@ variable line-start \ pointer to start o 0 line ! 2variable filename \ filename of original input file 0 0 filename 2! +2variable out-filename \ filename of the output file (for sync lines) +0 0 out-filename 2! 2variable f-comment 0 0 f-comment 2! variable skipsynclines \ are sync lines ("#line ...") invisible to the parser? -skipsynclines on +skipsynclines on +variable out-nls \ newlines in output (for output sync lines) +0 out-nls ! +variable store-optimization \ use store optimization? +store-optimization off + +variable include-skipped-insts +\ does the threaded code for a combined instruction include the cells +\ for the component instructions (true) or only the cells for the +\ inline arguments (false) +include-skipped-insts off : th ( addr1 n -- addr2 ) cells + ; @@ -121,12 +133,24 @@ skipsynclines on 0 recover endtry r> to outfile-id throw - abort + 1 (bye) \ abort endif ; : quote ( -- ) [char] " emit ; +\ count output lines to generate sync lines for output + +: count-nls ( addr u -- ) + bounds u+do + i c@ nl-char = negate out-nls +! + loop ; + +:noname ( addr u -- ) + 2dup count-nls + defers type ; +is type + variable output \ xt ( -- ) of output word for simple primitives variable output-combined \ xt ( -- ) of output word for combined primitives @@ -173,6 +197,12 @@ create stacks max-stacks cells allot \ a ['] stack-in-index r> stack-in-index-xt ! ; : map-stacks { xt -- } + \ perform xt for all stacks + next-stack-number @ 0 +do + stacks i th @ xt execute + loop ; + +: map-stacks1 { xt -- } \ perform xt for all stacks except inst-stream next-stack-number @ 1 +do stacks i th @ xt execute @@ -227,9 +257,24 @@ end-struct prim% variable in-part \ true if processing a part in-part off +: prim-context ( ... p xt -- ... 
) + \ execute xt with prim set to p + prim >r + swap to prim + catch + r> to prim + throw ; + 1000 constant max-combined create combined-prims max-combined cells allot variable num-combined +variable part-num \ current part number during process-combined + +: map-combined { xt -- } + \ perform xt for all components of the current combined instruction + num-combined @ 0 +do + combined-prims i th @ xt execute + loop ; table constant combinations \ the keys are the sequences of pointers to primitives @@ -238,6 +283,19 @@ create current-depth max-stacks cells al create max-depth max-stacks cells allot create min-depth max-stacks cells allot +create sp-update-in max-stacks cells allot +\ where max-depth occurred the first time +create max-depths max-stacks max-combined 1+ * cells allot +\ maximum depth at start of each part: array[parts] of array[stack] +create max-back-depths max-stacks max-combined 1+ * cells allot +\ maximum depth from end of the combination to the start of each part + +: s-c-max-depth ( nstack ncomponent -- addr ) + max-stacks * + cells max-depths + ; + +: s-c-max-back-depth ( nstack ncomponent -- addr ) + max-stacks * + cells max-back-depths + ; + wordlist constant primitives : create-prim ( prim -- ) @@ -281,11 +339,14 @@ Variable function-number 0 function-numb \ forward declaration for inst-stream (breaks cycle in definitions) defer inst-stream-f ( -- stack ) +: stack-depth { stack -- n } + current-depth stack stack-number @ th @ ; + : part-stack-access { n stack -- } \ print _, x=inst-stream? n : maxdepth-currentdepth-n-1 ." 
_" stack stack-pointer 2@ type stack stack-number @ { stack# } - current-depth stack# th @ n + { access-depth } + stack stack-depth n + { access-depth } stack inst-stream-f = if access-depth else @@ -295,10 +356,42 @@ defer inst-stream-f ( -- stack ) endif 0 .r ; -: stack-access ( n stack -- ) +: part-stack-read { n stack -- } + stack stack-depth n + ( ndepth ) + stack stack-number @ part-num @ s-c-max-depth @ +\ max-depth stack stack-number @ th @ ( ndepth nmaxdepth ) + over <= if ( ndepth ) \ load from memory + stack normal-stack-access + else + drop n stack part-stack-access + endif ; + +: stack-diff ( stack -- n ) + \ in-out + dup stack-in @ swap stack-out @ - ; + +: part-stack-write { n stack -- } + stack stack-depth n + + stack stack-number @ part-num @ s-c-max-back-depth @ + over <= if ( ndepth ) + stack combined ['] stack-diff prim-context - + stack normal-stack-access + else + drop n stack part-stack-access + endif ; + +: stack-read ( n stack -- ) \ print a stack access at index n of stack in-part @ if - part-stack-access + part-stack-read + else + normal-stack-access + endif ; + +: stack-write ( n stack -- ) + \ print a stack access at index n of stack + in-part @ if + part-stack-write else normal-stack-access endif ; @@ -316,7 +409,7 @@ defer inst-stream-f ( -- stack ) >r ." vm_" r@ item-stack-type-name type ." 2" r@ item-type @ print-type-prefix ." (" - r@ item-in-index r@ item-stack @ stack-access ." ," + r@ item-in-index r@ item-stack @ stack-read ." ," r@ item-name 2@ type ." );" cr rdrop ; @@ -327,8 +420,8 @@ defer inst-stream-f ( -- stack ) ." vm_two" r@ item-stack-type-name type ." 2" r@ item-type @ print-type-prefix ." (" - r@ item-in-index r@ item-stack @ 2dup ." (Cell)" stack-access - ." , " -1 under+ ." (Cell)" stack-access + r@ item-in-index r@ item-stack @ 2dup ." (Cell)" stack-read + ." , " -1 under+ ." (Cell)" stack-read ." , " r@ item-name 2@ type ." 
)" cr rdrop ; @@ -360,22 +453,20 @@ defer inst-stream-f ( -- stack ) r@ item-type @ print-type-prefix ." 2" r@ item-stack-type-name type ." (" r@ item-name 2@ type ." ," - r@ item-out-index r@ item-stack @ stack-access ." );" + r@ item-out-index r@ item-stack @ stack-write ." );" rdrop ; : store-single ( item -- ) - >r - r@ same-as-in? - if - r@ item-in-index 0= r@ item-out-index 0= xor - if - ." IF_" r@ item-stack @ stack-pointer 2@ type - ." TOS(" r@ really-store-single ." );" cr - endif - else - r@ really-store-single cr - endif - rdrop ; + >r + store-optimization @ in-part @ 0= and r@ same-as-in? and if + r@ item-in-index 0= r@ item-out-index 0= xor if + ." IF_" r@ item-stack @ stack-pointer 2@ type + ." TOS(" r@ really-store-single ." );" cr + endif + else + r@ really-store-single cr + endif + rdrop ; : store-double ( item -- ) \ !! store optimization is not performed, because it is not yet needed @@ -384,8 +475,8 @@ defer inst-stream-f ( -- stack ) r@ item-type @ print-type-prefix ." 2two" r@ item-stack-type-name type ." (" r@ item-name 2@ type ." , " - r@ item-out-index r@ item-stack @ 2dup stack-access - ." , " -1 under+ stack-access + r@ item-out-index r@ item-stack @ 2dup stack-write + ." , " -1 under+ stack-write ." 
)" cr rdrop ; @@ -524,12 +615,11 @@ stack inst-stream IP Cell : compute-offset-out ( addr1 addr2 -- ) ['] stack-out compute-offset ; -: clear-stack { -- } +: clear-stack ( stack -- ) dup stack-in off stack-out off ; : compute-offsets ( -- ) ['] clear-stack map-stacks - inst-stream clear-stack prim prim-effect-in prim prim-effect-in-end @ ['] compute-offset-in map-items prim prim-effect-out prim prim-effect-out-end @ ['] compute-offset-out map-items inst-stream stack-out @ 0= s" # can only be on the input side" ?print-error ; @@ -548,7 +638,7 @@ stack inst-stream IP Cell endif ; : flush-tos ( -- ) - ['] flush-a-tos map-stacks ; + ['] flush-a-tos map-stacks1 ; : fill-a-tos { stack -- } stack stack-out @ 0= stack stack-in @ 0<> and @@ -559,7 +649,7 @@ stack inst-stream IP Cell : fill-tos ( -- ) \ !! inst-stream for prefetching? - ['] fill-a-tos map-stacks ; + ['] fill-a-tos map-stacks1 ; : fetch ( addr -- ) dup item-type @ type-fetch @ execute ; @@ -569,18 +659,16 @@ stack inst-stream IP Cell : stack-pointer-update { stack -- } \ stack grow downwards - stack stack-in @ stack stack-out @ - + stack stack-diff ?dup-if \ this check is not necessary, gcc would do this for us - stack stack-pointer 2@ type ." += " 0 .r ." ;" cr - endif ; - -: inst-pointer-update ( -- ) - inst-stream stack-in @ ?dup-if - ." INC_IP(" 0 .r ." );" cr + stack inst-stream = if + ." INC_IP(" 0 .r ." );" cr + else + stack stack-pointer 2@ type ." += " 0 .r ." ;" cr + endif endif ; : stack-pointer-updates ( -- ) - inst-pointer-update ['] stack-pointer-update map-stacks ; : store ( item -- ) @@ -625,28 +713,44 @@ stack inst-stream IP Cell endif 2drop ; -: output-c-tail1 ( -- ) - \ the final part of the generated C code except LABEL2 and NEXT_P2 +: output-label2 ( -- ) + ." LABEL2(" prim prim-c-name 2@ type ." )" cr ; + +: output-c-tail1 { xt -- } + \ the final part of the generated C code, with xt printing LABEL2 or not. output-super-end print-debug-results ." 
NEXT_P1;" cr stores - fill-tos ; + fill-tos + xt execute + ." NEXT_P2;" cr ; + +: output-c-tail1-no-stores { xt -- } + \ the final part of the generated C code for combinations + output-super-end + ." NEXT_P1;" cr + fill-tos + xt execute + ." NEXT_P2;" cr ; : output-c-tail ( -- ) - \ the final part of the generated C code, without LABEL2 - output-c-tail1 - ." NEXT_P2;" ; + ['] noop output-c-tail1 ; : output-c-tail2 ( -- ) - \ the final part of the generated C code, including LABEL2 - output-c-tail1 - ." LABEL2(" prim prim-c-name 2@ type ." )" cr - ." NEXT_P2;" cr ; + ['] output-label2 output-c-tail1 ; + +: output-c-tail-no-stores ( -- ) + ['] noop output-c-tail1-no-stores ; + +: output-c-tail2-no-stores ( -- ) + ['] output-label2 output-c-tail1-no-stores ; : type-c-code ( c-addr u xt -- ) \ like TYPE, but replaces "INST_TAIL;" with tail code produced by xt { xt } + ." {" cr + ." #line " c-line @ . quote c-filename 2@ type quote cr begin ( c-addr1 u1 ) 2dup s" INST_TAIL;" search while ( c-addr1 u1 c-addr3 u3 ) @@ -655,30 +759,29 @@ stack inst-stream IP Cell 2r> 10 /string \ !! resync #line missing repeat - 2drop type ; + 2drop type + ." #line " out-nls @ 2 + . quote out-filename 2@ type quote cr + ." }" cr ; : print-entry ( -- ) ." LABEL(" prim prim-c-name 2@ type ." )" ; : output-c ( -- ) - print-entry ." /* " prim prim-name 2@ type ." ( " prim prim-stack-string 2@ type ." ) */" cr - ." /* " prim prim-doc 2@ type ." */" cr - ." NAME(" quote prim prim-name 2@ type quote ." )" cr \ debugging - ." {" cr - ." DEF_CA" cr - print-declarations - ." NEXT_P0;" cr - flush-tos - fetches - print-debug-args - stack-pointer-updates - ." {" cr - ." #line " c-line @ . quote c-filename 2@ type quote cr - prim prim-c-code 2@ ['] output-c-tail type-c-code - ." }" cr - output-c-tail2 - ." }" cr - cr + print-entry ." /* " prim prim-name 2@ type ." ( " prim prim-stack-string 2@ type ." ) */" cr + ." /* " prim prim-doc 2@ type ." */" cr + ." NAME(" quote prim prim-name 2@ type quote ." 
)" cr \ debugging + ." {" cr + ." DEF_CA" cr + print-declarations + ." NEXT_P0;" cr + flush-tos + fetches + print-debug-args + stack-pointer-updates + prim prim-c-code 2@ ['] output-c-tail type-c-code + output-c-tail2 + ." }" cr + cr ; : disasm-arg { item -- } @@ -715,14 +818,15 @@ stack inst-stream IP Cell endif ." }" cr ; +: output-profile-part ( p ) + ." add_inst(b, " quote + prim-name 2@ type + quote ." );" cr ; + : output-profile-combined ( -- ) \ generate code for postprocessing the VM block profile stuff ." if (VM_IS_INST(*ip, " function-number @ 0 .r ." )) {" cr - num-combined @ 0 +do - ." add_inst(b, " quote - combined-prims i th @ prim-name 2@ type - quote ." );" cr - loop + ['] output-profile-part map-combined ." ip += " inst-stream stack-in @ 1+ 0 .r ." ;" cr combined-prims num-combined @ 1- th @ prim-c-code 2@ s" SET_IP" search nip nip combined-prims num-combined @ 1- th @ prim-c-code 2@ s" SUPER_END" search nip nip or if @@ -808,6 +912,9 @@ stack inst-stream IP Cell : output-alias ( -- ) ( primitive-number @ . ." alias " ) ." Primitive " prim prim-name 2@ type cr ; +: output-prim-num ( -- ) + prim prim-num @ 8 + 4 .r space prim prim-name 2@ type cr ; + : output-forth ( -- ) prim prim-forth-code @ 0= IF \ output-alias @@ -919,6 +1026,7 @@ stack inst-stream IP Cell prim to combined 0 num-combined ! current-depth max-stacks cells erase + include-skipped-insts @ current-depth 0 th ! max-depth max-stacks cells erase min-depth max-stacks cells erase prim prim-effect-in prim prim-effect-in-end ! @@ -930,24 +1038,33 @@ stack inst-stream IP Cell : min! ( n addr -- ) tuck @ min swap ! ; +: inst-stream-adjustment ( nstack -- n ) + \ number of stack items to add for each part + 0= include-skipped-insts @ and negate ; + : add-depths { p -- } \ combine stack effect of p with *-depths max-stacks 0 ?do current-depth i th @ - p prim-stacks-in i th @ + + p prim-stacks-in i th @ + i inst-stream-adjustment + dup max-depth i th max! 
p prim-stacks-out i th @ - dup min-depth i th min! current-depth i th ! loop ; +: copy-maxdepths ( n -- ) + max-depth max-depths rot max-stacks * th max-stacks cells move ; + : add-prim ( addr u -- ) \ add primitive given by "addr u" to combined-prims primitives search-wordlist s" unknown primitive" ?print-error execute { p } p combined-prims num-combined @ th ! + num-combined @ copy-maxdepths 1 num-combined +! - p add-depths ; + p add-depths + num-combined @ copy-maxdepths ; : compute-effects { q -- } \ compute the stack effects of q from the depths @@ -980,6 +1097,27 @@ stack inst-stream IP Cell i q prim-stacks-out i th @ q prim-effect-out-end make-effect-items loop ; +: compute-stack-max-back-depths ( stack -- ) + stack-number @ { stack# } + current-depth stack# th @ dup + dup stack# num-combined @ s-c-max-back-depth ! + -1 num-combined @ 1- -do ( max-depth current-depth ) + combined-prims i th @ { p } + p prim-stacks-out stack# th @ + + dup >r max r> + over stack# i s-c-max-back-depth ! + p prim-stacks-in stack# th @ - + stack# inst-stream-adjustment - + 1 -loop + assert( dup stack# inst-stream-adjustment negate = ) + assert( over max-depth stack# th @ = ) + 2drop ; + +: compute-max-back-depths ( -- ) + \ compute max-back-depths. + \ assumes that current-depths is correct for the end of the combination + ['] compute-stack-max-back-depths map-stacks ; + : process-combined ( -- ) combined combined-prims num-combined @ cells combinations ['] constant insert-wordlist @@ -987,6 +1125,7 @@ stack inst-stream IP Cell @ prim-c-code 2@ prim prim-c-code 2! \ used by output-super-end prim compute-effects prim init-effects + compute-max-back-depths output-combined perform ; \ C output @@ -1012,10 +1151,20 @@ stack inst-stream IP Cell : output-combined-tail ( -- ) part-output-c-tail - prim >r combined to prim in-part @ >r in-part off - output-c-tail - r> in-part ! r> to prim ; + combined ['] output-c-tail-no-stores prim-context + r> in-part ! 
; + +: part-stack-pointer-updates ( -- ) + max-stacks 0 +do + i part-num @ 1+ s-c-max-depth @ dup + i num-combined @ s-c-max-depth @ = \ final depth + swap i part-num @ s-c-max-depth @ <> \ just reached now + part-num @ 0= \ first part + or and if + stacks i th @ stack-pointer-update + endif + loop ; : output-part ( p -- ) to prim @@ -1025,20 +1174,18 @@ stack inst-stream IP Cell print-declarations part-fetches print-debug-args + combined ['] part-stack-pointer-updates prim-context + 1 part-num +! prim add-depths \ !! right place? - ." {" cr - ." #line " c-line @ . quote c-filename 2@ type quote cr prim prim-c-code 2@ ['] output-combined-tail type-c-code - ." }" cr part-output-c-tail ." }" cr ; : output-parts ( -- ) prim >r in-part on current-depth max-stacks cells erase - num-combined @ 0 +do - combined-prims i th @ output-part - loop + 0 part-num ! + ['] output-part map-combined in-part off r> to prim ; @@ -1050,11 +1197,11 @@ stack inst-stream IP Cell print-declarations-combined ." NEXT_P0;" cr flush-tos - fetches + \ fetches \ now in parts \ print-debug-args - stack-pointer-updates + \ stack-pointer-updates now in parts output-parts - output-c-tail2 + output-c-tail2-no-stores ." }" cr cr ; @@ -1064,13 +1211,16 @@ stack inst-stream IP Cell \ peephole optimization rules +\ data for a simple peephole optimizer that always tries to combine +\ the currently compiled instruction with the last one. + \ in order for this to work as intended, shorter combinations for each \ length must be present, and the longer combinations must follow \ shorter ones (this restriction may go away in the future). : output-peephole ( -- ) combined-prims num-combined @ 1- cells combinations search-wordlist - s" the prefix for this combination must be defined earlier" ?print-error + s" the prefix for this superinstruction must be defined earlier" ?print-error ." {" execute prim-num @ 5 .r ." ," combined-prims num-combined @ 1- th @ prim-num @ 5 .r ." 
," @@ -1078,15 +1228,53 @@ stack inst-stream IP Cell combined prim-c-name 2@ type ." */" cr ; -: output-forth-peephole ( -- ) - combined-prims num-combined @ 1- cells combinations search-wordlist - s" the prefix for this combination must be defined earlier" ?print-error - execute prim-num @ 5 .r - combined-prims num-combined @ 1- th @ prim-num @ 5 .r - combined prim-num @ 5 .r ." prim, \ " - combined prim-c-name 2@ type - cr ; +\ cost and superinstruction data for a sophisticated combiner (e.g., +\ shortest path) + +\ This is intended as initializer for a structure like this + +\ struct cost { +\ int loads; /* number of stack loads */ +\ int stores; /* number of stack stores */ +\ int updates; /* number of stack pointer updates */ +\ int length; /* number of components */ +\ int *components; /* array of vm_prim indexes of components */ +\ }; + +\ How do you know which primitive or combined instruction this +\ structure refers to? By the order of cost structures, as in most +\ other cases. + +: compute-costs { p -- nloads nstores nupdates } + \ compute the number of loads, stores, and stack pointer updates + \ of a primitive or combined instruction; does not take TOS + \ caching into account, nor that IP updates are combined with + \ other stuff + 0 max-stacks 0 +do + p prim-stacks-in i th @ + + loop + 0 max-stacks 0 +do + p prim-stacks-out i th @ + + loop + 0 max-stacks 0 +do + p prim-stacks-in i th @ p prim-stacks-out i th @ <> - + loop ; + +: output-num-part ( p -- ) + prim-num @ 4 .r ." ," ; + +: output-costs ( -- ) + ." {" prim compute-costs + rot 2 .r ." ," swap 2 .r ." ," 2 .r ." ," + combined if + num-combined @ 2 .r + ." , ((int []){" ['] output-num-part map-combined ." })}, /* " + else + ." 1, ((int []){" prim prim-num @ 4 .r ." })}, /* " + endif + prim prim-name 2@ type ." */" + cr ; \ the parser @@ -1116,7 +1304,7 @@ print-token ! \ when input points to a newline, check if the next line is a \ sync line. If it is, perform the appropriate actions. 
rawinput @ >r - s" #line " r@ over compare 0<> if + s" #line " r@ over compare if rdrop 1 line +! EXIT endif 0. r> 6 chars + 20 >number drop >r drop line ! r> ( c-addr )