\ ---------------------------------------------------------------------------
\ Unified diff of gforth/prof-inline.fs, revision 1.1 -> 1.7 (Forth / Gforth).
\
\ Storage note: the line breaks of the diff have been collapsed, so the
\ "+" / "-" markers of individual diff lines appear inline and several
\ definitions straddle physical line boundaries.  Because a Forth "\"
\ comment extends to the end of the physical line, the text below should be
\ read as a record of the change, not loaded or applied as it stands.
\
\ Hunks: -45,41 +45,104 / -89,7 +152,7 / -102,19 +165,92 / -172,44 +308,85
\
\ What the "+" side introduces:
\  * list%  - a one-field struct (list-next); profile% and the new calls%
\    struct both extend it, replacing the former profile-next field.
\  * Generic list words: map-list (execute xt on every node), list-length,
\    insert-list (insert at front) and insert-list-end (append at a tracked
\    tail position).
\  * profile% loses the count-calls? [if] guard around its call fields and
\    gains profile-bblen, profile-bblenpi, profile-callee-postlude,
\    profile-tailof, profile-exits, profile-execs, profile-prelude and
\    profile-postlude.
\  * profile-calls changes from a counter (old note-call: "1 ... +!") to the
\    head of a list of calls% nodes (new note-call: insert-list; the printing
\    code now uses "profile-calls @ list-length").
\  * new-profile-point stores -100000000 in profile-bblenpi as a "not set
\    yet" marker; when the following profile point is created and the previous
\    one still holds that marker, the previous point's profile-bblen is copied
\    into its profile-bblenpi.  Every new point is also appended to
\    profile-points and recorded in all-bbs through a calls% node.
\  * Statistics words: add-calls / print-stat-line (per static-caller-count
\    class selected by an xt such as 0= 1= 2= 3= 1u>), print-library-stats,
\    print-bb-statistics and the top-level print-statistics.
\  * profile-this sets in-compile,? while it compiles the literal and dinc,
\    and restores the previous flag value afterwards; prof-compile, passes
\    straight through to the deferred compile, while that flag is set.
\  * New hooks exit-hook-profile (counts exits of the current colon def) and
\    ;-hook-profile (records prelude/postlude lengths), installed at the end
\    together with the previously commented-out hook-profiling-into lines.
\
\ NOTE(review): in the new note-call the argument is moved into the local
\   addr by "{ addr }", yet the later test reads it with "over 3 cells + @";
\   "addr 3 cells + @" looks like what was meant - confirm.
\ NOTE(review): the line "!! update profile-bblenpi of last and current pp"
\   inside note-call carries no comment marker in this diff; it reads like an
\   unfinished to-do note that would be interpreted as code - confirm.
\ NOTE(review): the stack comment in print-stat-line names three items, but
\   five accumulators are pushed ("0. 0. 0 0 0") and five numbers printed.
\ NOTE(review): no initial store to current-profile-point is visible here,
\   although new-profile-point fetches through it on every call - verify that
\   it is set up before the first profile point is created.
\ ---------------------------------------------------------------------------
--- gforth/prof-inline.fs 2004/09/03 14:04:47 1.1 +++ gforth/prof-inline.fs 2004/09/13 07:32:37 1.7 @@ -45,41 +45,104 @@ true constant count-calls? \ do some pro \ how many static calls are there to a word? How many of the dynamic \ calls call just a single word? +\ how much does inlining called-once words help? +\ how much does inlining words without control flow help? +\ how much does partial inlining help? +\ what's the overlap? +\ optimizing return-to-returns (tail calls), return-to-calls, call-to-calls + struct - cell% field profile-next - cell% 2* field profile-count + cell% field list-next +end-struct list% + +list% + cell% 2* field profile-count \ how often this profile point is performed cell% 2* field profile-sourcepos - cell% field profile-char \ character position in line - count-calls? [if] - cell% field profile-colondef? \ is this a colon definition start - cell% field profile-calls \ static calls to the colon def - cell% field profile-straight-line \ may contain calls, but no other CF - cell% field profile-calls-from \ static calls in the colon def - [endif] -end-struct profile% \ profile point + cell% field profile-char \ character position in line + cell% field profile-bblen \ number of primitives in BB + cell% field profile-bblenpi \ bblen after partial inlining + cell% field profile-callee-postlude \ 0 or (for calls) callee postlude len + cell% field profile-tailof \ 0 or (for tail bbs) pointer to coldef bb + cell% field profile-colondef? \ is this a colon definition start + cell% field profile-calls \ static calls to the colon def (calls%) + cell% field profile-straight-line \ may contain calls, but no other CF + cell% field profile-calls-from \ static calls in the colon def + cell% field profile-exits \ number of exits in this colon def + cell% 2* field profile-execs \ number of EXECUTEs etc. of this colon def + cell% field profile-prelude \ first BB-len of colon def (incl. 
callee) + cell% field profile-postlude \ last BB-len of colon def (incl. callee) +end-struct profile% \ profile point + +list% + cell% field calls-call \ ptr to profile point of bb containing the call +end-struct calls% variable profile-points \ linked list of profile% 0 profile-points ! variable next-profile-point-p \ the address where the next pp will be stored profile-points next-profile-point-p ! -count-calls? [if] - variable last-colondef-profile \ pointer to the pp of last colon definition -[endif] - +variable last-colondef-profile \ pointer to the pp of last colon definition +variable current-profile-point +variable library-calls 0 library-calls ! \ list of calls to library colon defs +variable in-compile,? in-compile,? off +variable all-bbs 0 all-bbs ! \ list of all basic blocks + +\ list stuff + +: map-list ( ... list xt -- ... ) + { xt } begin { list } + list while + list xt execute + list list-next @ + repeat ; + +: drop-1+ drop 1+ ; + +: list-length ( list -- u ) + 0 swap ['] drop-1+ map-list ; + +: insert-list ( listp listpp -- ) + \ insert list node listp into list pointed to by listpp in front + tuck @ over list-next ! + swap ! ; + +: insert-list-end ( listp listppp -- ) + \ insert list node listp into list, with listppp indicating the + \ position to insert at, and indicating the position behind the + \ new element afterwards. + 2dup @ insert-list + swap list-next swap ! ; + +\ calls + +: new-call ( profile-point -- call ) + calls% %alloc tuck calls-call ! ; + +\ profile-point stuff + : new-profile-point ( -- addr ) profile% %alloc >r 0. r@ profile-count 2! current-sourcepos r@ profile-sourcepos 2! >in @ r@ profile-char ! - [ count-calls? ] [if] - r@ profile-colondef? off - 0 r@ profile-calls ! - r@ profile-straight-line on - 0 r@ profile-calls-from ! - [endif] - 0 r@ profile-next ! - r@ next-profile-point-p @ ! - r@ profile-next next-profile-point-p ! + 0 r@ profile-callee-postlude ! + 0 r@ profile-tailof ! + r@ profile-colondef? 
off + 0 r@ profile-bblen ! + -100000000 r@ profile-bblenpi ! + current-profile-point @ profile-bblenpi @ -100000000 = if + current-profile-point @ dup profile-bblen @ swap profile-bblenpi ! + endif + 0 r@ profile-calls ! + r@ profile-straight-line on + 0 r@ profile-calls-from ! + 0 r@ profile-exits ! + 0. r@ profile-execs 2! + 0 r@ profile-prelude ! + 0 r@ profile-postlude ! + r@ next-profile-point-p insert-list-end + r@ current-profile-point ! + r@ new-call all-bbs insert-list r> ; : print-profile ( -- ) @@ -89,7 +152,7 @@ count-calls? [if] r@ profile-sourcepos 2@ .sourcepos ." :" r@ profile-char @ 0 .r ." : " r@ profile-count 2@ 0 d.r cr - r> profile-next @ + r> list-next @ repeat drop ; @@ -102,19 +165,92 @@ count-calls? [if] r@ profile-char @ 3 .r ." : " r@ profile-count 2@ 10 d.r r@ profile-straight-line @ space 2 .r - r@ profile-calls @ 4 .r + r@ profile-calls @ list-length 4 .r cr endif - r> profile-next @ + r> list-next @ repeat drop ; +: 1= ( u -- f ) + 1 = ; + +: 2= ( u -- f ) + 2 = ; + +: 3= ( u -- f ) + 3 = ; + +: 1u> ( u -- f ) + 1 u> ; + +: call-count+ ( ud1 callp -- ud2 ) + calls-call @ profile-count 2@ d+ ; + +: count-dyncalls ( calls -- ud ) + 0. rot ['] call-count+ map-list ; + +: add-calls ( statistics1 xt-test profpp -- statistics2 xt-test ) + \ add statistics for callee profpp up, if the number of static + \ calls to profpp satisfies xt-test ( u -- f ); see below for what + \ statistics are computed. + { xt-test p } + p profile-colondef? @ if + p profile-calls @ { calls } + calls list-length { stat } + stat xt-test execute if + { d: ud-dyn-callee d: ud-dyn-caller u-stat u-exec-callees u-callees } + ud-dyn-callee p profile-count 2@ 2dup { d: de } d+ + ud-dyn-caller calls count-dyncalls 2dup { d: dr } d+ + u-stat stat + + u-exec-callees de dr d<> - + u-callees 1+ + endif + endif + xt-test ; + +: print-stat-line ( xt -- ) + >r 0. 0. 
0 0 0 r> profile-points @ ['] add-calls map-list drop + ( ud-dyn-callee ud-dyn-caller u-stat ) + 6 u.r 7 u.r 7 u.r 12 ud.r 12 ud.r space ; + +: print-library-stats ( -- ) + library-calls @ list-length 20 u.r \ static callers + library-calls @ count-dyncalls 12 ud.r \ dynamic callers + 13 spaces ; + +: bblen+ ( u1 callp -- u2 ) + calls-call @ profile-bblen @ + ; + +: dyn-bblen+ ( ud1 callp -- ud2 ) + calls-call @ dup profile-count 2@ rot profile-bblen @ 1 m*/ d+ ; + +: print-bb-statistics ( -- ) + ." static dynamic" cr + all-bbs @ list-length 6 u.r all-bbs @ count-dyncalls 12 ud.r ." basic blocks" cr + 0 all-bbs @ ['] bblen+ map-list 6 u.r + 0. all-bbs @ ['] dyn-bblen+ map-list 12 ud.r ." primitives" cr + ; + +: print-statistics ( -- ) + ." callee exec'd static dyn-caller dyn-callee condition" cr + ['] 0= print-stat-line ." calls to coldefs with 0 callers" cr + ['] 1= print-stat-line ." calls to coldefs with 1 callers" cr + ['] 2= print-stat-line ." calls to coldefs with 2 callers" cr + ['] 3= print-stat-line ." calls to coldefs with 3 callers" cr + ['] 1u> print-stat-line ." calls to coldefs with >1 callers" cr + print-library-stats ." library calls" cr + print-bb-statistics + ; + : dinc ( profilep -- ) \ increment double pointed to by d-addr profile-count dup 2@ 1. d+ rot 2! ; : profile-this ( -- ) - new-profile-point POSTPONE literal POSTPONE dinc ; + in-compile,? @ in-compile,? on + new-profile-point POSTPONE literal POSTPONE dinc + in-compile,? ! ; \ Various words trigger PROFILE-THIS. In order to avoid getting \ several calls to PROFILE-THIS from a compiling word (like ?EXIT), we @@ -172,44 +308,85 @@ Defer before-word-profile ( -- ) r> ! 
; \ change hook behaviour : note-execute ( -- ) - \ end of BB due to execute + \ end of BB due to execute, dodefer, perform + profile-this \ should actually happen after the word, but the + \ error is probably small ; : note-call ( addr -- ) \ addr is the body address of a called colon def or does handler - dup 3 cells + @ ['] dinc >body = if - 1 over cell+ @ profile-calls +! + dup ['] (does>2) >body = if \ adjust does handler address + 4 cells here 1 cells - +! endif - drop ; - + { addr } + current-profile-point @ { lastbb } + profile-this + current-profile-point @ { thisbb } + thisbb new-call { call-node } + over 3 cells + @ ['] dinc >body = if + \ non-library call + !! update profile-bblenpi of last and current pp + addr cell+ @ { callee-pp } + callee-pp profile-postlude @ thisbb profile-callee-postlude ! + call-node callee-pp profile-calls insert-list + else ( addr call-prof-point ) + call-node library-calls insert-list + endif ; + : prof-compile, ( xt -- ) - dup >does-code if - dup >does-code note-call - then - dup >code-address CASE - docol: OF dup >body note-call ENDOF - dodefer: OF note-execute ENDOF - dofield: OF >body @ ['] lit+ peephole-compile, , EXIT ENDOF - \ dofield: OF >body @ POSTPONE literal ['] + peephole-compile, EXIT ENDOF - \ code words and ;code-defined words (code words could be optimized): - dup in-dictionary? IF drop POSTPONE literal ['] execute peephole-compile, EXIT THEN + in-compile,? @ if + DEFERS compile, EXIT + endif + 1 current-profile-point @ profile-bblen +! 
+ dup CASE + ['] execute of note-execute endof + ['] perform of note-execute endof + dup >does-code if + dup >does-code note-call + then + dup >code-address CASE + docol: OF dup >body note-call ENDOF + dodefer: OF note-execute ENDOF + \ dofield: OF >body @ POSTPONE literal ['] + peephole-compile, EXIT ENDOF + \ code words and ;code-defined words (code words could be optimized): + ENDCASE ENDCASE DEFERS compile, ; -\ hook-profiling-into then-like -\ \ hook-profiling-into if-like \ subsumed by other-control-flow -\ \ hook-profiling-into ahead-like \ subsumed by other-control-flow -\ hook-profiling-into other-control-flow -\ hook-profiling-into begin-like -\ hook-profiling-into again-like -\ hook-profiling-into until-like - : :-hook-profile ( -- ) defers :-hook next-profile-point-p @ profile-this - @ dup last-colondef-profile ! + @ dup last-colondef-profile ! ( current-profile-point ) + 1 over profile-bblenpi ! profile-colondef? on ; +: exit-hook-profile ( -- ) + defers exit-hook + 1 last-colondef-profile @ profile-exits +! ; + +: ;-hook-profile ( -- ) + \ ;-hook is called before the POSTPONE EXIT + defers ;-hook + last-colondef-profile @ { col } + current-profile-point @ { bb } + col profile-bblen @ col profile-prelude +! + col profile-exits @ 0= if + col bb profile-tailof ! + bb profile-bblen @ bb profile-callee-postlude @ + + col profile-postlude ! + 1 bb profile-bblenpi ! + \ not counting the EXIT + endif ; + +hook-profiling-into then-like +\ hook-profiling-into if-like \ subsumed by other-control-flow +\ hook-profiling-into ahead-like \ subsumed by other-control-flow +hook-profiling-into other-control-flow +hook-profiling-into begin-like +hook-profiling-into again-like +hook-profiling-into until-like ' :-hook-profile IS :-hook -' prof-compile, IS compile, \ No newline at end of file +' prof-compile, IS compile, +' exit-hook-profile IS exit-hook +' ;-hook-profile IS ;-hook