Annotation of gforth/engine/threaded.h, revision 1.40

1.1       anton       1: /* This file defines a number of threading schemes.
                      2: 
1.40    ! anton       3:   Copyright (C) 1995, 1996,1997,1999,2003,2004,2005,2007,2008 Free Software Foundation, Inc.
1.1       anton       4: 
                      5:   This file is part of Gforth.
                      6: 
                      7:   Gforth is free software; you can redistribute it and/or
                      8:   modify it under the terms of the GNU General Public License
1.38      anton       9:   as published by the Free Software Foundation, either version 3
1.1       anton      10:   of the License, or (at your option) any later version.
                     11: 
                     12:   This program is distributed in the hope that it will be useful,
                     13:   but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     15:   GNU General Public License for more details.
                     16: 
                     17:   You should have received a copy of the GNU General Public License
1.38      anton      18:   along with this program; if not, see http://www.gnu.org/licenses/.
1.1       anton      19: 
                     20: 
                     21:   This file defines macros for threading. Many sets of macros are
                     22:   defined. Functionally they have only one difference: Some implement
                     23:   direct threading, some indirect threading. The other differences are
                     24:   just variations to help GCC generate faster code for various
                     25:   machines.
                     26: 
                     27:   (Well, to tell the truth, there actually is another functional
                     28:   difference in some pathological cases: e.g., a '!' stores into the
                     29:   cell where the next executed word comes from; or, the next word
                     30:   executed comes from the top-of-stack. These differences are one of
                     31:   the reasons why GCC cannot produce the right variation by itself. We
                     32:   chose disallowing such practices and using the added implementation
                     33:   freedom to achieve a significant speedup, because these practices
                     34:   are not common in Forth (I have never heard of or seen anyone using
                     35:   them), and it is easy to circumvent problems: A control flow change
                     36:   will flush any prefetched words; you may want to do a "0
                     37:   drop" before that to write back the top-of-stack cache.)
                     38: 
                     39:   These macro sets are used in the following ways: After translation
                     40:   to C a typical primitive looks like
                     41: 
                     42:   ...
                     43:   {
                     44:   DEF_CA
                     45:   other declarations
                     46:   NEXT_P0;
                     47:   main part of the primitive
                     48:   NEXT_P1;
                     49:   store results to stack
                     50:   NEXT_P2;
                     51:   }
                     52: 
                     53:   DEF_CA and all the NEXT_P* together must implement NEXT; In the main
                     54:   part the instruction pointer can be read with IP, changed with
                     55:   INC_IP(const_inc), and the cell right behind the presently executing
                     56:   word (i.e. the value of *IP) is accessed with NEXT_INST.
                     57: 
                     58:   If a primitive does not fall through the main part, it has to do the
                     59:   rest by itself. If it changes ip, it has to redo NEXT_P0 (the
                     60:   schemes below that keep an ip define SET_IP(p) for this purpose).
                     61: 
                     62:   Some primitives (execute, dodefer) do not end with NEXT, but with
                     63:   EXEC(.). If NEXT_P0 has been called earlier, it has to perform
                     64:   "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
                     65:   it).
                     66: 
                     67:   Finally, there is NEXT1_P1 and NEXT1_P2, which are parts of EXEC
                     68:   (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
                     69:   true?)) and are used for making docol faster.
                     70: 
                     71:   We can define the ways in which these macros are used with a regular
                     72:   expression:
                     73: 
                     74:   For a primitive
                     75: 
                     76:   DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
                     77: 
                     78:   For a run-time routine, e.g., docol:
                     79:   PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
                     80: 
                     81:   This comment does not yet describe all the dependences that the
                     82:   macros have to satisfy.
                     83: 
                     84:   To organize the former ifdef chaos, each path is defined separately.
                     85:   This gives quite an impressive number of paths, but you can clearly
                     86:   see which things go together.
                     87: 
                     88:   It should be possible to organize the whole thing in a way that
                     89:   contains less redundancy and allows a simpler description.
                     90: 
                     91: */
                     92: 
1.36      anton      93: #if !defined(GCC_PR15242_WORKAROUND)
                     94: #if __GNUC__ == 3
                     95: /* various gcc-3.x versions have problems (including PR15242) that are
                     96:    solved with this workaround */
                     97: #define GCC_PR15242_WORKAROUND 1
                     98: #else
                     99: /* other gcc versions are better off without the workaround for
                    100:    primitives that are not relocatable */
                    101: #define GCC_PR15242_WORKAROUND 0
                    102: #endif /* __GNUC__ == 3 */
                    103: #endif /* !defined(GCC_PR15242_WORKAROUND) */
                    104: 
                    105: #if GCC_PR15242_WORKAROUND
1.29      anton     106: #define DO_GOTO goto before_goto /* jump to the before_goto label, which FIRST_NEXT_P2 places in front of a "goto *real_ca" */
                    107: #else
                    108: #define DO_GOTO goto *real_ca /* computed goto straight to the address held in real_ca */
                    109: #endif /* GCC_PR15242_WORKAROUND */
1.36      anton     110: 
1.30      pazsan    111: #ifndef GOTO_ALIGN
                    112: #define GOTO_ALIGN /* default: expands to nothing unless defined before this point */
                    113: #endif /* GOTO_ALIGN */
1.29      anton     114: 
1.28      anton     115: #define GOTO(target) do {(real_ca=(target));} while(0) /* only records the target in real_ca; the jump itself is done by DO_GOTO */
                    116: #define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0) /* final part of NEXT: scheme-specific NEXT_P1_5, then DO_GOTO */
1.32      anton     117: #define EXEC(XT) do { real_ca=EXEC1(XT); DO_GOTO;} while (0) /* EXEC1 yields the code address for XT, which is stored in real_ca before DO_GOTO */
1.33      anton     118: #define VM_JUMP(target) do {GOTO(target);} while (0) /* same as GOTO(target) */
1.28      anton     119: #define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0) /* NEXT without the NEXT_P0 part */
1.30      pazsan    120: #define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
                    121: before_goto: goto *real_ca; after_goto: /* like NEXT_P2, but also defines the labels before_goto and after_goto around the indirect goto */
1.31      anton     122: #define FIRST_NEXT do {DEF_CA NEXT_P1; FIRST_NEXT_P2;} while(0) /* NEXT variant built on FIRST_NEXT_P2, i.e. the one that defines the labels */
1.28      anton     123: #define IPTOS NEXT_INST /* alias for NEXT_INST */
                    124: 
                    125: 
1.1       anton     126: #ifdef DOUBLY_INDIRECT
1.19      pazsan    127: # ifndef DEBUG_DITC
                    128: #  define DEBUG_DITC 0
                    129: # endif
                    130: /* DEBUG_DITC: define to 1 if you want to check consistency (enables the fprintf diagnostics in NEXT_P1 and EXEC1) */
1.26      pazsan    131: #  define NEXT_P0      do {cfa1=cfa; cfa=*ip;} while(0) /* keep the current cfa in cfa1 (exposed as CFA) and prefetch the next one from *ip */
1.23      anton     132: #  define CFA          cfa1
                    133: #  define MORE_VARS     Xt cfa1;
1.1       anton     134: #  define IP           (ip)
1.26      pazsan    135: #  define SET_IP(p)    do {ip=(p); cfa=*ip;} while(0)
1.1       anton     136: #  define NEXT_INST    (cfa)
1.26      pazsan    137: #  define INC_IP(const_inc)    do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
1.39      anton     138: #  define DEF_CA       Label MAYBE_UNUSED ca;
1.26      pazsan    139: #  define NEXT_P1      do {\
1.19      pazsan    140:   if (DEBUG_DITC && (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos)) \
                    141:     fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
1.26      pazsan    142:   ip++;} while(0)
1.28      anton     143: #  define NEXT_P1_5    do {ca=**cfa; GOTO(ca);} while(0) /* two dereferences of cfa give the code address */
1.32      anton     144: #  define EXEC1(XT)    ({DEF_CA cfa=(XT);\
1.19      pazsan    145:   if (DEBUG_DITC && (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos)) \
1.14      anton     146:     fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
1.32      anton     147:  ca=**cfa; ca;})
1.1       anton     148: 
1.16      anton     149: #elif defined(NO_IP)
                    150: 
                    151: #define NEXT_P0
1.25      anton     152: #  define CFA          cfa
1.16      anton     153: #define SET_IP(target) assert(0) /* this scheme has no settable ip: reaching this is treated as an error */
                    154: #define INC_IP(n)      ((void)0) /* no-op in this scheme */
                    155: #define DEF_CA
                    156: #define NEXT_P1
1.28      anton     157: #define NEXT_P1_5              do {goto *next_code;} while(0)
1.16      anton     158: /* set next_code to the return address before performing EXEC */
1.32      anton     159: /* original: */
                    160: /* #define EXEC1(XT)   do {cfa=(XT); goto **cfa;} while(0) */
                    161: /* fake, to make syntax check work */
                    162: #define EXEC1(XT)      ({cfa=(XT); *cfa;})
1.16      anton     163: 
                    164: #else  /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
1.1       anton     165: 
1.3       anton     166: #if defined(DIRECT_THREADED)
                    167: 
1.17      anton     168: /* This lets the compiler know that cfa is dead before; we place it at
                    169:    "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
                    170:    etc.), and thus do not reach doers, which would use cfa; the only
                    171:    way to a doer is through EXECUTE etc., which set the cfa
                    172:    themselves.
                    173: 
                    174:    Some of these direct threaded schemes use "cfa" to hold the code
                    175:    address in normal direct threaded code.  Of course we cannot use
                    176:    KILLS there.
                    177: 
                    178:    KILLS works by having an empty asm instruction, and claiming to the
                    179:    compiler that it writes to cfa.
                    180: 
                    181:    KILLS is optional.  You can write
                    182: 
                    183: #define KILLS
                    184: 
                    185:    and lose just a little performance.
                    186: */
                    187: #define KILLS asm("":"=X"(cfa));
                    188: 
1.39      anton     189: /* #warning direct threading scheme 8: cfa dead, i386 hack */
1.1       anton     190: #  define NEXT_P0
1.23      anton     191: #  define CFA          cfa
1.1       anton     192: #  define IP           (ip)
1.26      pazsan    193: #  define SET_IP(p)    do {ip=(p); NEXT_P0;} while(0)
1.1       anton     194: #  define NEXT_INST    (*IP)
1.26      pazsan    195: #  define INC_IP(const_inc)    do { ip+=(const_inc);} while(0)
1.1       anton     196: #  define DEF_CA
                    197: #  define NEXT_P1      (ip++)
1.28      anton     198: #  define NEXT_P1_5    do {KILLS GOTO(*(ip-1));} while(0) /* NEXT_P1 has already advanced ip, so the code address is read from ip-1 */
1.32      anton     199: #  define EXEC1(XT)    ({cfa=(XT); *cfa;})
1.1       anton     200: 
1.3       anton     201: /* direct threaded */
                    202: #else
1.1       anton     203: /* indirect THREADED  */
1.20      anton     204: 
1.39      anton     205: /* #warning indirect threading scheme 8: low latency,cisc */
1.1       anton     206: #  define NEXT_P0
1.23      anton     207: #  define CFA          cfa
1.1       anton     208: #  define IP           (ip)
1.26      pazsan    209: #  define SET_IP(p)    do {ip=(p); NEXT_P0;} while(0)
1.1       anton     210: #  define NEXT_INST    (*ip)
1.26      pazsan    211: #  define INC_IP(const_inc)    do {ip+=(const_inc);} while(0)
1.1       anton     212: #  define DEF_CA
                    213: #  define NEXT_P1
1.28      anton     214: #  define NEXT_P1_5    do {cfa=*ip++; GOTO(*cfa);} while(0) /* fetch the next cfa, advance ip, and take the code address from *cfa */
1.32      anton     215: #  define EXEC1(XT)    ({cfa=(XT); *cfa;})
1.1       anton     216: 
1.3       anton     217: /* indirect threaded */
1.1       anton     218: #endif
                    219: 
1.16      anton     220: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
1.1       anton     221: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>