File:  [gforth] / gforth / engine / threaded.h
Revision 1.42: download - view: text, annotated - select for diffs
Fri Dec 31 18:09:02 2010 UTC (9 years, 9 months ago) by anton
Branches: MAIN
CVS tags: HEAD
updated copyright years

    1: /* This file defines a number of threading schemes.
    2: 
    3:   Copyright (C) 1995, 1996,1997,1999,2003,2004,2005,2007,2008,2010 Free Software Foundation, Inc.
    4: 
    5:   This file is part of Gforth.
    6: 
    7:   Gforth is free software; you can redistribute it and/or
    8:   modify it under the terms of the GNU General Public License
    9:   as published by the Free Software Foundation, either version 3
   10:   of the License, or (at your option) any later version.
   11: 
   12:   This program is distributed in the hope that it will be useful,
   13:   but WITHOUT ANY WARRANTY; without even the implied warranty of
   14:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15:   GNU General Public License for more details.
   16: 
   17:   You should have received a copy of the GNU General Public License
   18:   along with this program; if not, see http://www.gnu.org/licenses/.
   19: 
   20: 
   21:   This file defines macros for threading. Many sets of macros are
   22:   defined. Functionally they have only one difference: Some implement
   23:   direct threading, some indirect threading. The other differences are
   24:   just variations to help GCC generate faster code for various
   25:   machines.
   26: 
   27:   (Well, to tell the truth, there actually is another functional
   28:   difference in some pathological cases: e.g., a '!' stores into the
   29:   cell where the next executed word comes from; or, the next word
   30:   executed comes from the top-of-stack. These differences are one of
   31:   the reasons why GCC cannot produce the right variation by itself. We
   32:   chose disallowing such practices and using the added implementation
   33:   freedom to achieve a significant speedup, because these practices
   34:   are not common in Forth (I have never heard of or seen anyone using
   35:   them), and it is easy to circumvent problems: A control flow change
   36:   will flush any prefetched words; you may want to do a "0
   37:   drop" before that to write back the top-of-stack cache.)
   38: 
   39:   These macro sets are used in the following ways: After translation
   40:   to C a typical primitive looks like
   41: 
   42:   ...
   43:   {
   44:   DEF_CA
   45:   other declarations
   46:   NEXT_P0;
   47:   main part of the primitive
   48:   NEXT_P1;
   49:   store results to stack
   50:   NEXT_P2;
   51:   }
   52: 
   53:   DEF_CA and all the NEXT_P* together must implement NEXT; In the main
   54:   part the instruction pointer can be read with IP, changed with
   55:   INC_IP(const_inc), and the cell right behind the presently executing
   56:   word (i.e. the value of *IP) is accessed with NEXT_INST.
   57: 
   58:   If a primitive does not fall through the main part, it has to do the
   59:   rest by itself. If it changes ip, it has to redo NEXT_P0 (see the
   60:   SET_IP macros defined below).
   61: 
   62:   Some primitives (execute, dodefer) do not end with NEXT, but with
   63:   EXEC(.). If NEXT_P0 has been called earlier, it has to perform
   64:   "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
   65:   it).
   66: 
   67:   Finally, there are NEXT1_P1 and NEXT1_P2, which are parts of EXEC
   68:   (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
   69:   true?)) and are used for making docol faster.
   70: 
   71:   We can define the ways in which these macros are used with a regular
   72:   expression:
   73: 
   74:   For a primitive
   75: 
   76:   DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
   77: 
   78:   For a run-time routine, e.g., docol:
   79:   PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
   80: 
   81:   This comment does not yet describe all the dependences that the
   82:   macros have to satisfy.
   83: 
   84:   To organize the former ifdef chaos, each path is separated.
   85:   This gives a quite impressive number of paths, but you clearly
   86:   find things that go together.
   87: 
   88:   It should be possible to organize the whole thing in a way that
   89:   contains less redundancy and allows a simpler description.
   90: 
   91: */
   92: 
   93: #if !defined(GCC_PR15242_WORKAROUND) /* only pick a default if no value was defined earlier */
   94: #if __GNUC__ == 3
   95: /* various gcc-3.x versions have problems (including PR15242) that are
   96:    solved with this workaround */
   97: #define GCC_PR15242_WORKAROUND 1
   98: #else
   99: /* other gcc versions are better off without the workaround for
  100:    primitives that are not relocatable */
  101: #define GCC_PR15242_WORKAROUND 0
  102: #endif
  103: #endif
  104: 
  105: #if GCC_PR15242_WORKAROUND
  106: #define DO_GOTO goto before_goto /* jump to the before_goto label, where FIRST_NEXT_P2 places the one "goto *real_ca" */
  107: #else
  108: #define DO_GOTO goto *real_ca /* every use gets its own indirect goto through real_ca */
  109: #endif
  110: 
  111: #ifndef GOTO_ALIGN /* default: expands to nothing unless defined before this point */
  112: #define GOTO_ALIGN
  113: #endif
  114: 
  115: #define GOTO(target) do {(real_ca=(target));} while(0) /* only records the target in real_ca; the jump itself is done by DO_GOTO */
  116: #define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0) /* last part of NEXT: scheme-specific NEXT_P1_5, then DO_GOTO */
  117: #define EXEC(XT) do { real_ca=EXEC1(XT); DO_GOTO;} while (0) /* EXEC1 evaluates to the code address that is stored in real_ca */
  118: #define VM_JUMP(target) do {GOTO(target);} while (0) /* same as GOTO: sets real_ca, does not jump by itself */
  119: #define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0) /* NEXT_P1 and NEXT_P2 in one step, with DEF_CA's declaration (if any) in scope */
  120: #define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
  121: before_goto: goto *real_ca; after_goto:
  122: #define FIRST_NEXT do {DEF_CA NEXT_P1; FIRST_NEXT_P2;} while(0) /* defines the labels before_goto and after_goto; the ';' after FIRST_NEXT_P2 is the statement that after_goto labels */
  123: #define IPTOS NEXT_INST /* alias for NEXT_INST */
  124: 
  125: 
  126: #ifdef DOUBLY_INDIRECT
  127: # ifndef DEBUG_DITC
  128: #  define DEBUG_DITC 0
  129: # endif
  130: /* define DEBUG_DITC to 1 if you want to check consistency: NEXT_P1 then reports (on stderr) a cfa outside the open range (vm_prims+DOER_MAX, vm_prims+npriminfos), and EXEC1 reports an xt inside it */
  131: #  define NEXT_P0	do {cfa1=cfa; cfa=*ip;} while(0) /* keep the current cfa in cfa1, then load cfa from *ip */
  132: #  define CFA		cfa1 /* the value saved by NEXT_P0 */
  133: #  define MORE_VARS     Xt cfa1; /* declaration of the extra variable this scheme uses */
  134: #  define IP		(ip)
  135: #  define SET_IP(p)	do {ip=(p); cfa=*ip;} while(0) /* also reloads cfa from the new ip */
  136: #  define NEXT_INST	(cfa) /* in this scheme cfa holds *ip (see NEXT_P0, SET_IP, INC_IP) */
  137: #  define INC_IP(const_inc)	do {cfa=IP[const_inc]; ip+=(const_inc);} while(0) /* keeps cfa equal to *ip after the increment */
  138: #  define DEF_CA	Label MAYBE_UNUSED ca; /* local that receives the code address in NEXT_P1_5 and EXEC1 */
  139: #  define NEXT_P1	do {\
  140:   if (DEBUG_DITC && (cfa<=vm_prims+DOER_MAX || cfa>=vm_prims+npriminfos)) \
  141:     fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
  142:   ip++;} while(0)
  143: #  define NEXT_P1_5	do {ca=**cfa; GOTO(ca);} while(0) /* two dereferences of cfa give the code address */
  144: #  define EXEC1(XT)	({DEF_CA cfa=(XT);\
  145:   if (DEBUG_DITC && (cfa>vm_prims+DOER_MAX && cfa<vm_prims+npriminfos)) \
  146:     fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
  147:  ca=**cfa; ca;})
  148: 
  149: #elif defined(NO_IP)
  150: 
  151: #define NEXT_P0
  152: #  define CFA		cfa
  153: #define SET_IP(target)	assert(0) /* the argument is ignored; executing this trips the assertion */
  154: #define INC_IP(n)	((void)0) /* no-op in this scheme */
  155: #define DEF_CA
  156: #define NEXT_P1
  157: #define NEXT_P1_5		do {goto *next_code;} while(0) /* jumps straight to next_code instead of setting real_ca */
  158: /* set next_code to the return address before performing EXEC */
  159: /* original: */
  160: /* #define EXEC1(XT)	do {cfa=(XT); goto **cfa;} while(0) */
  161: /* fake, to make syntax check work */
  162: #define EXEC1(XT)	({cfa=(XT); *cfa;}) /* statement expression: sets cfa and evaluates to *cfa */
  163: 
  164: #else  /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
  165: 
  166: #if defined(DIRECT_THREADED)
  167: 
  168: /* KILLS lets the compiler know that cfa is dead before it; we place it at
  169:    "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
  170:    etc.), and thus do not reach doers, which would use cfa; the only
  171:    way to a doer is through EXECUTE etc., which set the cfa
  172:    themselves.
  173: 
  174:    Some of these direct threaded schemes use "cfa" to hold the code
  175:    address in normal direct threaded code.  Of course we cannot use
  176:    KILLS there.
  177: 
  178:    KILLS works by having an empty asm instruction, and claiming to the
  179:    compiler that it writes to cfa.
  180: 
  181:    KILLS is optional.  You can write
  182: 
  183: #define KILLS
  184: 
  185:    and lose just a little performance.
  186: */
  187: #define KILLS asm("":"=X"(cfa));
  188: 
  189: /* #warning direct threading scheme 8: cfa dead, i386 hack */
  190: #  define NEXT_P0
  191: #  define CFA		cfa
  192: #  define IP		(ip)
  193: #  define SET_IP(p)	do {ip=(p); NEXT_P0;} while(0) /* NEXT_P0 is empty in this scheme, so this only assigns ip */
  194: #  define NEXT_INST	(*IP)
  195: #  define INC_IP(const_inc)	do { ip+=(const_inc);} while(0)
  196: #  define DEF_CA
  197: #  define NEXT_P1	(ip++) /* ip is advanced here, which is why NEXT_P1_5 reads ip[-1] */
  198: #  define NEXT_P1_5	do {KILLS GOTO(*(ip-1));} while(0) /* the cell fetched from the threaded code is itself the jump target */
  199: #  define EXEC1(XT)	({cfa=(XT); *cfa;}) /* statement expression: sets cfa and evaluates to *cfa */
  200: 
  201: /* end of direct threaded */
  202: #else
  203: /* indirect threaded */
  204: 
  205: /* #warning indirect threading scheme 8: low latency,cisc */
  206: #  define NEXT_P0
  207: #  define CFA		cfa
  208: #  define IP		(ip)
  209: #  define SET_IP(p)	do {ip=(p); NEXT_P0;} while(0) /* NEXT_P0 is empty in this scheme, so this only assigns ip */
  210: #  define NEXT_INST	(*ip)
  211: #  define INC_IP(const_inc)	do {ip+=(const_inc);} while(0)
  212: #  define DEF_CA
  213: #  define NEXT_P1
  214: #  define NEXT_P1_5	do {cfa=*ip++; GOTO(*cfa);} while(0) /* fetch the next cfa, advance ip; the jump target is *cfa */
  215: #  define EXEC1(XT)	({cfa=(XT); *cfa;}) /* statement expression: sets cfa and evaluates to *cfa */
  216: 
  217: /* end of indirect threaded */
  218: #endif
  219: 
  220: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
  221: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>