Annotation of gforth/engine/threaded.h, revision 1.17

1.1       anton       1: /* This file defines a number of threading schemes.
                      2: 
1.6       anton       3:   Copyright (C) 1995, 1996,1997,1999 Free Software Foundation, Inc.
1.1       anton       4: 
                      5:   This file is part of Gforth.
                      6: 
                      7:   Gforth is free software; you can redistribute it and/or
                      8:   modify it under the terms of the GNU General Public License
                      9:   as published by the Free Software Foundation; either version 2
                     10:   of the License, or (at your option) any later version.
                     11: 
                     12:   This program is distributed in the hope that it will be useful,
                     13:   but WITHOUT ANY WARRANTY; without even the implied warranty of
                     14:   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     15:   GNU General Public License for more details.
                     16: 
                     17:   You should have received a copy of the GNU General Public License
                     18:   along with this program; if not, write to the Free Software
1.7       anton      19:   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
1.1       anton      20: 
                     21: 
                     22:   This file defines macros for threading. Many sets of macros are
                     23:   defined. Functionally they have only one difference: Some implement
                     24:   direct threading, some indirect threading. The other differences are
                     25:   just variations to help GCC generate faster code for various
                     26:   machines.
                     27: 
                     28:   (Well, to tell the truth, there actually is another functional
                     29:   difference in some pathological cases: e.g., a '!' stores into the
                     30:   cell where the next executed word comes from; or, the next word
                     31:   executed comes from the top-of-stack. These differences are one of
                     32:   the reasons why GCC cannot produce the right variation by itself. We
                     33:   chose disallowing such practices and using the added implementation
                     34:   freedom to achieve a significant speedup, because these practices
                     35:   are not common in Forth (I have never heard of or seen anyone using
                     36:   them), and it is easy to circumvent problems: A control flow change
                     37:   will flush any prefetched words; you may want to do a "0
                     38:   drop" before that to write back the top-of-stack cache.)
                     39: 
                     40:   These macro sets are used in the following ways: After translation
                     41:   to C a typical primitive looks like
                     42: 
                     43:   ...
                     44:   {
                     45:   DEF_CA
                     46:   other declarations
                     47:   NEXT_P0;
                     48:   main part of the primitive
                     49:   NEXT_P1;
                     50:   store results to stack
                     51:   NEXT_P2;
                     52:   }
                     53: 
                     54:   DEF_CA and all the NEXT_P* together must implement NEXT; In the main
                     55:   part the instruction pointer can be read with IP, changed with
                     56:   INC_IP(const_inc), and the cell right behind the presently executing
                     57:   word (i.e. the value of *IP) is accessed with NEXT_INST.
                     58: 
                     59:   If a primitive does not fall through the main part, it has to do the
                     60:   rest by itself. If it changes ip, it has to redo NEXT_P0 (SET_IP(p),
                     61:   defined below, does both, except under NO_IP where it is assert(0)).
                     62: 
                     63:   Some primitives (execute, dodefer) do not end with NEXT, but with
                     64:   EXEC(.). If NEXT_P0 has been called earlier, the primitive has to
                     65:   perform "ip=IP;" first to ensure that ip has the right value
                     66:   (NEXT_P0 may change it).
                     67: 
                     68:   Finally, there is NEXT1_P1 and NEXT1_P2, which are parts of EXEC
                     69:   (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
                     70:   true?)) and are used for making docol faster.
                     71: 
                     72:   We can define the ways in which these macros are used with a regular
                     73:   expression:
                     74: 
                     75:   For a primitive
                     76: 
                     77:   DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
                     78: 
                     79:   For a run-time routine, e.g., docol:
                     80:   PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
                     81: 
                     82:   This comment does not yet describe all the dependences that the
                     83:   macros have to satisfy.
                     84: 
                     85:   To organize the former ifdef chaos, each path is separated.
                     86:   This gives a quite impressive number of paths, but you clearly
                     87:   find things that go together.
                     88: 
                     89:   It should be possible to organize the whole thing in a way that
                     90:   contains less redundancy and allows a simpler description.
                     91: 
                     92: */
                     93: 
                     94: #ifdef DOUBLY_INDIRECT /* macro set for doubly indirect dispatch: the jump target is loaded with two dereferences (ca=**cfa) */
                     95: #  define NEXT_P0      ({cfa=*ip;}) /* prefetch the cell at ip into cfa; ip is not advanced here */
                     96: #  define IP           (ip)
1.3       anton      97: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* redo NEXT_P0 so that cfa matches the new ip */
1.1       anton      98: #  define NEXT_INST    (cfa) /* *IP is already cached in cfa */
                     99: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* reload cfa from the target cell, then move ip there */
                    100: #  define DEF_CA       Label ca; /* declares the local that carries the code address from NEXT_P1 to NEXT_P2 */
1.13      anton     101: #  define NEXT_P1      ({ /* report on stderr if cfa is not strictly between vm_prims+DOESJUMP and vm_prims+npriminfos; then step ip and load ca */ \
1.14      anton     102:   if (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos) \
1.13      anton     103:     fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
                    104:   ip++; ca=**cfa;})
1.1       anton     105: #  define NEXT_P2      ({goto *ca;}) /* jump to the code address loaded by NEXT_P1 */
1.13      anton     106: #  define EXEC(XT)     ({DEF_CA cfa=(XT); /* report on stderr if the xt IS strictly inside that range (the complement of the NEXT_P1 check); then dispatch */ \
1.14      anton     107:   if (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos) \
                    108:     fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
1.13      anton     109:  ca=**cfa; goto *ca;})
1.1       anton     110: 
1.16      anton     111: #elif defined(NO_IP) /* NO_IP build: the ip-manipulating macros are empty, no-ops, or assert(0); IP and NEXT_INST are not defined in this branch */
                    112: 
                    113: #define NEXT_P0
                    114: #define SET_IP(target) assert(0) /* trips the assertion if it is ever executed */
                    115: #define INC_IP(n)      ((void)0) /* nothing to advance; the argument is ignored */
                    116: #define DEF_CA
                    117: #define NEXT_P1
                    118: #define NEXT_P2                ({goto *next_code;}) /* continue at the address held in next_code */
                    119: /* set next_code to the return address before performing EXEC */
                    120: #define EXEC(XT)       ({cfa=(XT); goto **cfa;}) /* load cfa from the xt and jump to the code address stored at *cfa */
                    121: 
                    122: #else  /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
1.1       anton     123: 
1.3       anton     124: #if defined(DIRECT_THREADED)
                    125: 
1.17    ! anton     126: /* KILLS lets the compiler know that cfa is dead at this point; we
        !           127:    place it before "goto *"s that perform direct threaded dispatch
        !           128:    (i.e., not EXECUTE etc.) and thus do not reach doers, which would
        !           129:    use cfa; the only way to a doer is through EXECUTE etc., which set
        !           130:    cfa themselves.
        !           131: 
        !           132:    Some of these direct threaded schemes use "cfa" to hold the code
        !           133:    address in normal direct threaded code.  Of course we cannot use
        !           134:    KILLS there.
        !           135: 
        !           136:    KILLS works by emitting an empty asm statement whose only output
        !           137:    operand is cfa, i.e., it claims to the compiler that it writes cfa.
        !           138: 
        !           139:    KILLS is optional.  You can write
        !           140: 
        !           141: #define KILLS
        !           142: 
        !           143:    and lose just a little performance.
        !           144: */
        !           145: #define KILLS asm("":"=X"(cfa)); /* the expansion carries its own ';', so uses are written "KILLS goto ..." */
        !           146: 
1.3       anton     147: #if THREADING_SCHEME==1 /* direct: NEXT_P0 prefetches into cfa with post-increment; dispatch is a plain goto *cfa */
                    148: #warning direct threading scheme 1: autoinc, long latency, cfa live
1.1       anton     149: #  define NEXT_P0      ({cfa=*ip++;}) /* fetch the next cell early and step ip past it */
                    150: #  define IP           (ip-1) /* ip was already advanced by NEXT_P0 */
1.3       anton     151: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     152: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    153: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    154: #  define DEF_CA
                    155: #  define NEXT_P1
                    156: #  define NEXT_P2      ({goto *cfa;}) /* cfa is used directly as the jump target */
1.15      anton     157: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;}) /* one more dereference than NEXT_P2: jump to the address stored at *cfa */
1.1       anton     158: #endif
                    159: 
1.3       anton     160: #if THREADING_SCHEME==2 /* direct: ip is stepped early; the cell is read from memory at use and at dispatch, cfa is not used by NEXT */
                    161: #warning direct threading scheme 2: autoinc, long latency, cfa dead
1.1       anton     162: #  define NEXT_P0      (ip++) /* only steps ip; nothing is loaded into cfa */
                    163: #  define IP           (ip-1) /* ip was already advanced by NEXT_P0 */
1.3       anton     164: #  define SET_IP(p)    ({ip=(p); NEXT_P0;})
1.1       anton     165: #  define NEXT_INST    (*(ip-1)) /* read the current cell from memory */
                    166: #  define INC_IP(const_inc)    ({ ip+=(const_inc);})
                    167: #  define DEF_CA
                    168: #  define NEXT_P1
1.17    ! anton     169: #  define NEXT_P2      ({KILLS goto **(ip-1);}) /* mark cfa dead, then jump to the address stored in the cell just behind ip */
1.15      anton     170: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;}) /* EXEC does set cfa before jumping */
1.1       anton     171: #endif
                    172: 
                    173: 
1.3       anton     174: #if THREADING_SCHEME==3 /* direct: fetch happens in NEXT_P1; macro bodies are identical to direct scheme 7, only the #warning text differs */
                    175: #warning direct threading scheme 3: autoinc, low latency, cfa live
1.1       anton     176: #  define NEXT_P0
                    177: #  define IP           (ip)
1.3       anton     178: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     179: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    180: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    181: #  define DEF_CA
                    182: #  define NEXT_P1      ({cfa=*ip++;}) /* fetch the next cell into cfa and step ip past it */
                    183: #  define NEXT_P2      ({goto *cfa;}) /* cfa is used directly as the jump target */
1.15      anton     184: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
1.1       anton     185: #endif
                    186: 
1.3       anton     187: #if THREADING_SCHEME==4 /* direct: the whole dispatch is done in NEXT_P2 without going through cfa */
                    188: #warning direct threading scheme 4: autoinc, low latency, cfa dead
1.1       anton     189: #  define NEXT_P0
                    190: #  define IP           (ip)
1.3       anton     191: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     192: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    193: #  define INC_IP(const_inc)    ({ ip+=(const_inc);})
                    194: #  define DEF_CA
                    195: #  define NEXT_P1
1.17    ! anton     196: #  define NEXT_P2      ({KILLS goto **(ip++);}) /* mark cfa dead; read the cell, post-increment ip and jump in one expression */
1.15      anton     197: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;}) /* EXEC does set cfa before jumping */
1.1       anton     198: #endif
                    199: 
1.3       anton     200: #if THREADING_SCHEME==5 /* direct: prefetch into cfa in NEXT_P0 without auto-increment; ip is stepped separately in NEXT_P1 */
                    201: #warning direct threading scheme 5: long latency, cfa live
1.1       anton     202: #  define NEXT_P0      ({cfa=*ip;}) /* prefetch the cell at ip; ip is not advanced here */
                    203: #  define IP           (ip)
1.3       anton     204: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     205: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    206: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    207: #  define DEF_CA
                    208: #  define NEXT_P1      (ip++) /* step past the cell that NEXT_P0 prefetched */
                    209: #  define NEXT_P2      ({goto *cfa;}) /* cfa is used directly as the jump target */
1.15      anton     210: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
1.1       anton     211: #endif
                    212: 
1.3       anton     213: #if THREADING_SCHEME==6 /* direct: ip stepped in NEXT_P1, jump through memory in NEXT_P2; equivalent to direct scheme 8 (which spells NEXT_INST as *IP) */
                    214: #warning direct threading scheme 6: long latency, cfa dead
1.1       anton     215: #  define NEXT_P0
                    216: #  define IP           (ip)
1.3       anton     217: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     218: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    219: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    220: #  define DEF_CA
                    221: #  define NEXT_P1      (ip++) /* step ip past the cell that will be executed next */
1.17    ! anton     222: #  define NEXT_P2      ({KILLS goto **(ip-1);}) /* mark cfa dead; ip was stepped in NEXT_P1, so the cell to run is at ip-1 */
1.15      anton     223: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;}) /* EXEC does set cfa before jumping */
1.1       anton     224: #endif
                    225: 
                    226: 
1.3       anton     227: #if THREADING_SCHEME==7 /* direct: fetch happens in NEXT_P1; macro bodies are identical to direct scheme 3, only the #warning text differs */
                    228: #warning direct threading scheme 7: low latency, cfa live
1.1       anton     229: #  define NEXT_P0
                    230: #  define IP           (ip)
1.3       anton     231: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     232: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    233: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    234: #  define DEF_CA
                    235: #  define NEXT_P1      ({cfa=*ip++;}) /* fetch the next cell into cfa and step ip past it */
                    236: #  define NEXT_P2      ({goto *cfa;}) /* cfa is used directly as the jump target */
1.15      anton     237: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
1.1       anton     238: #endif
                    239: 
1.3       anton     240: #if THREADING_SCHEME==8 /* direct: ip stepped in NEXT_P1, jump through memory in NEXT_P2; equivalent to direct scheme 6 (which spells NEXT_INST as *ip) */
                    241: #warning direct threading scheme 8: cfa dead, i386 hack
1.1       anton     242: #  define NEXT_P0
                    243: #  define IP           (ip)
1.3       anton     244: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     245: #  define NEXT_INST    (*IP) /* read the current cell from memory */
                    246: #  define INC_IP(const_inc)    ({ ip+=(const_inc);})
                    247: #  define DEF_CA
                    248: #  define NEXT_P1      (ip++) /* step ip past the cell that will be executed next */
1.17    ! anton     249: #  define NEXT_P2      ({KILLS goto **(ip-1);}) /* mark cfa dead; ip was stepped in NEXT_P1, so the cell to run is at ip-1 */
1.15      anton     250: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;}) /* EXEC does set cfa before jumping */
1.1       anton     251: #endif
                    252: 
1.3       anton     253: #if THREADING_SCHEME==9 /* direct: keeps the cell at ip prefetched in the extra variable next_cfa (declared via MORE_VARS) */
                    254: #warning direct threading scheme 9: Power/PPC hack, long latency
                    255: /* Power uses a prepare-to-branch instruction, and the latency between
                    256:    this inst and the branch is 5 cycles on a PPC604; so we utilize this
                    257:    to do some prefetching in between */
                    258: #  define NEXT_P0
                    259: #  define IP           ip
                    260: #  define SET_IP(p)    ({ip=(p); next_cfa=*ip; NEXT_P0;}) /* reload the prefetched cell for the new ip; NEXT_P0 is empty here */
                    261: #  define NEXT_INST    (next_cfa) /* *IP is cached in next_cfa */
                    262: #  define INC_IP(const_inc)    ({next_cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps next_cfa equal to *IP after the move */
1.8       anton     263: #  define DEF_CA       
                    264: #  define NEXT_P1      ({cfa=next_cfa; ip++; next_cfa=*ip;}) /* take the prefetched cell as jump target, step ip, prefetch the following cell */
                    265: #  define NEXT_P2      ({goto *cfa;}) /* cfa is used directly as the jump target */
1.15      anton     266: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
1.3       anton     267: #  define MORE_VARS    Xt next_cfa; /* declaration of the prefetch variable used by the macros above */
                    268: #endif
1.1       anton     269: 
1.3       anton     270: #if THREADING_SCHEME==10 /* direct: everything NEXT needs is done in NEXT_P2 */
                    271: #warning direct threading scheme 10: plain (no attempt at scheduling)
                    272: #  define NEXT_P0
                    273: #  define IP           (ip)
                    274: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
                    275: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    276: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    277: #  define DEF_CA
                    278: #  define NEXT_P1
                    279: #  define NEXT_P2      ({cfa=*ip++; goto *cfa;}) /* fetch the cell, step ip, and jump, all in one place */
1.15      anton     280: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
1.3       anton     281: #endif
1.1       anton     282: 
1.3       anton     283: /* direct threaded */
                    284: #else
1.1       anton     285: /* indirect THREADED  */
                    286: 
1.3       anton     287: #if THREADING_SCHEME==1 /* indirect: NEXT_P0 prefetches cfa with post-increment; the code address is dereferenced inside the goto */
                    288: #warning indirect threading scheme 1: autoinc, long latency, cisc
1.1       anton     289: #  define NEXT_P0      ({cfa=*ip++;}) /* fetch the next cell early and step ip past it */
                    290: #  define IP           (ip-1) /* ip was already advanced by NEXT_P0 */
1.3       anton     291: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     292: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    293: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    294: #  define DEF_CA
                    295: #  define NEXT_P1
                    296: #  define NEXT_P2      ({goto **cfa;}) /* jump to the code address stored at *cfa */
                    297: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
                    298: #endif
                    299: 
1.3       anton     300: #if THREADING_SCHEME==2 /* indirect: code address is loaded into ca in NEXT_P1; macro bodies are identical to indirect scheme 4, only the #warning text differs */
                    301: #warning indirect threading scheme 2: autoinc, long latency
1.1       anton     302: #  define NEXT_P0      ({cfa=*ip++;}) /* fetch the next cell early and step ip past it */
                    303: #  define IP           (ip-1) /* ip was already advanced by NEXT_P0 */
1.3       anton     304: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     305: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    306: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    307: #  define DEF_CA       Label ca; /* declares the local that carries the code address from NEXT_P1 to NEXT_P2 */
                    308: #  define NEXT_P1      ({ca=*cfa;}) /* load the code address ahead of the jump */
                    309: #  define NEXT_P2      ({goto *ca;})
                    310: #  define EXEC(XT)     ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
                    311: #endif
                    312: 
                    313: 
1.3       anton     314: #if THREADING_SCHEME==3 /* indirect: whole dispatch in NEXT_P2; macro bodies are identical to indirect scheme 8, only the #warning text differs */
                    315: #warning indirect threading scheme 3: autoinc, low latency, cisc
1.1       anton     316: #  define NEXT_P0
                    317: #  define IP           (ip)
1.3       anton     318: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     319: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    320: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    321: #  define DEF_CA
                    322: #  define NEXT_P1
                    323: #  define NEXT_P2      ({cfa=*ip++; goto **cfa;}) /* fetch cfa, step ip, and jump to the code address stored at *cfa */
                    324: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
                    325: #endif
                    326: 
1.3       anton     327: #if THREADING_SCHEME==4 /* indirect: code address is loaded into ca in NEXT_P1; macro bodies are identical to indirect scheme 2, only the #warning text differs */
                    328: #warning indirect threading scheme 4: autoinc, low latency
1.1       anton     329: #  define NEXT_P0      ({cfa=*ip++;}) /* fetch the next cell early and step ip past it */
                    330: #  define IP           (ip-1) /* ip was already advanced by NEXT_P0 */
1.3       anton     331: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     332: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    333: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    334: #  define DEF_CA       Label ca; /* declares the local that carries the code address from NEXT_P1 to NEXT_P2 */
                    335: #  define NEXT_P1      ({ca=*cfa;}) /* load the code address ahead of the jump */
                    336: #  define NEXT_P2      ({goto *ca;})
                    337: #  define EXEC(XT)     ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
                    338: #endif
                    339: 
                    340: 
1.3       anton     341: #if THREADING_SCHEME==5 /* indirect: prefetch cfa in NEXT_P0 without auto-increment; ip is stepped in NEXT_P1; code address dereferenced inside the goto */
                    342: #warning indirect threading scheme 5: long latency, cisc
1.1       anton     343: #  define NEXT_P0      ({cfa=*ip;}) /* prefetch the cell at ip; ip is not advanced here */
                    344: #  define IP           (ip)
1.3       anton     345: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     346: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    347: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    348: #  define DEF_CA
                    349: #  define NEXT_P1      (ip++) /* step past the cell that NEXT_P0 prefetched */
                    350: #  define NEXT_P2      ({goto **cfa;}) /* jump to the code address stored at *cfa */
                    351: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
                    352: #endif
                    353: 
1.3       anton     354: #if THREADING_SCHEME==6 /* indirect: prefetch cfa in NEXT_P0, load ca in NEXT_P1; macro bodies are identical to indirect scheme 7, only the #warning text differs */
                    355: #warning indirect threading scheme 6: long latency
1.1       anton     356: #  define NEXT_P0      ({cfa=*ip;}) /* prefetch the cell at ip; ip is not advanced here */
                    357: #  define IP           (ip)
1.3       anton     358: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
1.1       anton     359: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    360: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    361: #  define DEF_CA       Label ca; /* declares the local that carries the code address from NEXT_P1 to NEXT_P2 */
                    362: #  define NEXT_P1      ({ip++; ca=*cfa;}) /* step ip and load the code address ahead of the jump */
                    363: #  define NEXT_P2      ({goto *ca;})
                    364: #  define EXEC(XT)     ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
                    365: #endif
                    366: 
1.3       anton     367: #if THREADING_SCHEME==7 /* indirect: prefetch cfa in NEXT_P0, load ca in NEXT_P1; macro bodies are identical to indirect scheme 6, only the #warning text differs */
                    368: #warning indirect threading scheme 7: low latency
                    369: #  define NEXT_P0      ({cfa=*ip;}) /* prefetch the cell at ip; ip is not advanced here */
                    370: #  define IP           (ip)
                    371: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* refetch cfa for the new ip */
                    372: #  define NEXT_INST    (cfa) /* *IP is cached in cfa */
                    373: #  define INC_IP(const_inc)    ({cfa=IP[const_inc]; ip+=(const_inc);}) /* keeps cfa equal to *IP after the move */
                    374: #  define DEF_CA       Label ca; /* declares the local that carries the code address from NEXT_P1 to NEXT_P2 */
                    375: #  define NEXT_P1      ({ip++; ca=*cfa;}) /* step ip and load the code address ahead of the jump */
                    376: #  define NEXT_P2      ({goto *ca;})
                    377: #  define EXEC(XT)     ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
                    378: #endif
1.1       anton     379: 
1.3       anton     380: #if THREADING_SCHEME==8 /* indirect: whole dispatch in NEXT_P2; macro bodies are identical to indirect scheme 3, only the #warning text differs */
                    381: #warning indirect threading scheme 8: low latency,cisc
1.1       anton     382: #  define NEXT_P0
                    383: #  define IP           (ip)
1.3       anton     384: #  define SET_IP(p)    ({ip=(p); NEXT_P0;}) /* NEXT_P0 is empty here, so this only assigns ip */
1.1       anton     385: #  define NEXT_INST    (*ip) /* read the current cell from memory */
                    386: #  define INC_IP(const_inc)    ({ip+=(const_inc);})
                    387: #  define DEF_CA
                    388: #  define NEXT_P1
                    389: #  define NEXT_P2      ({cfa=*ip++; goto **cfa;}) /* fetch cfa, step ip, and jump to the code address stored at *cfa */
                    390: #  define EXEC(XT)     ({cfa=(XT); goto **cfa;})
                    391: #endif
                    392: 
1.3       anton     393: /* indirect threaded */
1.1       anton     394: #endif
                    395: 
1.16      anton     396: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
1.1       anton     397: 
                    398: #define NEXT ({DEF_CA NEXT_P1; NEXT_P2;}) /* expands DEF_CA, then NEXT_P1 and NEXT_P2; NEXT_P0 is not part of this macro */
1.10      anton     399: #define IPTOS NEXT_INST /* alias for NEXT_INST */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>