1: /* This file defines a number of threading schemes.
2:
3: Copyright (C) 1995, 1996,1997,1999,2003,2004,2005,2007,2008 Free Software Foundation, Inc.
4:
5: This file is part of Gforth.
6:
7: Gforth is free software; you can redistribute it and/or
8: modify it under the terms of the GNU General Public License
9: as published by the Free Software Foundation, either version 3
10: of the License, or (at your option) any later version.
11:
12: This program is distributed in the hope that it will be useful,
13: but WITHOUT ANY WARRANTY; without even the implied warranty of
14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: GNU General Public License for more details.
16:
17: You should have received a copy of the GNU General Public License
18: along with this program; if not, see http://www.gnu.org/licenses/.
19:
20:
21: This file defines macros for threading. Many sets of macros are
22: defined. Functionally they have only one difference: Some implement
23: direct threading, some indirect threading. The other differences are
24: just variations to help GCC generate faster code for various
25: machines.
26:
27: (Well, to tell the truth, there actually is another functional
28: difference in some pathological cases: e.g., a '!' stores into the
29: cell where the next executed word comes from; or, the next word
30: executed comes from the top-of-stack. These differences are one of
31: the reasons why GCC cannot produce the right variation by itself. We
32: chose disallowing such practices and using the added implementation
33: freedom to achieve a significant speedup, because these practices
34: are not common in Forth (I have never heard of or seen anyone using
35: them), and it is easy to circumvent problems: A control flow change
36: will flush any prefetched words; you may want to do a "0
37: drop" before that to write back the top-of-stack cache.)
38:
39: These macro sets are used in the following ways: After translation
40: to C a typical primitive looks like
41:
42: ...
43: {
44: DEF_CA
45: other declarations
46: NEXT_P0;
47: main part of the primitive
48: NEXT_P1;
49: store results to stack
50: NEXT_P2;
51: }
52:
53: DEF_CA and all the NEXT_P* together must implement NEXT. In the main
54: part the instruction pointer can be read with IP, changed with
55: INC_IP(const_inc), and the cell right behind the presently executing
56: word (i.e. the value of *IP) is accessed with NEXT_INST.
57:
58: If a primitive does not fall through the main part, it has to do the
59: rest by itself. If it changes ip, it has to redo NEXT_P0 (perhaps we
60: should define a macro SET_IP).
61:
62: Some primitives (execute, dodefer) do not end with NEXT, but with
63: EXEC(.). If NEXT_P0 has been called earlier, it has to perform
64: "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
65: it).
66:
67: Finally, there is NEXT1_P1 and NEXT1_P2, which are parts of EXEC
68: (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
69: true?)) and are used for making docol faster.
70:
71: We can define the ways in which these macros are used with a regular
72: expression:
73:
74: For a primitive
75:
76: DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
77:
78: For a run-time routine, e.g., docol:
79: PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
80:
81: This comment does not yet describe all the dependences that the
82: macros have to satisfy.
83:
84: To organize the former ifdef chaos, each path is separated.
85: This gives a quite impressive number of paths, but you clearly
86: find things that go together.
87:
88: It should be possible to organize the whole thing in a way that
89: contains less redundancy and allows a simpler description.
90:
91: */
92:
93: #if !defined(GCC_PR15242_WORKAROUND) /* a value defined before this point is kept; otherwise choose a default from the compiler's major version */
94: #if __GNUC__ == 3
95: /* various gcc-3.x versions have problems (including PR15242) that are
96: solved with this workaround */
97: #define GCC_PR15242_WORKAROUND 1
98: #else
99: /* other gcc versions are better off without the workaround for
100: primitives that are not relocatable */
101: #define GCC_PR15242_WORKAROUND 0
102: #endif /* __GNUC__ == 3 */
103: #endif /* !defined(GCC_PR15242_WORKAROUND) */
104:
105: #if GCC_PR15242_WORKAROUND /* workaround on: DO_GOTO jumps to the label before_goto, where FIRST_NEXT_P2 places the single "goto *real_ca" */
106: #define DO_GOTO goto before_goto
107: #else /* workaround off: DO_GOTO is itself the indirect jump through real_ca */
108: #define DO_GOTO goto *real_ca
109: #endif
110:
111: #ifndef GOTO_ALIGN /* a definition made before this point is kept; the default expands to nothing. Used by FIRST_NEXT_P2 just before the before_goto label */
112: #define GOTO_ALIGN
113: #endif
114:
115: #define GOTO(target) do {(real_ca=(target));} while(0) /* only records the target in real_ca; the jump itself is done by DO_GOTO */
116: #define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0) /* last part of NEXT: the scheme's NEXT_P1_5 followed by DO_GOTO */
117: #define EXEC(XT) do { real_ca=EXEC1(XT); DO_GOTO;} while (0) /* dispatch to XT: the scheme's EXEC1 sets cfa and yields the code address */
118: #define VM_JUMP(target) do {GOTO(target);} while (0) /* same effect as GOTO(target): stores the target in real_ca, does not jump by itself */
119: #define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0) /* DEF_CA, NEXT_P1 and NEXT_P2 as one statement; NEXT_P0 is not part of it */
120: #define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
121: before_goto: goto *real_ca; after_goto: /* defines the labels before_goto (target of DO_GOTO when GCC_PR15242_WORKAROUND is on) and after_goto */
122: #define FIRST_NEXT do {DEF_CA NEXT_P1; FIRST_NEXT_P2;} while(0) /* like NEXT, but ends with FIRST_NEXT_P2 and therefore emits the two labels */
123: #define IPTOS NEXT_INST /* alias for NEXT_INST */
124:
125:
126: #ifdef DOUBLY_INDIRECT
127: # ifndef DEBUG_DITC
128: # define DEBUG_DITC 0
129: # endif
130: /* DEBUG_DITC: define to 1 if you want to check consistency (enables the fprintf checks in NEXT_P1 and EXEC1 below); defaults to 0 */
131: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0) /* save the current cfa in cfa1 (read back through CFA), then load the cell at ip into cfa */
132: # define CFA cfa1 /* the cfa value saved by NEXT_P0 */
133: # define MORE_VARS Xt cfa1; /* declaration of cfa1; presumably expanded among the engine's local declarations - confirm at the use site */
134: # define IP (ip)
135: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0) /* changing ip also reloads cfa from the new location */
136: # define NEXT_INST (cfa) /* the cell at ip is already held in cfa (see NEXT_P0, SET_IP, INC_IP) */
137: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0) /* keeps cfa equal to the cell at the new ip */
138: # define DEF_CA Label MAYBE_UNUSED ca; /* local that receives the code address in NEXT_P1_5 and EXEC1 */
139: # define NEXT_P1 do {\
140: if (DEBUG_DITC && (cfa<=vm_prims+DOER_MAX || cfa>=vm_prims+npriminfos)) \
141: fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
142: ip++;} while(0) /* with DEBUG_DITC set, reports a cfa that is not strictly between vm_prims+DOER_MAX and vm_prims+npriminfos; then advances ip */
143: # define NEXT_P1_5 do {ca=**cfa; GOTO(ca);} while(0) /* two dereferences of cfa give the code address, which GOTO stores in real_ca */
144: # define EXEC1(XT) ({DEF_CA cfa=(XT);\
145: if (DEBUG_DITC && (cfa>vm_prims+DOER_MAX && cfa<vm_prims+npriminfos)) \
146: fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
147: ca=**cfa; ca;}) /* statement expression: sets cfa to XT, with DEBUG_DITC set reports an xt strictly between vm_prims+DOER_MAX and vm_prims+npriminfos, and has the value **cfa */
148:
149: #elif defined(NO_IP)
150: /* In this branch the ip-related macros are stubs: NEXT_P0, DEF_CA and NEXT_P1 expand to nothing, INC_IP does nothing, SET_IP must never be reached, and NEXT_P1_5 jumps through next_code. */
151: #define NEXT_P0
152: # define CFA cfa
153: #define SET_IP(target) assert(0) /* reaching this is treated as a failed assertion; the argument is not evaluated */
154: #define INC_IP(n) ((void)0) /* no-op; the argument is not evaluated */
155: #define DEF_CA
156: #define NEXT_P1
157: #define NEXT_P1_5 do {goto *next_code;} while(0) /* jumps directly, so the DO_GOTO that follows it in NEXT_P2 is not reached */
158: /* set next_code to the return address before performing EXEC */
159: /* original: */
160: /* #define EXEC1(XT) do {cfa=(XT); goto **cfa;} while(0) */
161: /* fake, to make syntax check work */
162: #define EXEC1(XT) ({cfa=(XT); *cfa;}) /* statement expression: sets cfa to XT and has the value *cfa */
163:
164: #else /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
165:
166: #if defined(DIRECT_THREADED)
167:
168: /* This lets the compiler know that cfa is dead before; we place it at
169: "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
170: etc.), and thus do not reach doers, which would use cfa; the only
171: way to a doer is through EXECUTE etc., which set the cfa
172: themselves.
173:
174: Some of these direct threaded schemes use "cfa" to hold the code
175: address in normal direct threaded code. Of course we cannot use
176: KILLS there.
177:
178: KILLS works by having an empty asm instruction, and claiming to the
179: compiler that it writes to cfa.
180:
181: KILLS is optional. You can write
182:
183: #define KILLS
184:
185: and lose just a little performance.
186: */
187: #define KILLS asm("":"=X"(cfa)); /* the expansion already ends in ';', so it is used without a following semicolon (see NEXT_P1_5) */
188:
189: /* #warning direct threading scheme 8: cfa dead, i386 hack */
190: # define NEXT_P0 /* expands to nothing in this scheme */
191: # define CFA cfa
192: # define IP (ip)
193: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0) /* NEXT_P0 is empty here, so this only assigns ip */
194: # define NEXT_INST (*IP) /* reads the cell at ip from memory on each use */
195: # define INC_IP(const_inc) do { ip+=(const_inc);} while(0)
196: # define DEF_CA /* no ca local is declared in this scheme */
197: # define NEXT_P1 (ip++) /* advance ip first; NEXT_P1_5 then reads the cell at ip-1 */
198: # define NEXT_P1_5 do {KILLS GOTO(*(ip-1));} while(0) /* the cell itself is the jump target (one dereference of ip-1); cfa is marked dead by KILLS */
199: # define EXEC1(XT) ({cfa=(XT); *cfa;}) /* statement expression for EXECUTE etc.: sets cfa to XT and has the value *cfa */
200:
201: /* direct threaded */
202: #else
203: /* indirect THREADED: the default when none of DOUBLY_INDIRECT, NO_IP and DIRECT_THREADED is defined */
204:
205: /* #warning indirect threading scheme 8: low latency,cisc */
206: # define NEXT_P0 /* expands to nothing in this scheme */
207: # define CFA cfa
208: # define IP (ip)
209: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0) /* NEXT_P0 is empty here, so this only assigns ip */
210: # define NEXT_INST (*ip) /* reads the cell at ip from memory on each use */
211: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
212: # define DEF_CA /* no ca local is declared in this scheme */
213: # define NEXT_P1 /* empty: the ip increment is done inside NEXT_P1_5 */
214: # define NEXT_P1_5 do {cfa=*ip++; GOTO(*cfa);} while(0) /* load cfa from the cell at ip, advance ip, and take the jump target from the cell cfa points to */
215: # define EXEC1(XT) ({cfa=(XT); *cfa;}) /* statement expression: sets cfa to XT and has the value *cfa */
216:
217: /* indirect threaded */
218: #endif
219:
220: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
221:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>