File:
[gforth] /
gforth /
engine /
threaded.h
Revision
1.23:
download - view:
text,
annotated -
select for diffs
Wed Aug 20 09:23:46 2003 UTC (20 years, 8 months ago) by
anton
Branches:
MAIN
CVS tags:
v0-6-2,
HEAD
Defined run-time routines (docol etc.) as primitives, eliminated some
special-casing and "+DOESJUMP+1" etc. in engine.c and main.c
Defined the enum type PrimNum and replaced many shorts with PrimNums
(gives nicer gdb output)
Workaround for the absence of the FPE_... macros in FreeBSD/Alpha 4_STABLE
1: /* This file defines a number of threading schemes.
2:
3: Copyright (C) 1995, 1996,1997,1999,2003 Free Software Foundation, Inc.
4:
5: This file is part of Gforth.
6:
7: Gforth is free software; you can redistribute it and/or
8: modify it under the terms of the GNU General Public License
9: as published by the Free Software Foundation; either version 2
10: of the License, or (at your option) any later version.
11:
12: This program is distributed in the hope that it will be useful,
13: but WITHOUT ANY WARRANTY; without even the implied warranty of
14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: GNU General Public License for more details.
16:
17: You should have received a copy of the GNU General Public License
18: along with this program; if not, write to the Free Software
19: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
20:
21:
22: This file defines macros for threading. Many sets of macros are
23: defined. Functionally they have only one difference: Some implement
24: direct threading, some indirect threading. The other differences are
25: just variations to help GCC generate faster code for various
26: machines.
27:
28: (Well, to tell the truth, there actually is another functional
29: difference in some pathological cases: e.g., a '!' stores into the
30: cell where the next executed word comes from; or, the next word
31: executed comes from the top-of-stack. These differences are one of
32: the reasons why GCC cannot produce the right variation by itself. We
33: chose disallowing such practices and using the added implementation
34: freedom to achieve a significant speedup, because these practices
35: are not common in Forth (I have never heard of or seen anyone using
36: them), and it is easy to circumvent problems: A control flow change
37: will flush any prefetched words; you may want to do a "0
38: drop" before that to write back the top-of-stack cache.)
39:
40: These macro sets are used in the following ways: After translation
41: to C a typical primitive looks like
42:
43: ...
44: {
45: DEF_CA
46: other declarations
47: NEXT_P0;
48: main part of the primitive
49: NEXT_P1;
50: store results to stack
51: NEXT_P2;
52: }
53:
54: DEF_CA and all the NEXT_P* together must implement NEXT; In the main
55: part the instruction pointer can be read with IP, changed with
56: INC_IP(const_inc), and the cell right behind the presently executing
57: word (i.e. the value of *IP) is accessed with NEXT_INST.
58:
59: If a primitive does not fall through the main part, it has to do the
60: rest by itself. If it changes ip, it has to redo NEXT_P0 (perhaps we
61: should define a macro SET_IP).
62:
63: Some primitives (execute, dodefer) do not end with NEXT, but with
64: EXEC(.). If NEXT_P0 has been called earlier, it has to perform
65: "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
66: it).
67:
68: Finally, there is NEXT1_P1 and NEXT1_P2, which are parts of EXEC
69: (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
70: true?)) and are used for making docol faster.
71:
72: We can define the ways in which these macros are used with a regular
73: expression:
74:
75: For a primitive
76:
77: DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
78:
79: For a run-time routine, e.g., docol:
80: PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
81:
82: This comment does not yet describe all the dependences that the
83: macros have to satisfy.
84:
85: To organize the former ifdef chaos, each path is kept separate.
86: This gives a quite impressive number of paths, but you clearly
87: find things that go together.
88:
89: It should be possible to organize the whole thing in a way that
90: contains less redundancy and allows a simpler description.
91:
92: */
93:
94: #ifdef DOUBLY_INDIRECT /* macro set whose dispatch dereferences cfa twice (ca=**cfa) */
95: # ifndef DEBUG_DITC
96: # define DEBUG_DITC 0
97: # endif
98: /* define DEBUG_DITC to 1 if you want to check consistency (see NEXT_P1 and EXEC) */
99: # define NEXT_P0 ({cfa1=cfa; cfa=*ip;}) /* keep the old cfa in cfa1, prefetch the next cell; ip is not advanced here */
100: # define CFA cfa1
101: # define MORE_VARS Xt cfa1;
102: # define IP (ip)
103: # define SET_IP(p) ({ip=(p); cfa=*ip;}) /* redo the prefetch for the new ip */
104: # define NEXT_INST (cfa)
105: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
106: # define DEF_CA Label ca;
107: # define NEXT_P1 ({\
108: if (DEBUG_DITC && (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos)) \
109: fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", cfa, ip); \
110: ip++;}) /* debug check: report a cfa outside the open range (vm_prims+DOESJUMP, vm_prims+npriminfos) */
111: # define NEXT_P2 ({ca=**cfa; goto *ca;})
112: # define EXEC(XT) ({DEF_CA cfa=(XT);\
113: if (DEBUG_DITC && (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos)) \
114: fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", cfa, ip, vm_prims, xts); \
115: ca=**cfa; goto *ca;}) /* debug check is the inverse of NEXT_P1's: report a cfa inside that range */
116:
117: #elif defined(NO_IP) /* macro set in which none of the macros reads or updates ip */
118:
119: #define NEXT_P0
120: #define SET_IP(target) assert(0) /* must never be reached in this configuration */
121: #define INC_IP(n) ((void)0) /* no-op */
122: #define DEF_CA
123: #define NEXT_P1
124: #define NEXT_P2 ({goto *next_code;}) /* continue at the address held in next_code */
125: /* set next_code to the return address before performing EXEC */
126: #define EXEC(XT) ({cfa=(XT); goto **cfa;})
127:
128: #else /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
129:
130: #if defined(DIRECT_THREADED)
131:
132: /* This lets the compiler know that cfa is dead at this point; we place it at
133: "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
134: etc.), and thus do not reach doers, which would use cfa; the only
135: way to a doer is through EXECUTE etc., which set the cfa
136: themselves.
137:
138: Some of these direct threaded schemes use "cfa" to hold the code
139: address in normal direct threaded code. Of course we cannot use
140: KILLS there.
141:
142: KILLS works by having an empty asm instruction, and claiming to the
143: compiler that it writes to cfa.
144:
145: KILLS is optional. You can write
146:
147: #define KILLS
148:
149: and lose just a little performance.
150: */
151: #define KILLS asm("":"=X"(cfa)); /* empty asm template with cfa as its only output operand; expands to a complete statement (note the trailing ';') */
152:
153: #ifndef THREADING_SCHEME /* direct threading: fall back to scheme 6 unless one was chosen already */
154: #define THREADING_SCHEME 6
155: #endif /* !THREADING_SCHEME */
156:
157: #if THREADING_SCHEME==1 /* direct: next cell is fetched with post-increment in NEXT_P0 and kept in cfa until the jump */
158: #warning direct threading scheme 1: autoinc, long latency, cfa live
159: # define NEXT_P0 ({cfa1=cfa; cfa=*ip++;}) /* keep the old cfa in cfa1, load the next cell, advance ip */
160: # define CFA cfa1
161: # define MORE_VARS Xt cfa1;
162: # define IP (ip-1) /* ip has already been advanced by NEXT_P0 */
163: # define SET_IP(p) ({ip=(p); cfa=*ip++;}) /* redo the fetch-and-advance for the new ip */
164: # define NEXT_INST (cfa)
165: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
166: # define DEF_CA
167: # define NEXT_P1
168: # define NEXT_P2 ({goto *cfa;}) /* jump to the value fetched in NEXT_P0 */
169: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
170: #endif /* THREADING_SCHEME==1 */
171:
172: #if THREADING_SCHEME==2 /* direct: ip is advanced in NEXT_P0; the jump target is read from ip-1 in NEXT_P2 */
173: #warning direct threading scheme 2: autoinc, long latency, cfa dead
174: # define NEXT_P0 (ip++)
175: # define CFA cfa
176: # define IP (ip-1) /* ip has already been advanced by NEXT_P0 */
177: # define SET_IP(p) ({ip=(p); NEXT_P0;})
178: # define NEXT_INST (*(ip-1))
179: # define INC_IP(const_inc) ({ ip+=(const_inc);})
180: # define DEF_CA
181: # define NEXT_P1
182: # define NEXT_P2 ({KILLS goto **(ip-1);}) /* cfa is not used for dispatch, so KILLS may mark it as overwritten */
183: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
184: #endif /* THREADING_SCHEME==2 */
185:
186:
187: #if THREADING_SCHEME==3 /* direct: nothing is prefetched; NEXT_P1 loads cfa with post-increment, NEXT_P2 jumps */
188: #warning direct threading scheme 3: autoinc, low latency, cfa live
189: # define NEXT_P0
190: # define CFA cfa
191: # define IP (ip)
192: # define SET_IP(p) ({ip=(p); NEXT_P0;})
193: # define NEXT_INST (*ip)
194: # define INC_IP(const_inc) ({ip+=(const_inc);})
195: # define DEF_CA
196: # define NEXT_P1 ({cfa=*ip++;}) /* fetch the next cell and advance ip */
197: # define NEXT_P2 ({goto *cfa;})
198: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
199: #endif /* THREADING_SCHEME==3 */
200:
201: #if THREADING_SCHEME==4 /* direct: fetch, increment and jump all happen in one expression in NEXT_P2 */
202: #warning direct threading scheme 4: autoinc, low latency, cfa dead
203: # define NEXT_P0
204: # define CFA cfa
205: # define IP (ip)
206: # define SET_IP(p) ({ip=(p); NEXT_P0;})
207: # define NEXT_INST (*ip)
208: # define INC_IP(const_inc) ({ ip+=(const_inc);})
209: # define DEF_CA
210: # define NEXT_P1
211: # define NEXT_P2 ({KILLS goto **(ip++);}) /* cfa is not used for dispatch, so KILLS may mark it as overwritten */
212: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
213: #endif /* THREADING_SCHEME==4 */
214:
215: #if THREADING_SCHEME==5 /* direct: NEXT_P0 prefetches *ip into cfa without advancing ip; NEXT_P1 advances ip */
216: #warning direct threading scheme 5: long latency, cfa live
217: # define NEXT_P0 ({cfa1=cfa; cfa=*ip;}) /* keep the old cfa in cfa1, prefetch the next cell */
218: # define CFA cfa1
219: # define MORE_VARS Xt cfa1;
220: # define IP (ip)
221: # define SET_IP(p) ({ip=(p); cfa=*ip;}) /* redo the prefetch for the new ip */
222: # define NEXT_INST (cfa)
223: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
224: # define DEF_CA
225: # define NEXT_P1 (ip++)
226: # define NEXT_P2 ({goto *cfa;}) /* jump to the value prefetched in NEXT_P0 */
227: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
228: #endif /* THREADING_SCHEME==5 */
229:
230: #if THREADING_SCHEME==6 /* direct: the scheme selected when THREADING_SCHEME is not predefined */
231: #warning direct threading scheme 6: long latency, cfa dead
232: # define NEXT_P0
233: # define CFA cfa
234: # define IP (ip)
235: # define SET_IP(p) ({ip=(p); NEXT_P0;})
236: # define NEXT_INST (*ip)
237: # define INC_IP(const_inc) ({ip+=(const_inc);})
238: # define DEF_CA
239: # define NEXT_P1 (ip++) /* advance ip first ... */
240: # define NEXT_P2 ({KILLS goto **(ip-1);}) /* ... then jump through the cell just stepped over */
241: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
242: #endif /* THREADING_SCHEME==6 */
243:
244:
245: #if THREADING_SCHEME==7 /* direct: NOTE(review): every macro here is identical to direct scheme 3 in this file */
246: #warning direct threading scheme 7: low latency, cfa live
247: # define NEXT_P0
248: # define CFA cfa
249: # define IP (ip)
250: # define SET_IP(p) ({ip=(p); NEXT_P0;})
251: # define NEXT_INST (*ip)
252: # define INC_IP(const_inc) ({ip+=(const_inc);})
253: # define DEF_CA
254: # define NEXT_P1 ({cfa=*ip++;}) /* fetch the next cell and advance ip */
255: # define NEXT_P2 ({goto *cfa;})
256: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
257: #endif /* THREADING_SCHEME==7 */
258:
259: #if THREADING_SCHEME==8 /* direct: NOTE(review): expands to the same code as direct scheme 6 (NEXT_INST goes via IP, which is (ip)) */
260: #warning direct threading scheme 8: cfa dead, i386 hack
261: # define NEXT_P0
262: # define CFA cfa
263: # define IP (ip)
264: # define SET_IP(p) ({ip=(p); NEXT_P0;})
265: # define NEXT_INST (*IP)
266: # define INC_IP(const_inc) ({ ip+=(const_inc);})
267: # define DEF_CA
268: # define NEXT_P1 (ip++) /* advance ip first ... */
269: # define NEXT_P2 ({KILLS goto **(ip-1);}) /* ... then jump through the cell just stepped over */
270: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
271: #endif /* THREADING_SCHEME==8 */
272:
273: #if THREADING_SCHEME==9 /* direct: keeps the cell at *ip cached in the extra variable next_cfa */
274: #warning direct threading scheme 9: Power/PPC hack, long latency
275: /* Power uses a prepare-to-branch instruction, and the latency between
276: this inst and the branch is 5 cycles on a PPC604; so we utilize this
277: to do some prefetching in between */
278: # define NEXT_P0
279: # define CFA cfa
280: # define IP ip
281: # define SET_IP(p) ({ip=(p); next_cfa=*ip; NEXT_P0;}) /* reload the cached cell for the new ip */
282: # define NEXT_INST (next_cfa)
283: # define INC_IP(const_inc) ({next_cfa=IP[const_inc]; ip+=(const_inc);})
284: # define DEF_CA
285: # define NEXT_P1 ({cfa=next_cfa; ip++; next_cfa=*ip;}) /* take the cached cell as jump target, then prefetch the following one */
286: # define NEXT_P2 ({goto *cfa;})
287: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
288: # define MORE_VARS Xt next_cfa;
289: #endif /* THREADING_SCHEME==9 */
290:
291: #if THREADING_SCHEME==10 /* direct: the whole dispatch (fetch, advance, jump) sits in NEXT_P2 */
292: #warning direct threading scheme 10: plain (no attempt at scheduling)
293: # define NEXT_P0
294: # define CFA cfa
295: # define IP (ip)
296: # define SET_IP(p) ({ip=(p); NEXT_P0;})
297: # define NEXT_INST (*ip)
298: # define INC_IP(const_inc) ({ip+=(const_inc);})
299: # define DEF_CA
300: # define NEXT_P1
301: # define NEXT_P2 ({cfa=*ip++; goto *cfa;})
302: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
303: #endif /* THREADING_SCHEME==10 */
304:
305: /* direct threaded */
306: #else
307: /* indirect THREADED */
308:
309: #ifndef THREADING_SCHEME /* indirect threading: fall back to scheme 6 unless one was chosen already */
310: #define THREADING_SCHEME 6
311: #endif /* !THREADING_SCHEME */
312:
313: #if THREADING_SCHEME==1 /* indirect: cfa is fetched with post-increment in NEXT_P0; NEXT_P2 loads the code address and jumps in one expression */
314: #warning indirect threading scheme 1: autoinc, long latency, cisc
315: # define NEXT_P0 ({cfa1=cfa; cfa=*ip++;}) /* keep the old cfa in cfa1, load the next cell, advance ip */
316: # define CFA cfa1
317: # define MORE_VARS Xt cfa1;
318: # define IP (ip-1) /* ip has already been advanced by NEXT_P0 */
319: # define SET_IP(p) ({ip=(p); cfa=*ip++;}) /* redo the fetch-and-advance for the new ip */
320: # define NEXT_INST (cfa)
321: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
322: # define DEF_CA
323: # define NEXT_P1
324: # define NEXT_P2 ({goto **cfa;})
325: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
326: #endif /* THREADING_SCHEME==1 */
327:
328: #if THREADING_SCHEME==2 /* indirect: like scheme 1's NEXT_P0, but the code address is loaded into ca in NEXT_P1 before the jump in NEXT_P2 */
329: #warning indirect threading scheme 2: autoinc, long latency
330: # define NEXT_P0 ({cfa1=cfa; cfa=*ip++;}) /* keep the old cfa in cfa1, load the next cell, advance ip */
331: # define CFA cfa1
332: # define MORE_VARS Xt cfa1;
333: # define IP (ip-1) /* ip has already been advanced by NEXT_P0 */
334: # define SET_IP(p) ({ip=(p); cfa=*ip++;}) /* redo the fetch-and-advance for the new ip */
335: # define NEXT_INST (cfa)
336: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
337: # define DEF_CA Label ca;
338: # define NEXT_P1 ({ca=*cfa;}) /* load the code address ahead of the jump */
339: # define NEXT_P2 ({goto *ca;})
340: # define EXEC(XT) ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
341: #endif /* THREADING_SCHEME==2 */
342:
343:
344: #if THREADING_SCHEME==3 /* indirect: the whole dispatch (fetch, advance, double dereference, jump) sits in NEXT_P2 */
345: #warning indirect threading scheme 3: autoinc, low latency, cisc
346: # define NEXT_P0
347: # define CFA cfa
348: # define IP (ip)
349: # define SET_IP(p) ({ip=(p); NEXT_P0;})
350: # define NEXT_INST (*ip)
351: # define INC_IP(const_inc) ({ip+=(const_inc);})
352: # define DEF_CA
353: # define NEXT_P1
354: # define NEXT_P2 ({cfa=*ip++; goto **cfa;})
355: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
356: #endif /* THREADING_SCHEME==3 */
357:
358: #if THREADING_SCHEME==4 /* indirect: NOTE(review): every macro here is identical to indirect scheme 2 in this file, although the label says "low latency" - confirm this is intended */
359: #warning indirect threading scheme 4: autoinc, low latency
360: # define NEXT_P0 ({cfa1=cfa; cfa=*ip++;}) /* keep the old cfa in cfa1, load the next cell, advance ip */
361: # define CFA cfa1
362: # define MORE_VARS Xt cfa1;
363: # define IP (ip-1) /* ip has already been advanced by NEXT_P0 */
364: # define SET_IP(p) ({ip=(p); cfa=*ip++;}) /* redo the fetch-and-advance for the new ip */
365: # define NEXT_INST (cfa)
366: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
367: # define DEF_CA Label ca;
368: # define NEXT_P1 ({ca=*cfa;}) /* load the code address ahead of the jump */
369: # define NEXT_P2 ({goto *ca;})
370: # define EXEC(XT) ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
371: #endif /* THREADING_SCHEME==4 */
372:
373:
374: #if THREADING_SCHEME==5 /* indirect: NEXT_P0 prefetches *ip into cfa without advancing ip; NEXT_P1 advances ip; NEXT_P2 jumps through **cfa */
375: #warning indirect threading scheme 5: long latency, cisc
376: # define NEXT_P0 ({cfa1=cfa; cfa=*ip;}) /* keep the old cfa in cfa1, prefetch the next cell */
377: # define CFA cfa1
378: # define MORE_VARS Xt cfa1;
379: # define IP (ip)
380: # define SET_IP(p) ({ip=(p); cfa=*ip;}) /* redo the prefetch for the new ip */
381: # define NEXT_INST (cfa)
382: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
383: # define DEF_CA
384: # define NEXT_P1 (ip++)
385: # define NEXT_P2 ({goto **cfa;})
386: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
387: #endif /* THREADING_SCHEME==5 */
388:
389: #if THREADING_SCHEME==6 /* indirect: the scheme selected when THREADING_SCHEME is not predefined */
390: #warning indirect threading scheme 6: long latency
391: # define NEXT_P0 ({cfa1=cfa; cfa=*ip;}) /* keep the old cfa in cfa1, prefetch the next cell */
392: # define CFA cfa1
393: # define MORE_VARS Xt cfa1;
394: # define IP (ip)
395: # define SET_IP(p) ({ip=(p); cfa=*ip;}) /* redo the prefetch for the new ip */
396: # define NEXT_INST (cfa)
397: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
398: # define DEF_CA Label ca;
399: # define NEXT_P1 ({ip++; ca=*cfa;}) /* advance ip and load the code address ahead of the jump */
400: # define NEXT_P2 ({goto *ca;})
401: # define EXEC(XT) ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
402: #endif /* THREADING_SCHEME==6 */
403:
404: #if THREADING_SCHEME==7 /* indirect: NOTE(review): every macro here is identical to indirect scheme 6 in this file, although the label says "low latency" - confirm this is intended */
405: #warning indirect threading scheme 7: low latency
406: # define NEXT_P0 ({cfa1=cfa; cfa=*ip;}) /* keep the old cfa in cfa1, prefetch the next cell */
407: # define CFA cfa1
408: # define MORE_VARS Xt cfa1;
409: # define IP (ip)
410: # define SET_IP(p) ({ip=(p); cfa=*ip;}) /* redo the prefetch for the new ip */
411: # define NEXT_INST (cfa)
412: # define INC_IP(const_inc) ({cfa=IP[const_inc]; ip+=(const_inc);})
413: # define DEF_CA Label ca;
414: # define NEXT_P1 ({ip++; ca=*cfa;}) /* advance ip and load the code address ahead of the jump */
415: # define NEXT_P2 ({goto *ca;})
416: # define EXEC(XT) ({DEF_CA cfa=(XT); ca=*cfa; goto *ca;}) /* declares its own ca inside the statement expression */
417: #endif /* THREADING_SCHEME==7 */
418:
419: #if THREADING_SCHEME==8 /* indirect: NOTE(review): every macro here is identical to indirect scheme 3 in this file */
420: #warning indirect threading scheme 8: low latency,cisc
421: # define NEXT_P0
422: # define CFA cfa
423: # define IP (ip)
424: # define SET_IP(p) ({ip=(p); NEXT_P0;})
425: # define NEXT_INST (*ip)
426: # define INC_IP(const_inc) ({ip+=(const_inc);})
427: # define DEF_CA
428: # define NEXT_P1
429: # define NEXT_P2 ({cfa=*ip++; goto **cfa;}) /* fetch, advance, double dereference and jump in one place */
430: # define EXEC(XT) ({cfa=(XT); goto **cfa;})
431: #endif /* THREADING_SCHEME==8 */
432:
433: /* indirect threaded */
434: #endif
435:
436: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
437:
438: #define NEXT ({DEF_CA NEXT_P1; NEXT_P2;}) /* scheme-independent: DEF_CA, then the last two dispatch stages; NEXT_P0 is not included */
439: #define IPTOS NEXT_INST /* alias for NEXT_INST */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>