1: /* This file defines a number of threading schemes.
2:
3: Copyright (C) 1995, 1996,1997,1999,2003,2004 Free Software Foundation, Inc.
4:
5: This file is part of Gforth.
6:
7: Gforth is free software; you can redistribute it and/or
8: modify it under the terms of the GNU General Public License
9: as published by the Free Software Foundation; either version 2
10: of the License, or (at your option) any later version.
11:
12: This program is distributed in the hope that it will be useful,
13: but WITHOUT ANY WARRANTY; without even the implied warranty of
14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: GNU General Public License for more details.
16:
17: You should have received a copy of the GNU General Public License
18: along with this program; if not, write to the Free Software
19: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
20:
21:
22: This file defines macros for threading. Many sets of macros are
23: defined. Functionally they have only one difference: Some implement
24: direct threading, some indirect threading. The other differences are
25: just variations to help GCC generate faster code for various
26: machines.
27:
28: (Well, to tell the truth, there actually is another functional
29: difference in some pathological cases: e.g., a '!' stores into the
30: cell where the next executed word comes from; or, the next word
31: executed comes from the top-of-stack. These differences are one of
32: the reasons why GCC cannot produce the right variation by itself. We
33: chose disallowing such practices and using the added implementation
34: freedom to achieve a significant speedup, because these practices
35: are not common in Forth (I have never heard of or seen anyone using
36: them), and it is easy to circumvent problems: A control flow change
37: will flush any prefetched words; you may want to do a "0
38: drop" before that to write back the top-of-stack cache.)
39:
40: These macro sets are used in the following ways: After translation
41: to C a typical primitive looks like
42:
43: ...
44: {
45: DEF_CA
46: other declarations
47: NEXT_P0;
48: main part of the primitive
49: NEXT_P1;
50: store results to stack
51: NEXT_P2;
52: }
53:
54: DEF_CA and all the NEXT_P* together must implement NEXT; In the main
55: part the instruction pointer can be read with IP, changed with
56: INC_IP(const_inc), and the cell right behind the presently executing
57: word (i.e. the value of *IP) is accessed with NEXT_INST.
58:
59: If a primitive does not fall through the main part, it has to do the
60: rest by itself. If it changes ip, it has to redo NEXT_P0 (perhaps we
61: should define a macro SET_IP).
62:
63: Some primitives (execute, dodefer) do not end with NEXT, but with
64: EXEC(.). If NEXT_P0 has been called earlier, it has to perform
65: "ip=IP;" to ensure that ip has the right value (NEXT_P0 may change
66: it).
67:
68: Finally, there are NEXT1_P1 and NEXT1_P2, which are parts of EXEC
69: (EXEC(XT) could be defined as "cfa=XT; NEXT1_P1; NEXT1_P2;" (is this
70: true?)) and are used for making docol faster.
71:
72: We can define the ways in which these macros are used with a regular
73: expression:
74:
75: For a primitive
76:
77: DEF_CA NEXT_P0 ( IP | INC_IP | NEXT_INST | ip=...; NEXT_P0 ) * ( NEXT_P1 NEXT_P2 | EXEC(...) )
78:
79: For a run-time routine, e.g., docol:
80: PFA1(cfa) ( NEXT_P0 NEXT | cfa=...; NEXT1_P1; NEXT1_P2 | EXEC(...) )
81:
82: This comment does not yet describe all the dependences that the
83: macros have to satisfy.
84:
85: To organize the former ifdef chaos, each path is separated.
86: This gives a quite impressive number of paths, but you clearly
87: find things that go together.
88:
89: It should be possible to organize the whole thing in a way that
90: contains less redundancy and allows a simpler description.
91:
92: */
93:
/* DO_GOTO is the final jump of a dispatch.  With GCC_PR15242_WORKAROUND
   defined it jumps to the before_goto label (placed in front of the
   single "goto *real_ca" by FIRST_NEXT_P2) instead of expanding to its
   own computed goto. */
94: #ifdef GCC_PR15242_WORKAROUND
95: #define DO_GOTO goto before_goto
96: #else
97: #define DO_GOTO goto *real_ca
98: #endif
/* GOTO_ALIGN is expanded right before the before_goto label in
   FIRST_NEXT_P2; it is empty unless it was defined before this point. */
99: #ifndef GOTO_ALIGN
100: #define GOTO_ALIGN
101: #endif
102:
/* Dispatch helpers shared by all threading schemes.  They are composed
   from the scheme-specific macros DEF_CA, NEXT_P1, NEXT_P1_5 and EXEC1
   that are defined further down. */
/* GOTO only stores the jump target in real_ca; the jump itself is done
   by DO_GOTO. */
103: #define GOTO(target) do {(real_ca=(target));} while(0)
/* Last part of NEXT: run NEXT_P1_5, then jump. */
104: #define NEXT_P2 do {NEXT_P1_5; DO_GOTO;} while(0)
/* Run EXEC1(XT), then jump. */
105: #define EXEC(XT) do { EXEC1(XT); DO_GOTO;} while (0)
/* DEF_CA, NEXT_P1 and NEXT_P2 in one statement (NEXT_P0 is not included). */
106: #define NEXT do {DEF_CA NEXT_P1; NEXT_P2;} while(0)
/* FIRST_NEXT_P2 runs NEXT_P1_5 and GOTO_ALIGN and then emits
   "goto *real_ca" bracketed by the labels before_goto and after_goto.
   Unlike NEXT_P2 it is a plain statement sequence, not a do/while(0)
   block, and it ends in a label. */
107: #define FIRST_NEXT_P2 NEXT_P1_5; GOTO_ALIGN; \
108: before_goto: goto *real_ca; after_goto:
/* Counterpart of NEXT that uses FIRST_NEXT_P2 (also not wrapped in
   do/while(0)). */
109: #define FIRST_NEXT DEF_CA NEXT_P1; FIRST_NEXT_P2;
/* IPTOS is another name for NEXT_INST. */
110: #define IPTOS NEXT_INST
111:
112:
113: #ifdef DOUBLY_INDIRECT
/* Doubly indirect threading: dispatch obtains the code address through
   two dereferences of cfa (ca=**cfa).  NEXT_P0 keeps the current cfa in
   cfa1 (declared through MORE_VARS) and loads the next one from *ip. */
114: # ifndef DEBUG_DITC
115: # define DEBUG_DITC 0
116: # endif
117: /* define DEBUG_DITC to 1 if you want to check consistency */
118: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
119: # define CFA cfa1
120: # define MORE_VARS Xt cfa1;
121: # define IP (ip)
122: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
123: # define NEXT_INST (cfa)
124: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
125: # define DEF_CA Label ca;
/* With DEBUG_DITC set, NEXT_P1 reports a cfa that is not strictly between
   vm_prims+DOESJUMP and vm_prims+npriminfos, then advances ip.  The
   pointers are cast to void * because that is the argument type the %p
   conversion requires. */
126: # define NEXT_P1 do {\
127: if (DEBUG_DITC && (cfa<=vm_prims+DOESJUMP || cfa>=vm_prims+npriminfos)) \
128: fprintf(stderr,"NEXT encountered prim %p at ip=%p\n", (void *)cfa, (void *)ip); \
129: ip++;} while(0)
130: # define NEXT_P1_5 do {ca=**cfa; GOTO(ca);} while(0)
/* With DEBUG_DITC set, EXEC1 reports an xt that does lie strictly between
   vm_prims+DOESJUMP and vm_prims+npriminfos (the opposite of the NEXT_P1
   check).  DEF_CA declares the local ca inside the do-block. */
131: # define EXEC1(XT) do {DEF_CA cfa=(XT);\
132: if (DEBUG_DITC && (cfa>vm_prims+DOESJUMP && cfa<vm_prims+npriminfos)) \
133: fprintf(stderr,"EXEC encountered xt %p at ip=%p, vm_prims=%p, xts=%p\n", (void *)cfa, (void *)ip, (void *)vm_prims, (void *)xts); \
134: ca=**cfa; GOTO(ca);} while(0)
135:
136: #elif defined(NO_IP)
137:
/* NO_IP variant: NEXT_P0, NEXT_P1 and DEF_CA are empty, INC_IP is a
   no-op expression, and SET_IP expands to assert(0).  IP and NEXT_INST
   are not defined in this branch. */
138: #define NEXT_P0
139: # define CFA cfa
140: #define SET_IP(target) assert(0)
141: #define INC_IP(n) ((void)0)
142: #define DEF_CA
143: #define NEXT_P1
/* Jumps straight to next_code instead of storing a target in real_ca. */
144: #define NEXT_P1_5 do {goto *next_code;} while(0)
145: /* set next_code to the return address before performing EXEC */
/* Sets cfa and jumps directly through it (goto **cfa), again without
   using real_ca. */
146: #define EXEC1(XT) do {cfa=(XT); goto **cfa;} while(0)
147:
148: #else /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
149:
150: #if defined(DIRECT_THREADED)
151:
152: /* This lets the compiler know that cfa is dead before; we place it at
153: "goto *"s that perform direct threaded dispatch (i.e., not EXECUTE
154: etc.), and thus do not reach doers, which would use cfa; the only
155: way to a doer is through EXECUTE etc., which set the cfa
156: themselves.
157:
158: Some of these direct threaded schemes use "cfa" to hold the code
159: address in normal direct threaded code. Of course we cannot use
160: KILLS there.
161:
162: KILLS works by having an empty asm instruction, and claiming to the
163: compiler that it writes to cfa.
164:
165: KILLS is optional. You can write
166:
167: #define KILLS
168:
169: and lose just a little performance.
170: */
/* Empty asm statement that names cfa as an output operand.  The expansion
   already ends in ';', so it is written directly in front of the next
   statement, as in "KILLS GOTO(...)". */
171: #define KILLS asm("":"=X"(cfa));
172:
/* Direct threading uses scheme 7 unless THREADING_SCHEME was defined
   before this point. */
173: #ifndef THREADING_SCHEME
174: #define THREADING_SCHEME 7
175: #endif
176:
/* Direct scheme 1: NEXT_P0 saves the current cfa in cfa1 (declared via
   MORE_VARS) and fetches the next cfa with a post-increment of ip, so the
   logical instruction pointer IP is ip-1.  INC_IP reloads cfa from the new
   position before advancing ip.  Dispatch jumps to cfa itself; EXEC1
   jumps to *cfa. */
177: #if THREADING_SCHEME==1
178: #warning direct threading scheme 1: autoinc, long latency, cfa live
179: # define NEXT_P0 do {cfa1=cfa; cfa=*ip++;} while(0)
180: # define CFA cfa1
181: # define MORE_VARS Xt cfa1;
182: # define IP (ip-1)
183: # define SET_IP(p) do {ip=(p); cfa=*ip++;} while(0)
184: # define NEXT_INST (cfa)
185: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
186: # define DEF_CA
187: # define NEXT_P1
188: # define NEXT_P1_5 do {GOTO(cfa);} while(0)
189: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
190: #endif
191:
/* Direct scheme 2: NEXT_P0 only increments ip, so IP is ip-1 and the next
   instruction is read as *(ip-1), both for NEXT_INST and at dispatch.
   cfa is not loaded on the normal dispatch path; KILLS is placed in front
   of the GOTO.  EXEC1 sets cfa and jumps to *cfa. */
192: #if THREADING_SCHEME==2
193: #warning direct threading scheme 2: autoinc, long latency, cfa dead
194: # define NEXT_P0 (ip++)
195: # define CFA cfa
196: # define IP (ip-1)
197: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
198: # define NEXT_INST (*(ip-1))
199: # define INC_IP(const_inc) do { ip+=(const_inc);} while(0)
200: # define DEF_CA
201: # define NEXT_P1
202: # define NEXT_P1_5 do {KILLS GOTO(*(ip-1));} while(0)
203: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
204: #endif
205:
206:
/* Direct scheme 3: NEXT_P0 is empty; NEXT_P1 loads cfa from *ip and
   post-increments ip, and NEXT_P1_5 jumps to cfa.  NEXT_INST reads *ip
   directly.
   NOTE(review): apart from the #warning text, these definitions are
   identical to direct scheme 7 -- confirm that this is intended. */
207: #if THREADING_SCHEME==3
208: #warning direct threading scheme 3: autoinc, low latency, cfa live
209: # define NEXT_P0
210: # define CFA cfa
211: # define IP (ip)
212: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
213: # define NEXT_INST (*ip)
214: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
215: # define DEF_CA
216: # define NEXT_P1 do {cfa=*ip++;} while(0)
217: # define NEXT_P1_5 do {GOTO(cfa);} while(0)
218: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
219: #endif
220:
/* Direct scheme 4: NEXT_P0 and NEXT_P1 are both empty; the whole fetch
   happens in NEXT_P1_5, which jumps to *(ip++) after KILLS.  cfa is only
   set by EXEC1. */
221: #if THREADING_SCHEME==4
222: #warning direct threading scheme 4: autoinc, low latency, cfa dead
223: # define NEXT_P0
224: # define CFA cfa
225: # define IP (ip)
226: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
227: # define NEXT_INST (*ip)
228: # define INC_IP(const_inc) do { ip+=(const_inc);} while(0)
229: # define DEF_CA
230: # define NEXT_P1
231: # define NEXT_P1_5 do {KILLS GOTO(*(ip++));} while(0)
232: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
233: #endif
234:
/* Direct scheme 5: NEXT_P0 saves the current cfa in cfa1 (declared via
   MORE_VARS) and loads the next cfa from *ip without moving ip; the
   increment of ip is done separately in NEXT_P1.  Dispatch jumps to cfa. */
235: #if THREADING_SCHEME==5
236: #warning direct threading scheme 5: long latency, cfa live
237: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
238: # define CFA cfa1
239: # define MORE_VARS Xt cfa1;
240: # define IP (ip)
241: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
242: # define NEXT_INST (cfa)
243: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
244: # define DEF_CA
245: # define NEXT_P1 (ip++)
246: # define NEXT_P1_5 do {GOTO(cfa);} while(0)
247: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
248: #endif
249:
/* Direct scheme 6: NEXT_P0 is empty; NEXT_P1 increments ip and NEXT_P1_5
   jumps to *(ip-1) after KILLS.  cfa is only set by EXEC1.
   NOTE(review): after macro expansion these definitions are the same as
   direct scheme 8 -- confirm that this is intended. */
250: #if THREADING_SCHEME==6
251: #warning direct threading scheme 6: long latency, cfa dead
252: # define NEXT_P0
253: # define CFA cfa
254: # define IP (ip)
255: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
256: # define NEXT_INST (*ip)
257: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
258: # define DEF_CA
259: # define NEXT_P1 (ip++)
260: # define NEXT_P1_5 do {KILLS GOTO(*(ip-1));} while(0)
261: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
262: #endif
263:
264:
/* Direct scheme 7 (the default when THREADING_SCHEME is not preset):
   NEXT_P0 is empty; NEXT_P1 loads cfa from *ip and post-increments ip,
   and NEXT_P1_5 jumps to cfa.
   NOTE(review): apart from the #warning text, these definitions are
   identical to direct scheme 3 -- confirm that this is intended. */
265: #if THREADING_SCHEME==7
266: #warning direct threading scheme 7: low latency, cfa live
267: # define NEXT_P0
268: # define CFA cfa
269: # define IP (ip)
270: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
271: # define NEXT_INST (*ip)
272: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
273: # define DEF_CA
274: # define NEXT_P1 do {cfa=*ip++;} while(0)
275: # define NEXT_P1_5 do {GOTO(cfa);} while(0)
276: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
277: #endif
278:
/* Direct scheme 8: NEXT_P0 is empty; NEXT_P1 increments ip and NEXT_P1_5
   jumps to *(ip-1) after KILLS.  NEXT_INST is written via IP, which
   expands to (ip).
   NOTE(review): after macro expansion these definitions are the same as
   direct scheme 6 -- confirm that this is intended. */
279: #if THREADING_SCHEME==8
280: #warning direct threading scheme 8: cfa dead, i386 hack
281: # define NEXT_P0
282: # define CFA cfa
283: # define IP (ip)
284: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
285: # define NEXT_INST (*IP)
286: # define INC_IP(const_inc) do { ip+=(const_inc);} while(0)
287: # define DEF_CA
288: # define NEXT_P1 (ip++)
289: # define NEXT_P1_5 do {KILLS GOTO(*(ip-1));} while(0)
290: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
291: #endif
292:
/* Direct scheme 9: keeps the instruction at *ip prefetched in next_cfa
   (declared via MORE_VARS).  SET_IP and INC_IP refresh next_cfa whenever
   ip changes; NEXT_P1 moves next_cfa into cfa, advances ip and prefetches
   the following instruction.  NEXT_INST is the prefetched value. */
293: #if THREADING_SCHEME==9
294: #warning direct threading scheme 9: Power/PPC hack, long latency
295: /* Power uses a prepare-to-branch instruction, and the latency between
296: this inst and the branch is 5 cycles on a PPC604; so we utilize this
297: to do some prefetching in between */
298: # define NEXT_P0
299: # define CFA cfa
300: # define IP ip
301: # define SET_IP(p) do {ip=(p); next_cfa=*ip; NEXT_P0;} while(0)
302: # define NEXT_INST (next_cfa)
303: # define INC_IP(const_inc) do {next_cfa=IP[const_inc]; ip+=(const_inc);} while(0)
304: # define DEF_CA
305: # define NEXT_P1 do {cfa=next_cfa; ip++; next_cfa=*ip;} while(0)
306: # define NEXT_P1_5 do {GOTO(cfa);} while(0)
307: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
308: # define MORE_VARS Xt next_cfa;
309: #endif
310:
/* Direct scheme 10: NEXT_P0 and NEXT_P1 are empty; NEXT_P1_5 does the
   whole dispatch (load cfa from *ip, post-increment ip, jump to cfa). */
311: #if THREADING_SCHEME==10
312: #warning direct threading scheme 10: plain (no attempt at scheduling)
313: # define NEXT_P0
314: # define CFA cfa
315: # define IP (ip)
316: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
317: # define NEXT_INST (*ip)
318: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
319: # define DEF_CA
320: # define NEXT_P1
321: # define NEXT_P1_5 do {cfa=*ip++; GOTO(cfa);} while(0)
322: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
323: #endif
324:
325: /* direct threaded */
326: #else
327: /* indirect THREADED */
328:
/* Indirect threading uses scheme 6 unless THREADING_SCHEME was defined
   before this point. */
329: #ifndef THREADING_SCHEME
330: #define THREADING_SCHEME 6
331: #endif
332:
/* Indirect scheme 1: NEXT_P0 saves the current cfa in cfa1 (declared via
   MORE_VARS) and fetches the next cfa with a post-increment of ip, so IP
   is ip-1.  The code address is read from *cfa inside the GOTO argument
   in NEXT_P1_5; no separate ca variable is used. */
333: #if THREADING_SCHEME==1
334: #warning indirect threading scheme 1: autoinc, long latency, cisc
335: # define NEXT_P0 do {cfa1=cfa; cfa=*ip++;} while(0)
336: # define CFA cfa1
337: # define MORE_VARS Xt cfa1;
338: # define IP (ip-1)
339: # define SET_IP(p) do {ip=(p); cfa=*ip++;} while(0)
340: # define NEXT_INST (cfa)
341: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
342: # define DEF_CA
343: # define NEXT_P1
344: # define NEXT_P1_5 do {GOTO(*cfa);} while(0)
345: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
346: #endif
347:
/* Indirect scheme 2: NEXT_P0 saves the current cfa in cfa1 and fetches
   the next cfa with a post-increment of ip (IP is ip-1).  DEF_CA declares
   a local Label ca; NEXT_P1 loads the code address into it (ca=*cfa) and
   NEXT_P1_5 jumps to ca.
   NOTE(review): apart from the #warning text, these definitions are
   identical to indirect scheme 4, which is labelled "low latency" --
   confirm that this is intended. */
348: #if THREADING_SCHEME==2
349: #warning indirect threading scheme 2: autoinc, long latency
350: # define NEXT_P0 do {cfa1=cfa; cfa=*ip++;} while(0)
351: # define CFA cfa1
352: # define MORE_VARS Xt cfa1;
353: # define IP (ip-1)
354: # define SET_IP(p) do {ip=(p); cfa=*ip++;} while(0)
355: # define NEXT_INST (cfa)
356: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
357: # define DEF_CA Label ca;
358: # define NEXT_P1 do {ca=*cfa;} while(0)
359: # define NEXT_P1_5 do {GOTO(ca);} while(0)
360: # define EXEC1(XT) do {DEF_CA cfa=(XT); ca=*cfa; GOTO(ca);} while(0)
361: #endif
362:
363:
/* Indirect scheme 3: NEXT_P0 and NEXT_P1 are empty; NEXT_P1_5 does the
   whole dispatch (load cfa from *ip, post-increment ip, jump to *cfa).
   NOTE(review): apart from the #warning text, these definitions are
   identical to indirect scheme 8 -- confirm that this is intended. */
364: #if THREADING_SCHEME==3
365: #warning indirect threading scheme 3: autoinc, low latency, cisc
366: # define NEXT_P0
367: # define CFA cfa
368: # define IP (ip)
369: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
370: # define NEXT_INST (*ip)
371: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
372: # define DEF_CA
373: # define NEXT_P1
374: # define NEXT_P1_5 do {cfa=*ip++; GOTO(*cfa);} while(0)
375: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
376: #endif
377:
/* Indirect scheme 4: NEXT_P0 saves the current cfa in cfa1 and fetches
   the next cfa with a post-increment of ip (IP is ip-1).  DEF_CA declares
   a local Label ca; NEXT_P1 loads the code address into it (ca=*cfa) and
   NEXT_P1_5 jumps to ca.
   NOTE(review): although labelled "low latency", these definitions are
   identical to indirect scheme 2 ("long latency") apart from the #warning
   text -- confirm that this is intended. */
378: #if THREADING_SCHEME==4
379: #warning indirect threading scheme 4: autoinc, low latency
380: # define NEXT_P0 do {cfa1=cfa; cfa=*ip++;} while(0)
381: # define CFA cfa1
382: # define MORE_VARS Xt cfa1;
383: # define IP (ip-1)
384: # define SET_IP(p) do {ip=(p); cfa=*ip++;} while(0)
385: # define NEXT_INST (cfa)
386: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
387: # define DEF_CA Label ca;
388: # define NEXT_P1 do {ca=*cfa;} while(0)
389: # define NEXT_P1_5 do {GOTO(ca);} while(0)
390: # define EXEC1(XT) do {DEF_CA cfa=(XT); ca=*cfa; GOTO(ca);} while(0)
391: #endif
392:
393:
/* Indirect scheme 5: NEXT_P0 saves the current cfa in cfa1 (declared via
   MORE_VARS) and loads the next cfa from *ip without moving ip; NEXT_P1
   increments ip.  The code address is read from *cfa inside the GOTO
   argument in NEXT_P1_5; no separate ca variable is used. */
394: #if THREADING_SCHEME==5
395: #warning indirect threading scheme 5: long latency, cisc
396: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
397: # define CFA cfa1
398: # define MORE_VARS Xt cfa1;
399: # define IP (ip)
400: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
401: # define NEXT_INST (cfa)
402: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
403: # define DEF_CA
404: # define NEXT_P1 (ip++)
405: # define NEXT_P1_5 do {GOTO(*cfa);} while(0)
406: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
407: #endif
408:
/* Indirect scheme 6 (the default when THREADING_SCHEME is not preset):
   NEXT_P0 saves the current cfa in cfa1 and loads the next cfa from *ip
   without moving ip.  DEF_CA declares a local Label ca; NEXT_P1
   increments ip and loads ca=*cfa, and NEXT_P1_5 jumps to ca.
   NOTE(review): apart from the #warning text, these definitions are
   identical to indirect scheme 7, which is labelled "low latency" --
   confirm that this is intended. */
409: #if THREADING_SCHEME==6
410: #warning indirect threading scheme 6: long latency
411: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
412: # define CFA cfa1
413: # define MORE_VARS Xt cfa1;
414: # define IP (ip)
415: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
416: # define NEXT_INST (cfa)
417: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
418: # define DEF_CA Label ca;
419: # define NEXT_P1 do {ip++; ca=*cfa;} while(0)
420: # define NEXT_P1_5 do {GOTO(ca);} while(0)
421: # define EXEC1(XT) do {DEF_CA cfa=(XT); ca=*cfa; GOTO(ca);} while(0)
422: #endif
423:
/* Indirect scheme 7: NEXT_P0 saves the current cfa in cfa1 and loads the
   next cfa from *ip without moving ip.  DEF_CA declares a local Label ca;
   NEXT_P1 increments ip and loads ca=*cfa, and NEXT_P1_5 jumps to ca.
   NOTE(review): although labelled "low latency", these definitions are
   identical to indirect scheme 6 ("long latency") apart from the #warning
   text -- confirm that this is intended. */
424: #if THREADING_SCHEME==7
425: #warning indirect threading scheme 7: low latency
426: # define NEXT_P0 do {cfa1=cfa; cfa=*ip;} while(0)
427: # define CFA cfa1
428: # define MORE_VARS Xt cfa1;
429: # define IP (ip)
430: # define SET_IP(p) do {ip=(p); cfa=*ip;} while(0)
431: # define NEXT_INST (cfa)
432: # define INC_IP(const_inc) do {cfa=IP[const_inc]; ip+=(const_inc);} while(0)
433: # define DEF_CA Label ca;
434: # define NEXT_P1 do {ip++; ca=*cfa;} while(0)
435: # define NEXT_P1_5 do {GOTO(ca);} while(0)
436: # define EXEC1(XT) do {DEF_CA cfa=(XT); ca=*cfa; GOTO(ca);} while(0)
437: #endif
438:
/* Indirect scheme 8: NEXT_P0 and NEXT_P1 are empty; NEXT_P1_5 does the
   whole dispatch (load cfa from *ip, post-increment ip, jump to *cfa).
   NOTE(review): apart from the #warning text, these definitions are
   identical to indirect scheme 3 -- confirm that this is intended. */
439: #if THREADING_SCHEME==8
440: #warning indirect threading scheme 8: low latency,cisc
441: # define NEXT_P0
442: # define CFA cfa
443: # define IP (ip)
444: # define SET_IP(p) do {ip=(p); NEXT_P0;} while(0)
445: # define NEXT_INST (*ip)
446: # define INC_IP(const_inc) do {ip+=(const_inc);} while(0)
447: # define DEF_CA
448: # define NEXT_P1
449: # define NEXT_P1_5 do {cfa=*ip++; GOTO(*cfa);} while(0)
450: # define EXEC1(XT) do {cfa=(XT); GOTO(*cfa);} while(0)
451: #endif
452:
453: /* indirect threaded */
454: #endif
455:
456: #endif /* !defined(DOUBLY_INDIRECT) && !defined(NO_IP) */
457:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>