1: /* Peephole optimization routines and tables
2:
3: Copyright (C) 2001 Free Software Foundation, Inc.
4:
5: This file is part of Gforth.
6:
7: Gforth is free software; you can redistribute it and/or
8: modify it under the terms of the GNU General Public License
9: as published by the Free Software Foundation; either version 2
10: of the License, or (at your option) any later version.
11:
12: This program is distributed in the hope that it will be useful,
13: but WITHOUT ANY WARRANTY; without even the implied warranty of
14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: GNU General Public License for more details.
16:
17: You should have received a copy of the GNU General Public License
18: along with this program; if not, write to the Free Software
19: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
20: */
21:
22: #include "config.h"
23: #include "forth.h"
24: #include <stdlib.h>
25: #include <string.h>
26: #include <assert.h>
27:
28: /* the numbers in this struct are primitive indices */
29: typedef struct Combination {
30: int prefix;
31: int lastprim;
32: int combination_prim;
33: } Combination;
34:
35: Combination peephole_table[] = {
36: #include "peephole.i"
37: };
38:
39: int use_super = 1;
40:
41: typedef Xt Inst;
42:
43: typedef struct Peeptable_entry {
44: struct Peeptable_entry *next;
45: Inst prefix;
46: Inst lastprim;
47: Inst combination_prim;
48: } Peeptable_entry;
49:
50: #define HASH_SIZE 1024
51: #define hash(_i1,_i2) (((((Cell)(_i1))^((Cell)(_i2)))>>4)&(HASH_SIZE-1))
52:
53: Cell peeptable;
54:
55: Cell prepare_peephole_table(Inst insts[])
56: {
57: Cell i;
58: Peeptable_entry **pt = (Peeptable_entry **)calloc(HASH_SIZE,sizeof(Peeptable_entry *));
59:
60: for (i=0; i<sizeof(peephole_table)/sizeof(peephole_table[0]); i++) {
61: Combination *c = &peephole_table[i];
62: Peeptable_entry *p = (Peeptable_entry *)malloc(sizeof(Peeptable_entry));
63: Cell h;
64: p->prefix = insts[c->prefix];
65: p->lastprim = insts[c->lastprim];
66: p->combination_prim = insts[c->combination_prim];
67: h = hash(p->prefix,p->lastprim);
68: p->next = pt[h];
69: pt[h] = p;
70: }
71: return (Cell)pt;
72: }
73:
74: Inst peephole_opt(Inst inst1, Inst inst2, Cell peeptable)
75: {
76: Peeptable_entry **pt = (Peeptable_entry **)peeptable;
77: Peeptable_entry *p;
78:
79: if (use_super == 0)
80: return 0;
81: for (p = pt[hash(inst1,inst2)]; p != NULL; p = p->next)
82: if (inst1 == p->prefix && inst2 == p->lastprim)
83: return p->combination_prim;
84: return NULL;
85: }
86:
87:
88: /* hashtable stuff (again) */
89:
90: #undef hash
91:
92: typedef int (*Pred1) (void* arg1);
93: typedef int (*Pred2) (void* arg1, void* arg2);
94: typedef void* (*Proc1) (void* arg1);
95: typedef void* (*Proc2) (void* arg1, void* arg2);
96: typedef void* (*Proc3) (void* arg1, void* arg2, void* arg3);
97: typedef int (*Key_Eq) (void* key1, void* key2);
98: typedef unsigned (*Key_Hash) (void* key, unsigned modulus);
99:
100: typedef struct hash_table *Hash_Table;
101:
102: Hash_Table make_hash_table (unsigned size, Key_Eq eq, Key_Hash hash);
103: void* hash_table_ref (Hash_Table table, void* key, void* deflt);
104: void hash_table_set (Hash_Table table, void *key, void* value);
105: void* hash_table_find_value (Hash_Table table, Pred1 p, void* deflt);
106: void* hash_table_with_env_find_value (Hash_Table table,
107: Pred2 p, void* env,
108: void* deflt);
109: void* hash_table_fold (Hash_Table table, Proc3 proc, void *knil);
110: void hash_table_print (Hash_Table table);
111: int hash_table_addr_eq (void* key1, void* key2);
112: unsigned hash_table_addr_hash (void* key, unsigned modulus);
113: int hash_table_string_eq (char* k1, char* k2);
114: unsigned hash_table_string_hash (char* key, unsigned modulus);
115:
116: #include <stdlib.h>
117: #include <assert.h>
118:
119: typedef struct hash_bucket {
120: void* key;
121: void* value;
122: struct hash_bucket* next;
123: } *Hash_Bucket;
124:
125: struct hash_table {
126: unsigned size;
127: Hash_Bucket* buckets;
128: Key_Eq eq;
129: Key_Hash hash;
130: };
131:
132: Hash_Table make_hash_table (unsigned size, Key_Eq eq, Key_Hash hash) {
133: Hash_Table tab = malloc (sizeof *tab);
134: Hash_Bucket* buckets = calloc (size, sizeof *buckets);
135: assert (tab);
136: assert (buckets);
137: tab->size = size;
138: tab->buckets = buckets;
139: tab->eq = eq;
140: tab->hash = hash;
141: return tab;
142: }
143:
144: static void* hash_bucket_search (Hash_Bucket* next, Pred1 p,
145: Proc1 found, Proc1 not_found) {
146: for (;;) {
147: Hash_Bucket bucket = *next;
148: if (bucket == 0) return not_found (next);
149: else if (p (bucket->key)) return found (next);
150: else { next = &(bucket->next); continue; }
151: }
152: }
153:
154: static void* hash_table_search (Hash_Table table, void* key, Key_Eq eq,
155: Proc1 found, Proc1 not_found) {
156: int pred (void* key2) { return eq (key, key2); }
157: unsigned idx= table->hash(key, table->size);
158: assert (idx < table->size);
159: return hash_bucket_search (table->buckets+idx, pred, found, not_found);
160: }
161:
162: void* hash_table_ref (Hash_Table table, void* key, void* deflt) {
163: void* found (Hash_Bucket* next) { return (*next)->value; }
164: void* not_found (Hash_Bucket* next) { return deflt; }
165: return hash_table_search (table, key, table->eq,
166: (Proc1)found, (Proc1)not_found);
167: }
168:
169: void hash_table_set (Hash_Table table, void *key, void* value) {
170: void* found (Hash_Bucket* next) {
171: Hash_Bucket bucket = *next;
172: bucket->key = key;
173: bucket->value = value;
174: return value;
175: }
176: void* not_found (Hash_Bucket* next) {
177: Hash_Bucket bucket = malloc (sizeof (struct hash_bucket));
178: assert (bucket);
179: bucket->key = key;
180: bucket->value = value;
181: bucket->next = *next;
182: *next = bucket;
183: return value;
184: }
185: hash_table_search (table, key, table->eq,
186: (Proc1)found,(Proc1)not_found);
187: }
188:
189: void* hash_table_find_value (Hash_Table table, Pred1 p, void* deflt) {
190: unsigned i = 0;
191: for (;;)
192: if (i == table->size) return deflt;
193: else {
194: Hash_Bucket bucket = table->buckets[i];
195: for (;;)
196: if (bucket == 0) { i++; break; }
197: else if (p (bucket->value)) return bucket->value;
198: else { bucket = bucket->next; continue; }
199: }
200: }
201:
202: void* hash_table_with_env_find_value (Hash_Table table,
203: Pred2 p, void* env,
204: void* deflt) {
205: int pred (void* val) { return p (env, val); }
206: return hash_table_find_value (table, pred, deflt);
207: }
208:
209:
210: static void* hash_bucket_fold (Hash_Bucket bucket, Proc3 proc, void *init) {
211: for (;;)
212: if (bucket == 0) return init;
213: else {
214: init = proc (bucket->key, bucket->value, init);
215: bucket = bucket->next; continue;
216: }
217: }
218:
219: void* hash_table_fold (Hash_Table table, Proc3 proc, void *init) {
220: unsigned i = 0, size = table->size;
221: Hash_Bucket* buckets = table->buckets;
222: for (;;)
223: if (i == size) return init;
224: else {
225: init = hash_bucket_fold (buckets [i], proc, init);
226: i++; continue;
227: }
228: }
229:
230: void hash_table_print (Hash_Table table) {
231: void* print (void* key, void* value, void* init) {
232: printf ("[%p, %p]\n", key, value);
233: return init;
234: }
235: hash_table_fold (table, print, 0);
236: }
237:
238: int hash_table_addr_eq (void* key1, void* key2) {
239: return key1 == key2;
240: }
241:
242: unsigned hash_table_addr_hash (void* key, unsigned modulus) {
243: return (((unsigned long)key) >> 3) % modulus;
244: }
245:
246: int hash_table_string_eq (char* k1, char* k2) {
247: return strcmp (k1, k2) == 0;
248: }
249:
250: unsigned hash_table_string_hash (char* key, unsigned modulus) {
251: unsigned sum = 0;
252: for (;;)
253: if (*key == 0) return (sum >> 3) % modulus;
254: else { sum += *key; key++; continue; }
255: }
256:
257: /*
258:
259: Select super instructions with dynamic programming.
260:
261: A buffer `dp_table' stores an instruction sequence, i.e. a basic
262: block. Instructions can be added with `peephole_dp_append'.
263: `peephole_dp_append' takes the Prim_Descriptor of the operator and a
264: pointer to a gap where to place the super-instruction, i.e. the
265: current `here' pointer.
266:
267: The actual instructions are copied into the gaps by calling
268: `peephole_dp_flush'. This selects an optimal cover for the stored
269: sequence, copies the corresponding XTs into the gaps, moves inline
270: arguments (this may be required, since some gaps may not be filled
271: with instructions), and finally flushes the buffer `dp_table'.
272:
273: The rest of the system must cooperate, i.e. has to call
274: `peephole_dp_flush' in the right places, e.g. before taking labels.
275: Non-cooperation will be punished with crashes.
276:
277: */
278:
279: /* #define XDP_DEBUG(stm) stm; */
280: #define XDP_DEBUG(stm)
281:
282: #define dprintf(format, args...) XDP_DEBUG(fprintf (stderr, format , ## args))
283:
284: typedef struct dp_table_entry {
285: Prim_Descriptor op; /* NULL marks the end. */
286: Cell* mark; /* the gap. */
287: struct burm_state* state; /* iburg needs this. */
288: /* unsigned state; */
289: }* Dp_Table_Entry;
290:
291: #define DP_TABLE_SIZE 25
292: static struct dp_table_entry dp_table[DP_TABLE_SIZE+1];
293: static unsigned dp_table_len = 0;
294:
295: static int dp_table_end_p (Dp_Table_Entry entry) { return entry->op == 0; }
296: static void dp_table_clear (void) {
297: dp_table_len = 0;
298: dp_table[0].op = 0;
299: /* dprintf ("dp_table_clear\n"); */
300: }
301: static void dump_dp_table (void) {
302: Dp_Table_Entry e = dp_table;
303: for (;;)
304: if (dp_table_end_p (e)) return;
305: else {
306: printf ("[%s, %p]\n", e->op->name, e->mark);
307: e++; continue;
308: }
309: }
310:
311: Cell* peephole_dp_flush (Cell* mark);
312:
313: Cell* peephole_dp_append (Prim_Descriptor desc, Cell* mark) {
314: if (dp_table_len == DP_TABLE_SIZE) {
315: dp_table[dp_table_len].op = 0;
316: mark = peephole_dp_flush (mark);
317: }
318: dp_table[dp_table_len].op = desc;
319: assert (dp_table_len == 0 ||
320: dp_table[dp_table_len-1].mark < mark);
321: dp_table[dp_table_len].mark = mark;
322: dp_table_len ++;
323: dp_table[dp_table_len].op = 0;
324: return mark+1;
325: }
326:
327: /* burg stuff */
328:
329: #define NIL_TERM_NUM 1
330: typedef struct dp_table_entry* NODEPTR_TYPE;
331: #define OP_LABEL(p) (((p)->op ? (p)->op->num+2 : NIL_TERM_NUM))
332: #define STATE_LABEL(p) ((p)->state)
333: #define LEFT_CHILD(p) ((p)+1)
334: #define RIGHT_CHILD(p) ((NODEPTR_TYPE)(assert (0), 0L))
335: #define PANIC dprintf
336:
337: #include "prim_burm.i"
338:
339: static Label* peephole_symbols = 0;
340: static Xt desc_to_xt (Prim_Descriptor desc) {
341: static Xt* primtab = 0;
342: if (primtab == 0) {
343: Label* symbols = peephole_symbols;
344: unsigned symbols_size = DOESJUMP+1;
345: assert (peephole_symbols);
346: for (;;)
347: if (symbols[symbols_size] == 0) break;
348: else { symbols_size ++; continue; }
349: primtab = primtable(symbols+DOESJUMP+1,
350: symbols_size-DOESJUMP-1);
351: }
352: return primtab[desc->num];
353: }
354:
355:
356: static Prim_Descriptor peephole_descs = 0;
357: static Prim_Descriptor external_rule_number_to_desc (int eruleno) {
358: assert (peephole_descs);
359: return &peephole_descs[eruleno-NIL_TERM_NUM-1];
360: }
361:
362: static void move_args_left (Dp_Table_Entry entry, unsigned n, unsigned offset){
363: if (n == 0) return;
364: else {
365: Cell* from = entry->mark+1;
366: size_t size = entry[1].mark - from;
367: memmove (from-offset, from, size * sizeof *from);
368: move_args_left (entry+1, n-1, offset+1);
369: }
370: }
371:
372:
373: static unsigned reduce (Dp_Table_Entry entry, int goalnt, unsigned saved) {
374: if (dp_table_end_p (entry))
375: return saved;
376: else {
377: int eruleno = burm_rule (STATE_LABEL(entry), goalnt);
378: short *nts = burm_nts[eruleno];
379: NODEPTR_TYPE kids[2];
380: Prim_Descriptor desc = external_rule_number_to_desc (eruleno);
381: Label* xt = (Label*)desc_to_xt (desc);;
382: dprintf ("burm_string = %s, desc = %s, xt = %p\n",
383: burm_string[eruleno], desc->name, xt);
384: *(entry->mark-saved) = (Cell)xt;
385: move_args_left (entry, desc->len, saved);
386: burm_kids (entry, eruleno, kids);
387: return reduce (kids[0], nts[0], saved+desc->len-1);
388: }
389: }
390:
391:
392: static void dump_match (NODEPTR_TYPE p, int goalnt, int indent) {
393: int eruleno = burm_rule (STATE_LABEL(p), goalnt);
394: short *nts = burm_nts[eruleno];
395: NODEPTR_TYPE kids[2];
396: int i;
397: for (i = 0; i < indent; i++) printf (" ");
398: printf ("%s [%d]\n", burm_string[eruleno], eruleno);
399: burm_kids(p, eruleno, kids);
400: for (i = 0; nts[i]; i++)
401: dump_match (kids[i], nts[i], indent+1);
402: }
403:
404: #ifdef PIUMARTA
405:
406: /* Return the size (in bytes) for the superinstrcution from first to
407: (exluding) last. */
408: static unsigned
409: super_code_size (Dp_Table_Entry first, Dp_Table_Entry last, unsigned length) {
410: if (first == last)
411: return length;
412: else if (first+1 == last)
413: return length + (first->op->after_next - first->op->after_trace);
414: else
415: return super_code_size (first+1,
416: last,
417: length + (first->op->before_next
418: - first->op->after_trace));
419: }
420:
421: static void
422: concat_code (Dp_Table_Entry first, Dp_Table_Entry last, char* code) {
423: if (first == last)
424: return;
425: else if (first+1 == last)
426: memcpy (code, first->op->after_trace,
427: first->op->after_next - first->op->after_trace);
428: else {
429: unsigned size = first->op->before_next-first->op->after_trace;
430: memcpy (code, first->op->after_trace, size);
431: concat_code (first+1, last, code+size);
432: }
433: }
434:
435: static void apply_icache_magic (char* addr, unsigned size) {
436: FLUSH_ICACHE(addr, size);
437: }
438:
439: static void print_from_to (Dp_Table_Entry first, Dp_Table_Entry last) {
440: if (first == last) printf ("\n");
441: else {
442: printf ("[%s]", first->op->name);
443: print_from_to (first+1, last);
444: }
445: }
446:
447: static Prim_Descriptor search_desc (char *name) {
448: Prim_Descriptor d = peephole_descs;
449: for (;;)
450: if (d->name == 0) assert (0);
451: else if (strcmp (d->name, name) == 0) return d;
452: else { d++; continue; }
453: }
454:
455: static Xt get_trace_xt (void) {
456: static Xt trace_xt = 0;
457: if (trace_xt == 0) trace_xt = desc_to_xt (search_desc ("noop"));
458: return trace_xt;
459: }
460:
461: static unsigned
462: piumarta_gen_simple_inst (Dp_Table_Entry first, unsigned saved) {
463: Label* xt = (Label*)desc_to_xt (first->op);;
464: *(first->mark-saved) = (Cell)xt;
465: move_args_left (first, 1, saved);
466: return saved;
467: }
468:
469:
470: #if 0
471: static void alloc_xt (unsigned size, Xt *xt, char** code) {
472: # if defined(DIRECT_THREADED)
473: *xt = malloc (size);
474: assert (*xt);
475: *code = *xt;
476: return ;
477: # elsif DOUBLY_INDIRECT
478: /* this is never reached. blocked in comp.fs. */
479: assert (0); return;
480: # else
481: # warning Assuming indirect threaded
482: Xt* xtb = malloc (size + sizeof xt);
483: assert (xtb);
484: xtb[0] = (Xt)&xtb[1];
485: *xt = (Xt)xtb;
486: *code = (char*)&xtb[1];
487: return;
488: # endif
489: }
490: #else
491:
492: static Xt make_xt (char* code) {
493: # if defined(DIRECT_THREADED)
494: return code;
495: # elif defined(DOUBLY_INDIRECT)
496: /* this is never reached. blocked in comp.fs. */
497: assert (0); return 0;
498: # else
499: # warning Assuming indirect threaded
500: Xt xt = malloc (sizeof xt);
501: assert (xt);
502: *xt = code;
503: return xt;
504: # endif
505: }
506: static void alloc_xt (unsigned size, Xt *xt, char** code) {
507: *code = malloc (size);
508: assert (code);
509: *xt = make_xt (*code);
510: }
511:
512: #endif
513:
514:
515:
516: int peephole_enable_tracing = 0;
517: typedef struct sequence {
518: unsigned length;
519: Dp_Table_Entry start;
520: }* Sequence;
521:
522: static int sequence_eq (Sequence s1, Sequence s2) {
523: Dp_Table_Entry e1=s1->start, e2=s2->start, last=e1+s1->length;
524: XDP_DEBUG ({
525: print_from_to (e1, last);
526: print_from_to (e2, e2+s2->length); });
527: if (s1->length != s2->length) return 0;
528: for (;;)
529: if (e1 == last) return 1;
530: else if (e1->op == e2->op) { e1++; e2++; continue; }
531: else return 0;
532: }
533:
534: static unsigned sequence_hash (Sequence s, unsigned modulus) {
535: unsigned long h=0;
536: Dp_Table_Entry e=s->start, last=e+s->length;
537: for (;;)
538: if (e == last) return (h>>3) % modulus;
539: else { h+=(unsigned long)e->op; e++; continue; }
540: }
541:
542: static Hash_Table generated_insts = 0;
543: typedef Dp_Table_Entry Entry;
544:
545: static Xt lookup (Entry first, Entry last) {
546: unsigned len = last-first;
547: struct sequence seq = { len, first };
548: return hash_table_ref (generated_insts, &seq, 0);
549: }
550:
551: static void memoize (Entry first, Entry last, Xt xt) {
552: Sequence seq = malloc (sizeof *seq);
553: unsigned size = (last-first) * sizeof *first;
554: Dp_Table_Entry start = malloc (size);
555: assert (seq); assert (start);
556: memcpy (start, first, size);
557: seq->length = last-first;
558: seq->start = start;
559: XDP_DEBUG(print_from_to (start, start+(last-first)));
560: hash_table_set (generated_insts, seq, xt);
561: assert (lookup (first, last) == xt);
562: }
563:
564: static Xt combine_insts (Entry first, Entry last) {
565: Xt xt = lookup (first, last);
566: if (xt == 0) {
567: unsigned size = super_code_size (first, last, 0);
568: char* code = 0;
569: alloc_xt (size, &xt, &code);
570: concat_code (first, last, code);
571: apply_icache_magic (code, size);
572: memoize (first, last, xt);
573: return xt;
574: }
575: else { dprintf ("reusing combination.\n"); return xt; }
576: }
577:
578: static unsigned
579: piumarta_gen_combined_inst (Entry first, Entry last, unsigned saved) {
580: Xt xt = combine_insts (first, last);
581: unsigned len = last-first;
582: unsigned offset = saved-(len-1);
583: dprintf ("len = %d, saved = %d\n", len, saved);
584: if (!peephole_enable_tracing) {
585: move_args_left (first, len, offset);
586: *(first->mark-offset) = (Cell)xt;
587: return saved;
588: } else {
589: /** hairy: Move operands for the first op 1 slot to
590: right (possible because len>=2). Move operands of
591: the remaining ops offset-1 slots to left (and
592: eliminate empty slots). */
593: Cell* from = first->mark+1;
594: Cell* to = from-offset+1;
595: size_t size = first[1].mark-from;
596: memmove (to, from, size * sizeof *from);
597: move_args_left (first+1, len-1, offset);
598: *(to-1) = (Cell)xt;
599: *(to-2) = (Cell)get_trace_xt ();
600: return saved-1;
601: }
602: }
603:
604: static unsigned piumarta_gen_inst (Entry first, Entry last, unsigned saved) {
605: XDP_DEBUG (print_from_to (first, last));
606: switch (last-first) {
607: case 0: return saved;
608: case 1: return piumarta_gen_simple_inst (first, saved);
609: default: return piumarta_gen_combined_inst (first, last, saved);
610: }
611: }
612:
613: static unsigned
614: piumartaize (Dp_Table_Entry lag, Dp_Table_Entry tail, unsigned saved) {
615: assert (tail >= lag);
616: if (dp_table_end_p (tail) && tail == lag) return saved;
617: else if (dp_table_end_p (tail)) {
618: return piumarta_gen_inst (lag, tail, saved);
619: }
620: else if (tail->op->relocatable)
621: return piumartaize (lag, tail+1, saved+((tail==lag)?0:1));
622: else {
623: saved = piumarta_gen_inst (lag, tail, saved);
624: saved = piumarta_gen_inst (tail, tail+1, saved);
625: return piumartaize (tail+1, tail+1, saved);
626: }
627: }
628:
629: #endif /* PIUMARTA */
630:
631: Cell* peephole_dp_flush (Cell* mark) {
632: assert (dp_table[dp_table_len].op == 0);
633: assert (dp_table_len == 0 ||
634: dp_table[dp_table_len-1].mark < mark );
635: dp_table[dp_table_len].mark = mark;
636: XDP_DEBUG(dump_dp_table ());
637: #ifndef PIUMARTA
638: burm_label (dp_table);
639: XDP_DEBUG(dump_match (dp_table, 1, 0));
640: {
641: unsigned saved = reduce (dp_table, 1, 0);
642: dp_table_clear ();
643: return mark-saved;
644: }
645: #else
646: {
647: unsigned saved = piumartaize (dp_table, dp_table, 0);
648: dp_table_clear ();
649: return mark-saved;
650: }
651: #endif
652: }
653:
654:
655: static Hash_Table ca_table = 0;
656: Prim_Descriptor peephole_code_address_to_desc (void* ca) {
657: assert (ca_table);
658: return hash_table_ref (ca_table, ca, 0);
659: }
660:
661: static void init_ca_table (Prim_Descriptor descs) {
662: unsigned len = 0;
663: Prim_Descriptor d = descs;
664: for (;;)
665: if (d->name == 0) break;
666: else { len++; d++; continue; }
667: ca_table = make_hash_table (len, hash_table_addr_eq,
668: hash_table_addr_hash);
669: d=descs;
670: for (;;)
671: if (d->name == 0) break;
672: else { hash_table_set (ca_table, d->start, d); d++; continue; }
673: }
674:
675: void peephole_init (Label* symbols, Prim_Descriptor descs) {
676: peephole_symbols = symbols;
677: peephole_descs = descs;
678: init_ca_table (descs);
679: generated_insts = make_hash_table (2000,
680: (Key_Eq)sequence_eq,
681: (Key_Hash)sequence_hash);
682: }
683:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>