1: \ recursive descent parser generator )
2:
3: \ Copyright (C) 1995,1996,1997,2000,2003,2007,2008,2012 Free Software Foundation, Inc.
4: \ Copyright 1990, 1991, 1994 Martin Anton Ertl
5:
6: \ This file is part of Gforth.
7:
8: \ Gforth is free software; you can redistribute it and/or
9: \ modify it under the terms of the GNU General Public License
10: \ as published by the Free Software Foundation, either version 3
11: \ of the License, or (at your option) any later version.
12:
13: \ This program is distributed in the hope that it will be useful,
14: \ but WITHOUT ANY WARRANTY; without even the implied warranty of
15: \ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16: \ GNU General Public License for more details.
17:
18: \ You should have received a copy of the GNU General Public License
19: \ along with this program. If not, see http://www.gnu.org/licenses/.
20:
21: \ ANS FORTH prologue
22:
23: : defined? ( "word" -- flag ) bl word find nip ;
24: defined? WARNINGS 0=
25: [IF]
26: Variable warnings
27: warnings on
28: [THEN]
29:
30: \ end of ANS FORTH prolog
31:
32: \ misc )
33: : noop ;
34:
35: 1 cells constant cell
36: s" address-unit-bits" environment? 0=
37: [IF]
38: warnings @ [IF]
39: cr .( environmental attribute address-units-bits unknown, computing... ) cr
40: [THEN]
41: \ if your machine has more bits/au, this assumption wastes space
42: \ if your machine has fewer bits/au, gray will not work
43: : (bits/cell) ( -- n ) 1 0 invert dup 1 rshift xor
44: BEGIN dup 1 = 0= WHILE 1 rshift swap 1+ swap REPEAT drop ;
45: (bits/cell)
46: warnings @ [IF]
47: .( You seem to have ) dup 1 cells / . .( bits/address unit) cr
48: [THEN]
49: [ELSE]
50: cells
51: [THEN]
52: constant bits/cell \ !! implementation dependent )
53:
54: : ?not? ( f -- f )
55: postpone 0= ; immediate
56:
57: : 2, ( w1 w2 -- )
58: here 2 cells allot 2! ;
59:
60: : endif postpone then ; immediate
61:
62: : ?pairs ( n1 n2 -- )
63: ( aborts, if the numbers are not equal )
64: = ?not? abort" mismatched parenthesis" ;
65:
66: : ', \ -- ) ( use: ', name )
67: ' , ;
68:
69: 1 0= constant false
70: 0 0= constant true
71:
72: \ stack administration )
73: \ this implementation is completely unsafe )
74:
75: : stack \ n -- )
76: \ use: n stack word )
77: \ creates a stack called word with n cells )
78: \ the first cell is the stackpointer )
79: create here , cells allot ;
80:
81: : push \ n stack -- )
82: cell over +! @ ! ;
83:
84: : top \ stack -- n )
85: @ @ ;
86:
87: : pop \ stack -- )
88: [ -1 cells ] literal swap +! ;
89:
90: : clear? \ stack -- f )
91: dup @ = ;
92:
93: : clear \ stack -- )
94: dup ! ;
95:
96:
97: \ sets - represented as bit arrays )
98: \ bits that represent no elements, must be 0 )
99: \ all operations assume valid parameters )
100: \ elements must be unsigned numbers )
101: \ the max. element size must be declared with max-member )
102: \ no checking is performed )
103: \ set operations allot memory )
104:
105: : decode \ u -- w )
106: \ returns a cell with bit# u set and everyting else clear )
107: 1 swap lshift ;
108:
109: variable cells/set 0 cells/set !
110: variable empty-ptr 0 empty-ptr ! \ updatd by max-member )
111: : empty \ -- set )
112: empty-ptr @ ;
113:
114: : max-member \ u -- )
115: \ declares u to be the maximum member of sets generated afterwards )
116: \ must be called before using any set word except member?, add-member )
117: bits/cell / 1+
118: dup cells/set !
119: here empty-ptr ! \ make empty set )
120: 0 do 0 , loop ;
121:
122: : copy-set \ set1 -- set2 )
123: \ makes a copy of set1 )
124: here swap
125: cells/set @ 0 do
126: dup @ ,
127: cell+ loop
128: drop ;
129:
130: : normalize-bit-addr \ addr1 u1 -- addr2 u2 )
131: \ addr1*bits/cell+u1=addr2*bits/cell+u2, u2<bits/cell )
132: bits/cell /mod
133: cells rot +
134: swap ;
135: \ the /mod could be optimized into a RSHIFT and an AND, if bits/cell is
136: \ a power of 2, but in an interpreted implementation this would only be
137: \ faster if the machine has very slow division and in a native code
138: \ implementation the compiler should be intelligent enough to optimize
139: \ without help.
140:
141: : add-member \ u set -- )
142: \ changes set to include u )
143: swap normalize-bit-addr
144: decode
145: over @ or swap ! ;
146:
147: : singleton \ u -- set )
148: \ makes a set that contains u and nothing else )
149: empty copy-set swap over add-member ;
150:
151: : member? \ set u -- f )
152: \ returns true if u is in set )
153: normalize-bit-addr
154: decode
155: swap @ and
156: 0= ?not? ;
157:
158: : binary-set-operation \ set1 set2 [w1 w2 -- w3] -- set )
159: \ creates set from set1 and set2 by applying [w1 w2 -- w3] on members )
160: \ e.g. ' or binary-set-operation is the union operation )
161: here >r
162: cells/set @ 0 do >r
163: over @ over @ r@ execute ,
164: cell+ swap cell+ swap
165: r> loop
166: drop 2drop r> ;
167:
168: : union1 \ set1 set2 -- set )
169: ['] or binary-set-operation ;
170:
171: : intersection \ set1 set2 -- set )
172: ['] and binary-set-operation ;
173:
174: : binary-set-test? \ set1 set2 [w1 w2 -- w3] -- f )
175: \ returns true, if [w1 w2 -- w3] binary-set-operation returns empty )
176: \ e.g. set1 set2 ' and binary-set-test? is true, if set1 and set2
177: \ are disjoint, i.e. they contain no common members )
178: >r true rot rot r>
179: cells/set @ 0 do >r
180: over @ over @ r@ execute 0= ?not? if
181: rot drop false rot rot
182: endif
183: cell+ swap cell+ swap
184: r> loop
185: drop 2drop ;
186:
187: : notb&and \ w1 w2 -- w3 )
188: -1 xor and ;
189:
190: : subset? \ set1 set2 -- f )
191: \ returns true if every member of set1 is in set2 )
192: ['] notb&and binary-set-test? ;
193:
194: : disjoint? \ set1 set2 -- f )
195: \ returns true if set1 and set2 heve no common members )
196: ['] and binary-set-test? ;
197:
198: : apply-to-members \ set [ u -- ] -- )
199: \ executes [ u -- ] for every member of set )
200: cells/set @ bits/cell * 0 do
201: over i member? if
202: i over execute
203: endif
204: loop
205: 2drop ;
206:
207: : union \ set1 set2 -- set )
208: \ just a little more space-efficient )
209: 2dup subset? if
210: swap drop
211: else 2dup swap subset? if
212: drop
213: else
214: union1
215: endif endif ;
216:
217:
218: \ tests )
219: variable test-vector ' abort test-vector !
220: \ here you should store the execution address of a word ( set -- f )
221: \ that returns true if the token of the current symbol is in set )
222:
223: : compile-test \ set -- )
224: postpone literal
225: test-vector @ compile, ;
226:
227:
228: \ context management )
229: 500 stack context-stack
230: \ this stack holds the syntax-exprs currently being treated )
231: \ enlarge it, if your grammar is large and complex )
232: context-stack clear
233:
234: : this \ -- syntax-expr )
235: \ get current syntax-expr )
236: context-stack top ;
237:
238: : new-context \ syntax-expr -- )
239: context-stack push ;
240:
241: : old-context \ -- )
242: context-stack pop ;
243:
244:
245: \ structures )
246: : <builds-field \ n1 n2 -- n3 ) ( defining-word )
247: \ n1 is the offset of the field, n2 its length, n3 the offset of the
248: \ next field; creates a word that contains the offset )
249: create over , + ;
250:
251: 0 constant struct
252: \ initial offset
253:
254: : context-var \ use: < offset > size context-var name < offset2 > )
255: \ name returns the address of the offset field of "this" )
256: <builds-field \ n1 n2 -- n3 )
257: does> \ -- addr )
258: @ this + ;
259:
260: : context-const \ use: < offset > context-const name < offset2 > )
261: \ name returns the contents of the field of this at offset )
262: cell <builds-field \ n1 -- n2 )
263: does> \ -- n )
264: @ this + @ ;
265:
266:
267: \ syntax-exprs )
268: struct
269: aligned context-const methods
270: \ table of words applicable to the syntax-expr (a map)
271: 1 context-var mark-propagate \ used to ensure that "propagate" is
272: \ called at least once for each syntax-expr )
273: 1 context-var mark-pass2
274: \ make sure pass2 is called exactly once )
275: aligned cell context-var first-set
276: \ all tokens a nonempty path may begin with )
277: \ if it's equal to 0, the first-set has not been computed yet )
278: 1 context-var maybe-empty
279: \ true if the syntax-expr can derive eps )
280: aligned cell context-var follow-set
281: \ the tokens of the terminals that can follow the syntax-expr )
282: s" gforth" environment?
283: [IF] 2drop \ clear gforth's version numbers )
284: aligned 2 cells context-var source-location \ for error msgs )
285: [ELSE]
286: s" bigFORTH" environment?
287: [IF] 2drop \ clear bigFORTH' version numbers )
288: aligned cell context-var source-location
289: \ for error msgs
290: [ELSE]
291: \ !! replace the stuff until constant with something working on your system
292: aligned 3 cells context-var source-location
293: \ for error msgs
294: 80 chars context-var error-info
295: \ string
296: [THEN] [THEN]
297: aligned constant syntax-expr \ length of a syntax-expr )
298:
299: : make-syntax-expr \ map -- syntax-expr )
300: \ allocate a syntax-expr and initialize it )
301: here swap , false c, false c,
302: align 0 , false c, align empty ,
303: \ source location. !! replace the stuff until `;' with your stuff
304: \ if you use blocks, use:
305: \ blk @ >in @ 2,
306: \ the following is just a dummy
307: [ s" gforth" environment? ]
308: [IF] [ 2drop ]
309: 0 sourceline# 2,
310: [ELSE]
311: [ s" bigFORTH" environment? ]
312: [IF] [ 2drop ]
313: makeview w, >in @ w,
314: [ELSE]
315: source 80 min >r here 3 cells + r@ cmove
316: here 3 cells + , r@ , >in @ 80 min , r> chars allot align
317: [THEN] [THEN]
318: ;
319:
320:
321: \ warnings and errors )
322: : .in \ -- )
323: \ !! implementation dependent )
324: \ prints the info stored in source-location in a usable way )
325: \ prints where the error happened )
326: [ s" gforth" environment? ]
327: [IF] [ 2drop ]
328: source-location 2@ ." line" . drop ." :" ;
329: [ELSE]
330: [ s" bigFORTH" environment? ]
331: [IF] [ 2drop ]
332: source-location dup w@ $3FF and scr ! 2+ w@ r# ! ;
333: [ELSE]
334: source-location 2@ swap cr type cr
335: error-info @ 2 - spaces ." ^" cr ." ::: " ;
336: [THEN] [THEN]
337:
338: : gray-error abort ;
339:
340: : internal-error
341: cr .in ." you found a bug" gray-error ;
342:
343: variable print-token ' . print-token !
344: \ contains execution address of a word < token -- > to print a token )
345:
346: : check-conflict \ set1 set2 -- )
347: \ print the intersection of set1 and set2 if it isn't empty )
348: 2dup disjoint? ?not? warnings @ and if
349: cr .in ." conflict:"
350: intersection print-token @ apply-to-members
351: else
352: 2drop
353: endif ;
354:
355:
356: \ methods and maps )
357: : method \ use: < offset > method name < offset2 > )
358: \ executes the word whose execution address is stored in the field
359: \ at offset of a table pointed to by the "methods" field of "this" )
360: cell <builds-field \ n1 -- n2 )
361: does>
362: @ methods + @ execute ;
363:
364: \ method table for syntax-exprs
365: struct
366: method compute-method
367: method propagate-method
368: method generate-method
369: method pass2-method
370: constant syntax-expr-methods
371:
372:
373: \ general routines )
374: : compute \ syntax-expr -- first-set maybe-empty )
375: \ compute the first-set and maybe-empty of a syntax-expr )
376: \ a bit of memoization is used here )
377: new-context
378: first-set @ 0= if
379: compute-method
380: maybe-empty c!
381: first-set !
382: endif
383: first-set @ maybe-empty c@
384: old-context ;
385:
386: : get-first \ syntax-expr -- first-set )
387: compute drop ;
388:
389: : check-cycle \ syntax-expr -- )
390: \ just check for left recursion )
391: compute 2drop ;
392:
393: : propagate \ follow-set syntax-expr -- )
394: \ add follow-set to the follow set of syntax-expr and its children )
395: new-context
396: dup follow-set @ subset? ?not? \ would everything stay the same
397: mark-propagate c@ ?not? or if \ and was propagate here already
398: true mark-propagate c! \ NO, do propagate
399: follow-set @ union dup follow-set !
400: propagate-method
401: else
402: drop
403: endif
404: old-context ;
405:
406: : generate \ syntax-expr -- )
407: \ this one gets things done )
408: new-context generate-method old-context ;
409:
410: : pass2 \ syntax-expr -- )
411: \ computes all necessary first sets, checks for left recursions
412: \ and conflicts and generates code for rules )
413: new-context
414: mark-pass2 c@ ?not? if
415: true mark-pass2 c!
416: this check-cycle
417: pass2-method
418: endif
419: old-context ;
420:
421:
422: \ main routine )
423: : parser \ syntax-expr -- )
424: \ use: syntax-expr parser xxx )
425: context-stack clear
426: empty over propagate
427: dup pass2
428: \ : should not be immediate
429: >r : r> generate postpone ; ;
430:
431:
432: \ eps - empty syntax-expr )
433: create eps-map
434: ', internal-error
435: ', drop
436: ', noop
437: ', noop
438:
439:
440: create eps1
441: \ the eps syntax-expr proper
442: eps-map make-syntax-expr
443: drop
444:
445:
446: : eps \ -- syntax-expr )
447: \ just adjusts eps1 and returns it
448: eps1 new-context
449: empty first-set ! ( empty changes due to max-member )
450: empty follow-set !
451: true maybe-empty c!
452: old-context
453: eps1 ;
454:
455:
456: \ terminals )
457: \ a terminal is a syntax-expr with an extra field )
458: syntax-expr
459: context-const check&next
460: \ contains address of a word < f -- > that checks
461: \ if f is true and reads the next terminal symbol )
462: constant terminal-syntax-expr
463:
464: : generate-terminal \ -- )
465: this get-first compile-test
466: check&next compile, ;
467:
468: create terminal-map
469: ', internal-error
470: ', drop
471: ', generate-terminal
472: ', noop
473:
474: : make-terminal \ first-set cfa -- syntax-expr )
475: terminal-map make-syntax-expr
476: new-context
477: ,
478: first-set !
479: this old-context ;
480:
481: : terminal \ first-set cfa -- )
482: create make-terminal drop ;
483:
484:
485: \ binary syntax-exprs )
486: syntax-expr
487: context-const operand1
488: context-const operand2
489: constant binary-syntax-expr
490:
491: : make-binary \ syntax-expr1 syntax-expr2 map -- syntax-expr )
492: make-syntax-expr rot , swap , ;
493:
494: : pass2-binary
495: operand1 pass2
496: operand2 pass2 ;
497:
498:
499: \ concatenations )
500: : compute-concatenation \ -- first maybe-empty )
501: operand1 compute dup if
502: drop
503: operand2 compute
504: >r union r>
505: endif ;
506:
507: : propagate-concatenation \ follow-set -- )
508: operand2 compute if
509: over union
510: endif \ follow follow1 )
511: operand1 propagate
512: operand2 propagate ;
513:
514: : generate-concatenation \ -- )
515: operand1 generate
516: operand2 generate ;
517:
518: create concatenation-map
519: ', compute-concatenation
520: ', propagate-concatenation
521: ', generate-concatenation
522: ', pass2-binary
523:
524: : concat \ syntax-expr1 syntax-expr2 -- syntax-expr )
525: concatenation-map make-binary ;
526: \ this is the actual concatenation operator )
527: \ but for safety and readability the parenthesised notation )
528: \ is preferred )
529:
530:
531: \ alternatives )
532: : compute-alternative \ -- first maybe-empty )
533: operand1 compute
534: operand2 compute
535: rot 2dup and warnings @ and if
536: cr .in ." warning: two branches may be empty" endif
537: or >r union r> ;
538:
539: : propagate-alternative \ follow -- )
540: dup operand1 propagate
541: operand2 propagate ;
542:
543: : generate-alternative1 \ -- )
544: operand1 get-first compile-test
545: postpone if
546: operand1 generate
547: postpone else
548: operand2 generate
549: postpone endif ;
550:
551: : generate-alternative2 \ -- )
552: operand1 get-first compile-test postpone ?not?
553: operand2 get-first compile-test postpone and
554: postpone if
555: operand2 generate
556: postpone else
557: operand1 generate
558: postpone endif ;
559:
560: : generate-alternative \ -- )
561: operand1 compute if
562: generate-alternative2
563: else
564: generate-alternative1
565: endif
566: drop ;
567:
568: : pass2-alternative \ -- )
569: this compute if
570: follow-set @ check-conflict
571: else
572: drop
573: endif
574: operand1 get-first operand2 get-first check-conflict
575: pass2-binary ;
576:
577: create alternative-map
578: ', compute-alternative
579: ', propagate-alternative
580: ', generate-alternative
581: ', pass2-alternative
582:
583: : alt \ syntax-expr1 syntax-expr2 -- syntax-expr )
584: alternative-map make-binary ;
585: \ this is the actual alternative operator )
586: \ but for safety and readability the parenthesised notation )
587: \ is preferred )
588:
589:
590: \ unary syntax-exprs )
591: syntax-expr
592: context-const operand
593: constant unary-syntax-expr
594:
595: : make-unary \ syntax-expr1 map -- syntax-expr2 )
596: make-syntax-expr swap , ;
597:
598:
599: \ options and repetitions )
600: : pass2-option&repetition \ -- )
601: follow-set @ operand get-first check-conflict
602: operand pass2 ;
603:
604:
605: \ options )
606: : compute-option \ -- set f )
607: operand compute warnings @ and if
608: cr .in ." warning: unnessesary option" endif
609: true ;
610:
611: : propagate-option \ follow -- )
612: operand propagate ;
613:
614: : generate-option \ -- )
615: operand get-first compile-test
616: postpone if
617: operand generate
618: postpone endif ;
619:
620: create option-map
621: ', compute-option
622: ', propagate-option
623: ', generate-option
624: ', pass2-option&repetition
625:
626: : ?? \ syntax-expr1 -- syntax-expr2 )
627: option-map make-unary ;
628:
629:
630: \ repetitions )
631: : propagate-repetition \ follow-set -- )
632: operand get-first union operand propagate ;
633:
634:
635: \ *-repetitions )
636: : compute-*repetition \ -- set f )
637: operand compute warnings @ and if
638: cr .in ." warning: *repetition of optional term" endif
639: true ;
640:
641: : generate-*repetition \ -- )
642: postpone begin
643: operand get-first compile-test
644: postpone while
645: operand generate
646: postpone repeat ;
647:
648: create *repetition-map
649: ', compute-*repetition
650: ', propagate-repetition
651: ', generate-*repetition
652: ', pass2-option&repetition
653:
654: : ** \ syntax-expr1 -- syntax-expr2 )
655: *repetition-map make-unary ;
656:
657:
658: \ +-repetitions )
659: : compute-+repetition \ -- set f )
660: operand compute ;
661:
662: : generate-+repetition \ -- )
663: postpone begin
664: operand generate
665: operand get-first compile-test
666: postpone ?not? postpone until ;
667:
668: create +repetition-map
669: ', compute-+repetition
670: ', propagate-repetition
671: ', generate-+repetition
672: ', pass2-option&repetition
673:
674: : ++ \ syntax-expr1 -- syntax-expr2 )
675: +repetition-map make-unary ;
676:
677:
678: \ actions )
679: syntax-expr
680: context-const action
681: constant action-syntax-expr
682:
683: : generate-action \ syntax-expr -- )
684: action compile, ;
685:
686: create action-map
687: ', internal-error
688: ', drop
689: ', generate-action
690: ', noop
691:
692: : {{ \ -- syntax-expr addr colon-sys )
693: action-map make-syntax-expr
694: new-context
695: empty first-set !
696: true maybe-empty c!
697: this old-context
698: \ ?exec !csp )
699: here cell allot
700: :noname ;
701:
702: : }} \ syntax-expr addr colon-sys -- syntax-expr )
703: \ ?csp )
704: postpone ;
705: swap !
706: ; immediate
707:
708:
709: \ nonterminals )
710: syntax-expr
711: 1 context-var mark-compute
712: aligned cell context-var rule-body \ in forth left side of rule )
713: cell context-var exec \ cfa of code for rule )
714: constant nt-syntax-expr
715:
716: : get-body \ -- syntax-expr )
717: \ get the body of the rule for the nt in "this" )
718: rule-body @ if
719: rule-body @
720: else
721: cr .in ." no rule for nonterminal" gray-error
722: endif ;
723:
724: : compute-nt \ -- set f )
725: mark-compute c@ if
726: cr .in ." left recursion" gray-error
727: else
728: true mark-compute c!
729: get-body compute
730: endif ;
731:
732: : propagate-nt \ follow-set -- )
733: get-body propagate ;
734:
735: : code-nt \ -- )
736: \ generates the code for a rule )
737: :noname
738: get-body generate
739: postpone ;
740: exec ! ;
741:
742: : generate-nt \ -- )
743: \ generates a call to the code for the rule )
744: \ since the code needs not be generated yet, an indirect call is used )
745: exec dup @ if
746: @ compile,
747: else
748: postpone literal
749: postpone @
750: postpone execute
751: endif ;
752:
753: : pass2-nt \ -- )
754: \ apart from the usual duties, this pass2 also has to code-nt )
755: get-body pass2
756: code-nt ;
757:
758: create nt-map
759: ', compute-nt
760: ', propagate-nt
761: ', generate-nt
762: ', pass2-nt
763:
764: : make-nt \ syntax-expr -- nt )
765: nt-map make-syntax-expr
766: false c, align swap , 0 , ;
767:
768: : <- \ use: syntax-expr <- xxx )
769: \ xxx: -- syntax-expr )
770: create make-nt drop ;
771:
772: : nonterminal \ use: nonterminal xxx )
773: 0 <- ; \ forward declaration )
774:
775: : rule \ syntax-expr nt -- )
776: \ makes a rule )
777: new-context
778: rule-body @ if
779: .in ." multiple rules for nonterminal" gray-error endif
780: rule-body !
781: old-context ;
782:
783:
784: \ syntactic sugar )
785: : reduce \ 0 x1 ... [x2 x3 -- x4] -- x )
786: \ e.g. 0 5 6 7 ' + reduce = 5 6 7 + + = 18 )
787: >r dup 0= if
788: ." no operand" abort
789: endif
790: begin
791: over 0= ?not? while
792: r@ execute
793: repeat \ 0 x )
794: swap drop r> drop ;
795:
796: 7 constant concatenation-id
797: : (- \ -- n 0 )
798: concatenation-id 0 ;
799: : -) \ n 0 syntax-expr1 syntax-expr2 .. -- syntax-expr )
800: ['] concat reduce
801: swap concatenation-id ?pairs ;
802:
803: 8 constant alternative-id
804: : (| \ -- n 0 )
805: alternative-id 0 ;
806: : |) \ n 0 syntax-expr1 syntax-expr2 .. -- syntax-expr )
807: ['] alt reduce
808: swap alternative-id ?pairs ;
809:
810: : (( (| (- ;
811: : )) -) |) ;
812: : || -) (- ;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>