--- gforth/prim	2005/01/19 22:11:52	1.158
+++ gforth/prim	2005/08/21 22:09:14	1.175
@@ -100,6 +100,8 @@
 \E s" struct F83Name *"	single data-stack type-prefix f83name
 \E s" struct Longname *" single data-stack type-prefix longname
 \E 
+\E data-stack   stack-prefix S:
+\E fp-stack     stack-prefix F:
 \E return-stack stack-prefix R:
 \E inst-stream  stack-prefix #
 \E 
@@ -138,7 +140,7 @@
 
 \ Stack caching setup
 
-ifdef(`M4_ENGINE_FAST', `include(cache1.vmg)', `include(cache0.vmg)')
+ifdef(`STACK_CACHE_FILE', `include(STACK_CACHE_FILE)', `include(cache0.vmg)')
 
 \ these m4 macros would collide with identifiers
 undefine(`index')
@@ -191,7 +193,7 @@ goto *next_code;
 ip=IP; /* undo any ip updating that may have been performed by NEXT_P0 */
 #endif /* !defined(NO_IP) */
 SUPER_END; /* !! probably unnecessary and may lead to measurement errors */
-EXEC(*(Xt *)PFA(CFA));
+VM_JUMP(EXEC1(*(Xt *)PFA(CFA)));
 
 (dofield) ( n1 -- n2 )	gforth-internal	paren_field
 ""run-time routine for fields""
@@ -249,9 +251,8 @@ execute	( xt -- )		core
 #ifndef NO_IP
 ip=IP;
 #endif
-IF_spTOS(spTOS = sp[0]); /* inst_tail would produce a NEXT_P1 */
 SUPER_END;
-EXEC(xt);
+VM_JUMP(EXEC1(xt));
 
 perform	( a_addr -- )	gforth
 ""@code{@@ execute}.""
@@ -259,9 +260,8 @@ perform	( a_addr -- )	gforth
 #ifndef NO_IP
 ip=IP;
 #endif
-IF_spTOS(spTOS = sp[0]); /* inst_tail would produce a NEXT_P1 */
 SUPER_END;
-EXEC(*(Xt *)a_addr);
+VM_JUMP(EXEC1(*(Xt *)a_addr));
 :
  @ execute ;
 
@@ -284,7 +284,7 @@ lit-perform	( #a_addr -- )	new	lit_perfo
 ip=IP;
 #endif
 SUPER_END;
-EXEC(*(Xt *)a_addr);
+VM_JUMP(EXEC1(*(Xt *)a_addr));
 
 does-exec ( #a_cfa -- R:nest a_pfa )	new	does_exec
 #ifdef NO_IP
@@ -293,7 +293,6 @@ assert(0);
 #else
 a_pfa = PFA(a_cfa);
 nest = (Cell)IP;
-IF_spTOS(spTOS = sp[0]);
 #ifdef DEBUG
     {
       CFA_TO_NAME(a_cfa);
@@ -324,15 +323,14 @@ INST_TAIL;
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
-INST_TAIL;
-NEXT_P2;
 #endif
-SUPER_CONTINUE;  /* we do our own control flow, so don't append NEXT etc. */
 :
  r> @ >r ;
 
 \ condbranch(forthname,stackeffect,restline,code1,code2,forthcode)
 \ this is non-syntactical: code must open a brace that is closed by the macro
+\ condbranch(forthname,stackeffect,restline,code1,code2,forthcode)
+\ this is non-syntactical: code must open a brace that is closed by the macro
 define(condbranch,
 $1 ( `#'a_target $2 ) $3
 $4	#ifdef NO_IP
@@ -342,6 +340,37 @@ $5	#ifdef NO_IP
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
+#endif
+}
+$6
+
+\+glocals
+
+$1-lp+!`#' ( `#'a_target `#'nlocals $2 ) $3_lp_plus_store_number
+$4	#ifdef NO_IP
+INST_TAIL;
+#endif
+$5	lp += nlocals;
+#ifdef NO_IP
+JUMP(a_target);
+#else
+SET_IP((Xt *)a_target);
+#endif
+}
+
+\+
+)
+
+\ version that generates two jumps (not good for PR 15242 workaround)
+define(condbranch_twojump,
+$1 ( `#'a_target $2 ) $3
+$4	#ifdef NO_IP
+INST_TAIL;
+#endif
+$5	#ifdef NO_IP
+JUMP(a_target);
+#else
+SET_IP((Xt *)a_target);
 INST_TAIL; NEXT_P2;
 #endif
 }
@@ -380,37 +409,31 @@ condbranch(?branch,f --,f83	question_bra
 
 \+xconds
 
-?dup-?branch	( #a_target f -- f )	new	question_dupe_question_branch
+?dup-?branch	( #a_target f -- S:... )	new	question_dupe_question_branch
 ""The run-time procedure compiled by @code{?DUP-IF}.""
 if (f==0) {
-  sp++;
-  IF_spTOS(spTOS = sp[0]);
 #ifdef NO_IP
 INST_TAIL;
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
-  INST_TAIL; NEXT_P2;
 #endif
+} else {
+sp--;
+sp[0]=f;
 }
-SUPER_CONTINUE;
 
-?dup-0=-?branch ( #a_target f -- ) new	question_dupe_zero_equals_question_branch
+?dup-0=-?branch ( #a_target f -- S:... ) new	question_dupe_zero_equals_question_branch
 ""The run-time procedure compiled by @code{?DUP-0=-IF}.""
-/* the approach taken here of declaring the word as having the stack
-effect ( f -- ) and correcting for it in the branch-taken case costs a
-few cycles in that case, but is easy to convert to a CONDBRANCH
-invocation */
 if (f!=0) {
   sp--;
+  sp[0]=f;
 #ifdef NO_IP
   JUMP(a_target);
 #else
   SET_IP((Xt *)a_target);
-  NEXT;
 #endif
 }
-SUPER_CONTINUE;
 
 \+
 \fhas? skiploopprims 0= [IF]
@@ -491,10 +514,8 @@ if (nstart == nlimit) {
     JUMP(a_target);
 #else
     SET_IP((Xt *)a_target);
-    INST_TAIL; NEXT_P2;
 #endif
 }
-SUPER_CONTINUE;
 :
   2dup =
   IF   r> swap rot >r >r
@@ -514,10 +535,8 @@ if (nstart >= nlimit) {
     JUMP(a_target);
 #else
     SET_IP((Xt *)a_target);
-    INST_TAIL; NEXT_P2;
 #endif
 }
-SUPER_CONTINUE;
 :
  swap 2dup
  r> swap >r swap >r
@@ -537,10 +556,8 @@ if (ustart >= ulimit) {
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
-INST_TAIL; NEXT_P2;
 #endif
 }
-SUPER_CONTINUE;
 :
  swap 2dup
  r> swap >r swap >r
@@ -560,10 +577,8 @@ if (nstart <= nlimit) {
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
-INST_TAIL; NEXT_P2;
 #endif
 }
-SUPER_CONTINUE;
 :
  swap 2dup
  r> swap >r swap >r
@@ -583,10 +598,8 @@ if (ustart <= ulimit) {
 JUMP(a_target);
 #else
 SET_IP((Xt *)a_target);
-INST_TAIL; NEXT_P2;
 #endif
 }
-SUPER_CONTINUE;
 :
  swap 2dup
  r> swap >r swap >r
@@ -787,20 +800,67 @@ n = n1*n2;
 
 /	( n1 n2 -- n )		core	slash
 n = n1/n2;
+if(FLOORED_DIV && ((n1^n2) < 0) && (n1%n2 != 0)) n--;
 :
  /mod nip ;
 
 mod	( n1 n2 -- n )		core
 n = n1%n2;
+if(FLOORED_DIV && ((n1^n2) < 0) && n!=0) n += n2;
 :
  /mod drop ;
 
 /mod	( n1 n2 -- n3 n4 )		core		slash_mod
 n4 = n1/n2;
 n3 = n1%n2; /* !! is this correct? look into C standard! */
+if (FLOORED_DIV && ((n1^n2) < 0) && n3!=0) {
+  n4--;
+  n3+=n2;
+}
 :
  >r s>d r> fm/mod ;
 
+*/mod	( n1 n2 n3 -- n4 n5 )	core	star_slash_mod
+""n1*n2=n3*n5+n4, with the intermediate result (n1*n2) being double.""
+#ifdef BUGGY_LL_MUL
+DCell d = mmul(n1,n2); /* double-cell product obtained from the mmul helper */
+#else
+DCell d = (DCell)n1 * (DCell)n2; /* widen both operands before multiplying */
+#endif
+#ifdef BUGGY_LL_DIV
+DCell r = fmdiv(d,n3); /* helper result: high part is used as remainder, low part as quotient */
+n4=DHI(r);
+n5=DLO(r);
+#else
+/* assumes that the processor uses either floored or symmetric division */
+n5 = d/n3; /* quotient from the C division operator */
+n4 = d%n3; /* matching remainder */
+if (FLOORED_DIV && ((DHI(d)^n3)<0) && n4!=0) { /* sign bits differ and remainder is nonzero: adjust towards the floored result */
+  n5--;
+  n4+=n3;
+}
+#endif
+:
+ >r m* r> fm/mod ;
+
+*/	( n1 n2 n3 -- n4 )	core	star_slash
+""n4=(n1*n2)/n3, with the intermediate result being double.""
+#ifdef BUGGY_LL_MUL
+DCell d = mmul(n1,n2); /* double-cell product obtained from the mmul helper */
+#else
+DCell d = (DCell)n1 * (DCell)n2; /* widen both operands before multiplying */
+#endif
+#ifdef BUGGY_LL_DIV
+DCell r = fmdiv(d,n3); /* only the low part (the quotient) of the helper result is kept */
+n4=DLO(r);
+#else
+/* assumes that the processor uses either floored or symmetric division */
+n4 = d/n3; /* quotient from the C division operator */
+if (FLOORED_DIV && ((DHI(d)^n3)<0) && (d%n3)!=0) n4--; /* sign bits differ and division is inexact: step down to the floored quotient */
+#endif
+:
+ */mod nip ;
+
 2*	( n1 -- n2 )		core		two_star
 ""Shift left by 1; also works on unsigned numbers""
 n2 = 2*n1;
@@ -821,18 +881,34 @@ n2 = n1>>1;
 fm/mod	( d1 n1 -- n2 n3 )		core		f_m_slash_mod
 ""Floored division: @i{d1} = @i{n3}*@i{n1}+@i{n2}, @i{n1}>@i{n2}>=0 or 0>=@i{n2}>@i{n1}.""
 #ifdef BUGGY_LL_DIV
+#ifdef ASM_SM_SLASH_REM
+ASM_SM_SLASH_REM(d1.lo, d1.hi, n1, n2, n3);
+if (((DHI(d1)^n1)<0) && n2!=0) {
+  n3--;
+  n2+=n1;
+}
+#else /* !defined(ASM_SM_SLASH_REM) */
 DCell r = fmdiv(d1,n1);
-n2=r.hi;
-n3=r.lo;
-#else
+n2=DHI(r);
+n3=DLO(r);
+#endif /* !defined(ASM_SM_SLASH_REM) */
+#else
+#ifdef ASM_SM_SLASH_REM4
+ASM_SM_SLASH_REM4(d1, n1, n2, n3);
+if (((DHI(d1)^n1)<0) && n2!=0) {
+  n3--;
+  n2+=n1;
+}
+#else /* !defined(ASM_SM_SLASH_REM4) */
 /* assumes that the processor uses either floored or symmetric division */
 n3 = d1/n1;
 n2 = d1%n1;
 /* note that this 1%-3>0 is optimized by the compiler */
-if (1%-3>0 && (d1<0) != (n1<0) && n2!=0) {
+if (1%-3>0 && ((DHI(d1)^n1)<0) && n2!=0) {
   n3--;
   n2+=n1;
 }
+#endif /* !defined(ASM_SM_SLASH_REM4) */
 #endif
 :
  dup >r dup 0< IF  negate >r dnegate r>  THEN
@@ -843,18 +919,26 @@ if (1%-3>0 && (d1<0) != (n1<0) && n2!=0)
 sm/rem	( d1 n1 -- n2 n3 )		core		s_m_slash_rem
 ""Symmetric division: @i{d1} = @i{n3}*@i{n1}+@i{n2}, sign(@i{n2})=sign(@i{d1}) or 0.""
 #ifdef BUGGY_LL_DIV
+#ifdef ASM_SM_SLASH_REM
+ASM_SM_SLASH_REM(d1.lo, d1.hi, n1, n2, n3);
+#else /* !defined(ASM_SM_SLASH_REM) */
 DCell r = smdiv(d1,n1);
-n2=r.hi;
-n3=r.lo;
-#else
+n2=DHI(r);
+n3=DLO(r);
+#endif /* !defined(ASM_SM_SLASH_REM) */
+#else
+#ifdef ASM_SM_SLASH_REM4
+ASM_SM_SLASH_REM4(d1, n1, n2, n3);
+#else /* !defined(ASM_SM_SLASH_REM4) */
 /* assumes that the processor uses either floored or symmetric division */
 n3 = d1/n1;
 n2 = d1%n1;
 /* note that this 1%-3<0 is optimized by the compiler */
-if (1%-3<0 && (d1<0) != (n1<0) && n2!=0) {
+if (1%-3<0 && ((DHI(d1)^n1)<0) && n2!=0) {
   n3++;
   n2-=n1;
 }
+#endif /* !defined(ASM_SM_SLASH_REM4) */
 #endif
 :
  over >r dup >r abs -rot
@@ -892,12 +976,20 @@ ud = (UDCell)u1 * (UDCell)u2;
 um/mod	( ud u1 -- u2 u3 )		core	u_m_slash_mod
 ""ud=u3*u1+u2, u1>u2>=0""
 #ifdef BUGGY_LL_DIV
+#ifdef ASM_UM_SLASH_MOD
+ASM_UM_SLASH_MOD(ud.lo, ud.hi, u1, u2, u3);
+#else /* !defined(ASM_UM_SLASH_MOD) */
 UDCell r = umdiv(ud,u1);
-u2=r.hi;
-u3=r.lo;
-#else
+u2=DHI(r);
+u3=DLO(r);
+#endif /* !defined(ASM_UM_SLASH_MOD) */
+#else
+#ifdef ASM_UM_SLASH_MOD4
+ASM_UM_SLASH_MOD4(ud, u1, u2, u3);
+#else /* !defined(ASM_UM_SLASH_MOD4) */
 u3 = ud/u1;
 u2 = ud%u1;
+#endif /* !defined(ASM_UM_SLASH_MOD4) */
 #endif
 :
    0 swap [ 8 cells 1 + ] literal 0
@@ -1142,12 +1234,11 @@ UP=up=(char *)a_addr;
  up ! ;
 Variable UP
 
-sp@	( -- a_addr )		gforth		sp_fetch
-a_addr = sp+1;
+sp@	( S:... -- a_addr )		gforth		sp_fetch
+a_addr = sp;
 
-sp!	( a_addr -- )		gforth		sp_store
+sp!	( a_addr -- S:... )		gforth		sp_store
 sp = a_addr;
-/* works with and without spTOS caching */
 
 rp@	( -- a_addr )		gforth		rp_fetch
 a_addr = rp;
@@ -1157,10 +1248,10 @@ rp = a_addr;
 
 \+floating
 
-fp@	( -- f_addr )	gforth	fp_fetch
+fp@	( f:... -- f_addr )	gforth	fp_fetch
 f_addr = fp;
 
-fp!	( f_addr -- )	gforth	fp_store
+fp!	( f_addr -- f:... )	gforth	fp_store
 fp = f_addr;
 
 \+
@@ -1233,21 +1324,18 @@ tuck	( w1 w2 -- w2 w1 w2 )	core-ext
 :
  swap over ;
 
-?dup	( w -- w )			core	question_dupe
+?dup	( w -- S:... w )	core	question_dupe
 ""Actually the stack effect is: @code{( w -- 0 | w w )}.  It performs a
 @code{dup} if w is nonzero.""
 if (w!=0) {
-  IF_spTOS(*sp-- = w;)
-#ifndef USE_TOS
   *--sp = w;
-#endif
 }
 :
  dup IF dup THEN ;
 
-pick	( u -- w )			core-ext
+pick	( S:... u -- S:... w )		core-ext
 ""Actually the stack effect is @code{ x0 ... xu u -- x0 ... xu x0 }.""
-w = sp[u+1];
+w = sp[u];
 :
  1+ cells sp@ + @ ;
 
@@ -1694,20 +1782,17 @@ strsignal	( n -- c_addr u )	gforth
 c_addr = (Address)strsignal(n);
 u = strlen(c_addr);
 
-call-c	( w -- )	gforth	call_c
+call-c	( ... w -- ... )	gforth	call_c
 ""Call the C function pointed to by @i{w}. The C function has to
 access the stack itself. The stack pointers are exported in the global
 variables @code{SP} and @code{FP}.""
 /* This is a first attempt at support for calls to C. This may change in
    the future */
-IF_fpTOS(fp[0]=fpTOS);
 FP=fp;
 SP=sp;
 ((void (*)())w)();
 sp=SP;
 fp=FP;
-IF_spTOS(spTOS=sp[0]);
-IF_fpTOS(fpTOS=fp[0]);
 
 \+
 \+file
@@ -2053,10 +2138,13 @@ f2=FLAG(isdigit((unsigned)(sig[0]))!=0);
 siglen=strlen(sig);
 if (siglen>u) /* happens in glibc-2.1.3 if 999.. is rounded up */
   siglen=u;
+if (!f2) /* workaround Cygwin trailing 0s for Inf and Nan */
+  for (; sig[siglen-1]=='0'; siglen--);
+    ;
 memcpy(c_addr,sig,siglen);
 memset(c_addr+siglen,f2?'0':' ',u-siglen);
 
->float	( c_addr u -- flag )	float	to_float
+>float	( c_addr u -- f:... flag )	float	to_float
 ""Actual stack effect: ( c_addr u -- r t | f ).  Attempt to convert the
 character string @i{c-addr u} to internal floating-point
 representation. If the string represents a valid floating-point number
@@ -2066,9 +2154,8 @@ case and represents the floating-point n
 Float r;
 flag = to_float(c_addr, u, &r);
 if (flag) {
-  IF_fpTOS(fp[0] = fpTOS);
-  fp += -1;
-  fpTOS = r;
+  fp--;
+  fp[0]=r;
 }
 
 fabs	( r1 -- r2 )	float-ext	f_abs
@@ -2294,9 +2381,9 @@ f>l	( r -- )	gforth	f_to_l
 lp -= sizeof(Float);
 *(Float *)lp = r;
 
-fpick	( u -- r )		gforth
+fpick	( f:... u -- f:... r )		gforth
 ""Actually the stack effect is @code{ r0 ... ru u -- r0 ... ru r0 }.""
-r = fp[u+1]; /* +1, because update of fp happens before this fragment */
+r = fp[u];
 :
  floats fp@ + f@ ;
 
@@ -2334,13 +2421,10 @@ u3 = 0;
 #  endif
 #endif
 
-wcall	( u -- )	gforth
-IF_fpTOS(fp[0]=fpTOS);
+wcall	( ... u -- ... )	gforth
 FP=fp;
 sp=(Cell*)(SYSCALL(Cell*(*)(Cell *, void *))u)(sp, &FP);
 fp=FP;
-IF_spTOS(spTOS=sp[0];)
-IF_fpTOS(fpTOS=fp[0]);
 
 \+FFCALL
 
@@ -2404,30 +2488,30 @@ av_longlong(alist, d);
 av-ptr-r	( R:c_addr -- )	gforth  av_ptr_r
 av_ptr(alist, void*, c_addr);
 
-av-call-void	( -- )	gforth  av_call_void
+av-call-void	( ... -- ... )	gforth  av_call_void
 SAVE_REGS
 av_call(alist);
 REST_REGS
 
-av-call-int	( -- w )	gforth  av_call_int
+av-call-int	( ... -- ... w )	gforth  av_call_int
 SAVE_REGS
 av_call(alist);
 REST_REGS
 w = irv;
 
-av-call-float	( -- r )	gforth  av_call_float
+av-call-float	( ... -- ... r )	gforth  av_call_float
 SAVE_REGS
 av_call(alist);
 REST_REGS
 r = frv;
 
-av-call-double	( -- r )	gforth  av_call_double
+av-call-double	( ... -- ... r )	gforth  av_call_double
 SAVE_REGS
 av_call(alist);
 REST_REGS
 r = drv;
 
-av-call-longlong	( -- d )	gforth  av_call_longlong
+av-call-longlong	( ... -- ... d )	gforth  av_call_longlong
 SAVE_REGS
 av_call(alist);
 REST_REGS
@@ -2438,7 +2522,7 @@ DHI_IS(d, 0);
 d = llrv;
 #endif
 
-av-call-ptr	( -- c_addr )	gforth  av_call_ptr
+av-call-ptr	( ... -- ... c_addr )	gforth  av_call_ptr
 SAVE_REGS
 av_call(alist);
 REST_REGS
@@ -2515,6 +2599,89 @@ return 0;
 
 \+
 
+\+LIBFFI
+
+ffi-type ( n -- a_type )	gforth ffi_type
+static void* ffi_types[] =
+    { &ffi_type_void,
+      &ffi_type_uint8, &ffi_type_sint8,
+      &ffi_type_uint16, &ffi_type_sint16,
+      &ffi_type_uint32, &ffi_type_sint32,
+      &ffi_type_uint64, &ffi_type_sint64,
+      &ffi_type_float, &ffi_type_double, &ffi_type_longdouble,
+      &ffi_type_pointer };
+a_type = ffi_types[n];
+
+ffi-size ( n1 -- n2 )	gforth ffi_size
+static int ffi_sizes[] =
+    { sizeof(ffi_cif), sizeof(ffi_closure) };
+n2 = ffi_sizes[n1];
+
+ffi-prep-cif ( a_atypes n a_rtype a_cif -- w )	gforth ffi_prep_cif
+w = ffi_prep_cif(a_cif, FFI_DEFAULT_ABI, n, a_rtype, a_atypes);
+
+ffi-call ( a_avalues a_rvalue a_ip a_cif -- )	gforth ffi_call
+ffi_call(a_cif, a_ip, a_rvalue, a_avalues);
+
+ffi-prep-closure ( a_ip a_cif a_closure -- w )	gforth ffi_prep_closure
+w = ffi_prep_closure(a_closure, a_cif, ffi_callback, a_ip);
+
+ffi-2@ ( a_addr -- d )	gforth ffi_2fetch
+#ifdef BUGGY_LONG_LONG
+DLO_IS(d, *(Cell*)(*a_addr)); /* load the cell the same way the #else branch loads the double cell, instead of passing the pointer itself */
+DHI_IS(d, 0);
+#else
+d = *(DCell*)(*a_addr);
+#endif
+
+ffi-2! ( d a_addr -- )	gforth ffi_2store
+#ifdef BUGGY_LONG_LONG
+*(Cell*)(a_addr) = DLO(d);
+#else
+*(DCell*)(a_addr) = d;
+#endif
+
+ffi-arg-int ( -- w )	gforth ffi_arg_int
+w = *(Cell*)(*clist++);
+
+ffi-arg-longlong ( -- d )	gforth ffi_arg_longlong
+#ifdef BUGGY_LONG_LONG
+DLO_IS(d, *(Cell*)(*clist++)); /* dereference the argument slot (as ffi-arg-int does) rather than storing the pointer value */
+DHI_IS(d, 0);
+#else
+d = *(DCell*)(*clist++);
+#endif
+
+ffi-arg-ptr ( -- c_addr )	gforth ffi_arg_ptr
+c_addr = *(char **)(*clist++);
+
+ffi-arg-float ( -- r )	gforth ffi_arg_float
+r = *(float*)(*clist++);
+
+ffi-arg-double ( -- r )	gforth ffi_arg_double
+r = *(double*)(*clist++);
+
+ffi-ret-int ( w -- )	gforth ffi_ret_int
+*(int*)(ritem) = w; /* w is taken from the data stack and written through ritem */
+
+ffi-ret-longlong ( d -- )	gforth ffi_ret_longlong
+#ifdef BUGGY_LONG_LONG
+*(Cell*)(ritem) = DLO(d); /* only the low cell of the consumed double is written through ritem */
+#else
+*(DCell*)(ritem) = d; /* d is taken from the data stack and written through ritem */
+#endif
+
+ffi-ret-ptr ( c_addr -- )	gforth ffi_ret_ptr
+*(char **)(ritem) = c_addr; /* c_addr is taken from the data stack and written through ritem */
+
+ffi-ret-float ( r -- )	gforth ffi_ret_float
+*(float*)(ritem) = r; /* r is taken from the FP stack and written through ritem as a C float */
+
+ffi-ret-double ( r -- )	gforth ffi_ret_double
+*(double*)(ritem) = r; /* r is taken from the FP stack and written through ritem as a C double */
+
+\+
+
 \+OLDCALL
 
 define(`uploop',
@@ -2564,13 +2731,12 @@ compile-prim1 ( a_prim -- ) gforth compi
 ""compile prim (incl. immargs) at @var{a_prim}""
 compile_prim1(a_prim);
 
-finish-code ( -- ) gforth finish_code
+finish-code ( ... -- ... ) gforth finish_code
 ""Perform delayed steps in code generation (branch resolution, I-cache
 flushing).""
-IF_spTOS(sp[0]=spTOS); /* workaround for failing to save spTOS
-			  (gcc-2.95.1, gforth-fast --enable-force-reg) */
+/* The ... above are a workaround for a bug in gcc-2.95, which fails
+   to save spTOS (gforth-fast --enable-force-reg) */
 finish_code();
-IF_spTOS(spTOS=sp[0]);
 
 forget-dyncode ( c_code -- f ) gforth-internal forget_dyncode
 f = forget_dyncode(c_code);
@@ -2605,7 +2771,7 @@ a_addr = groups;
 
 \g static_super
 
-ifdef(`M4_ENGINE_FAST',
+ifdef(`STACK_CACHE_FILE',
 `include(peeprules.vmg)')
 
 \g end