--- gforth/utf-8.fs	2004/12/28 17:27:24	1.6
+++ gforth/utf-8.fs	2004/12/31 11:01:21	1.9
@@ -20,6 +20,8 @@
 
 \ short: u8 means utf-8 encoded address
 
+s" malformed UTF-8 character" exception Constant UTF-8-err \ throw code used when a follow-up byte fails the $C0 and $80 = check
+
 : u8len ( u8 -- n )
     dup      $80 u< IF  drop 1  EXIT  THEN \ special case ASCII
     $800  2 >r
@@ -31,7 +33,7 @@
     $7F and  $40 >r
     BEGIN  dup r@ and  WHILE  r@ xor
 	    6 lshift r> 5 lshift >r >r count
-\	    dup $C0 and $80 <> abort" malformed character"
+	    dup $C0 and $80 <> IF   UTF-8-err throw  THEN
 	    $3F and r> or
     REPEAT  rdrop ;
 
@@ -43,6 +45,19 @@
     REPEAT  $7F xor 2* or  r>
     BEGIN   over $80 u>= WHILE  tuck c! 1+  REPEAT  nip ;
 
+\ plug-in so that char and '<char> work for UTF-8
+
+[ifundef] char@ \ !! bootstrapping help: define char@ only if it does not exist yet
+    Defer char@ ( addr u -- char addr' u' ) \ deferred, so a later definition can be plugged in with IS
+    :noname  over c@ -rot 1 /string ; IS char@ \ default: first byte is the char, string advances by 1
+[then]
+
+:noname  ( addr u -- char addr' u' ) \ UTF-8 variant, installed into char@ by the IS below
+    \ !! the if here seems to work around some breakage, but not
+    \ entirely; e.g., try 'į' with LANG=C.
+    dup 1 u<= IF defers char@ EXIT THEN \ u <= 1: hand over to the char@ that was installed before
+    over + >r u8@+ swap r> over - ; IS char@ \ end=addr+u kept on rstack; u8@+ assumed ( u8addr -- u8addr' char ); u'=end-addr'
+
 \ scan to next/previous character
 
 : u8>> ( u8addr -- u8addr' )
@@ -57,7 +72,7 @@
     $7F and  $40 >r
     BEGIN  dup r@ and  WHILE  r@ xor
 	    6 lshift r> 5 lshift >r >r defers key
-\	    dup $C0 and $80 <> abort" malformed character"
+	    dup $C0 and $80 <> IF  UTF-8-err throw  THEN
 	    $3F and r> or
     REPEAT  rdrop ;
 
@@ -71,8 +86,10 @@
 
 \ input editor
 
-: save-cursor ( -- )  27 emit '7 emit ;
-: restore-cursor ( -- )  27 emit '8 emit ;
+[IFUNDEF] #esc  27 Constant #esc  [THEN] \ 27 = ASCII ESC; defined here only when not already present
+
+: save-cursor ( -- )  #esc emit '7 emit ; \ emits ESC 7 (DECSC on VT100-style terminals)
+: restore-cursor ( -- )  #esc emit '8 emit ; \ emits ESC 8 (DECRC on VT100-style terminals)
 : .rest ( addr pos1 -- addr pos1 )
     restore-cursor 2dup type ;
 : .all ( span addr pos1 -- span addr pos1 )