Diff for /gforth/utf-8.fs between versions 1.14 and 1.20

version 1.14, 2005/01/10 10:30:55 version 1.20, 2006/02/19 22:15:05
Line 1 Line 1
 \ UTF-8 handling                                       12dec04py  \ UTF-8 handling                                       12dec04py
   
 \ Copyright (C) 2004 Free Software Foundation, Inc.  \ Copyright (C) 2004,2005 Free Software Foundation, Inc.
   
 \ This file is part of Gforth.  \ This file is part of Gforth.
   
Line 27  $80 Value max-single-byte Line 27  $80 Value max-single-byte
 : u8len ( u8 -- n )  : u8len ( u8 -- n )
     dup      max-single-byte u< IF  drop 1  EXIT  THEN \ special case ASCII      dup      max-single-byte u< IF  drop 1  EXIT  THEN \ special case ASCII
     $800  2 >r      $800  2 >r
     BEGIN  2dup u>=  WHILE  5 lshift r> 1+ >r  REPEAT      BEGIN  2dup u>=  WHILE  5 lshift r> 1+ >r  dup 0= UNTIL  THEN
     2drop r> ;      2drop r> ;
   
 : u8@+ ( u8addr -- u8addr' u )  : u8@+ ( u8addr -- u8addr' u )
     count  dup max-single-byte u< ?EXIT  \ special case ASCII      count  dup max-single-byte u< ?EXIT  \ special case ASCII
       dup $C2 u< IF  UTF-8-err throw  THEN  \ malformed character
     $7F and  $40 >r      $7F and  $40 >r
     BEGIN  dup r@ and  WHILE  r@ xor      BEGIN  dup r@ and  WHILE  r@ xor
             6 lshift r> 5 lshift >r >r count              6 lshift r> 5 lshift >r >r count
Line 72  $80 Value max-single-byte Line 73  $80 Value max-single-byte
   
 : u8key ( -- u )  : u8key ( -- u )
     defers key dup max-single-byte u< ?EXIT  \ special case ASCII      defers key dup max-single-byte u< ?EXIT  \ special case ASCII
       dup $C2 u< IF  UTF-8-err throw  THEN  \ malformed character
     $7F and  $40 >r      $7F and  $40 >r
     BEGIN  dup r@ and  WHILE  r@ xor      BEGIN  dup r@ and  WHILE  r@ xor
             6 lshift r> 5 lshift >r >r defers key              6 lshift r> 5 lshift >r >r defers key
Line 126  $80 Value max-single-byte Line 128  $80 Value max-single-byte
         nip nip over -          nip nip over -
     then ;      then ;
   
   : u8width ( xcaddr u -- n )
       0 rot rot over + swap ?DO
           I xc@+ swap >r wcwidth +
       r> I - +LOOP ;
   
 : set-encoding-utf-8 ( -- )  : set-encoding-utf-8 ( -- )
     ['] u8emit is xemit      ['] u8emit is xemit
     ['] u8key is xkey      ['] u8key is xkey
Line 137  $80 Value max-single-byte Line 144  $80 Value max-single-byte
     ['] u8!+? is xc!+?      ['] u8!+? is xc!+?
     ['] u8@+ is xc@+      ['] u8@+ is xc@+
     ['] u8len is xc-size      ['] u8len is xc-size
   [ [IFDEF] x-width ]
       ['] u8width is x-width
   [ [THEN] ]
     ['] -u8trailing-garbage is -trailing-garbage      ['] -u8trailing-garbage is -trailing-garbage
 ;  ;
   
   : utf-8-cold ( -- )
       s" LC_ALL" getenv 2dup d0= IF  2drop
           s" LC_CTYPE" getenv 2dup d0= IF  2drop
               s" LANG" getenv 2dup d0= IF  2drop
                   s" C"  THEN THEN THEN
       s" UTF-8" search nip nip
       IF  set-encoding-utf-8  ELSE  set-encoding-fixed-width  THEN ;
   
   ' utf-8-cold INIT8 chained
   
   utf-8-cold

Removed from v.1.14  
changed lines
  Added in v.1.20


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>