Diff for /gforth/utf-8.fs between versions 1.36 and 1.41

version 1.36, 2008/07/15 16:11:49 version 1.41, 2010/02/14 18:04:16
Line 1 Line 1
 \ UTF-8 handling                                       12dec04py  \ UTF-8 handling                                       12dec04py
   
 \ Copyright (C) 2004,2005,2006,2007,2008 Free Software Foundation, Inc.  \ Copyright (C) 2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
   
 \ This file is part of Gforth.  \ This file is part of Gforth.
   
Line 73  $80 Value max-single-byte Line 73  $80 Value max-single-byte
 Defer check-xy  ' noop IS check-xy  Defer check-xy  ' noop IS check-xy
   
 : u8key ( -- u )  : u8key ( -- u )
     defers key dup max-single-byte u< ?EXIT  \ special case ASCII      key dup max-single-byte u< ?EXIT  \ special case ASCII
     dup $FF = ?EXIT  \ special resize character      dup $FF = ?EXIT  \ special resize character
     dup $C2 u< IF  UTF-8-err throw  THEN  \ malformed character      dup $C2 u< IF  UTF-8-err throw  THEN  \ malformed character
     $7F and  $40 >r      $7F and  $40 >r
     BEGIN  dup r@ and  WHILE  r@ xor      BEGIN  dup r@ and  WHILE  r@ xor
             6 lshift r> 5 lshift >r >r defers key              6 lshift r> 5 lshift >r >r key
             dup $C0 and $80 <> IF  UTF-8-err throw  THEN              dup $C0 and $80 <> IF  UTF-8-err throw  THEN
             $3F and r> or              $3F and r> or
     REPEAT  rdrop ;      REPEAT  rdrop ;
   
 : u8emit ( u -- )  : u8emit ( u -- )
     dup max-single-byte u< IF  defers emit  EXIT  THEN \ special case ASCII      dup max-single-byte u< IF  emit  EXIT  THEN \ special case ASCII
     0 swap  $3F      0 swap  $3F
     BEGIN  2dup u>  WHILE      BEGIN  2dup u>  WHILE
             2/ >r  dup $3F and $80 or swap 6 rshift r>              2/ >r  dup $3F and $80 or swap 6 rshift r>
     REPEAT  $7F xor 2* or      REPEAT  $7F xor 2* or
     BEGIN   dup $80 u>= WHILE  defers emit  REPEAT  drop ;      BEGIN   dup $80 u>= WHILE  emit  REPEAT  drop ;
   
 \ utf-8 stuff for xchars  \ utf-8 stuff for xchars
   
Line 119  Defer check-xy  ' noop IS check-xy Line 119  Defer check-xy  ' noop IS check-xy
     dup $f8 u< if drop 4 exit endif      dup $f8 u< if drop 4 exit endif
     dup $fc u< if drop 5 exit endif      dup $fc u< if drop 5 exit endif
     dup $fe u< if drop 6 exit endif      dup $fe u< if drop 6 exit endif
     UTF-8-err throw ;      dup $ff u< if drop 7 exit endif
       drop 8 ;
   
 : -u8trailing-garbage ( addr u1 -- addr u2 )  : -u8trailing-garbage ( addr u1 -- addr u2 )
     2dup + dup u8<< ( addr u1 end1 end2 )      2dup + dup u8<< ( addr u1 end1 end2 )
Line 272  here wc-table - Constant #wc-table Line 273  here wc-table - Constant #wc-table
   
 \ inefficient table walk:  \ inefficient table walk:
   
 : wcwidth ( xc -- n )  : xc-width ( xc -- n )
     wc-table #wc-table over + swap ?DO      wc-table #wc-table over + swap ?DO
         dup I 2@ within IF  I 2 cells + @  UNLOOP EXIT  THEN          dup I 2@ within IF  I 2 cells + @  UNLOOP EXIT  THEN
     3 cells +LOOP  1 ;      3 cells +LOOP  1 ;
   [ELSE]
       ' wcwidth Alias xc-width
 [THEN]  [THEN]
           
 : u8width ( xcaddr u -- n )  : u8width ( xcaddr u -- n )
     0 rot rot over + swap ?DO      0 rot rot over + swap ?DO
         I xc@+ swap >r wcwidth +          I xc@+ swap >r xc-width +
     r> I - +LOOP ;      r> I - +LOOP ;
   
 : set-encoding-utf-8 ( -- )  : set-encoding-utf-8 ( -- )
Line 325  environment-wordlist set-current Line 328  environment-wordlist set-current
     \G ``ISO-LATIN-1'' or ``UTF-8'', with the exception of ``ASCII'', where      \G ``ISO-LATIN-1'' or ``UTF-8'', with the exception of ``ASCII'', where
     \G we prefer the alias ``ASCII''.      \G we prefer the alias ``ASCII''.
     max-single-byte $80 = IF s" UTF-8" ELSE s" ISO-LATIN-1" THEN ;      max-single-byte $80 = IF s" UTF-8" ELSE s" ISO-LATIN-1" THEN ;
   : max-xchar ( -- xchar )
       max-single-byte $80 = IF $7FFFFFFF  ELSE  $FF  THEN ;
   ' noop Alias X:xchar
 forth definitions  forth definitions
   
 :noname ( -- )  :noname ( -- )

Removed from v.1.36  
changed lines
  Added in v.1.41


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>